/* Dependency checks for instruction scheduling, shared between ARM and
   AARCH64.

   Copyright (C) 1991-2017 Free Software Foundation, Inc.
   Contributed by ARM Ltd.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
26 #include "coretypes.h"
31 /* In ARMv8-A there's a general expectation that AESE/AESMC
32 and AESD/AESIMC sequences of the form:
37 will issue both instructions in a single cycle on super-scalar
38 implementations. This function identifies such pairs. */
41 aarch_crypto_can_dual_issue (rtx_insn
*producer_insn
, rtx_insn
*consumer_insn
)
43 rtx producer_set
, consumer_set
;
44 rtx producer_src
, consumer_src
;
46 producer_set
= single_set (producer_insn
);
47 consumer_set
= single_set (consumer_insn
);
49 producer_src
= producer_set
? SET_SRC (producer_set
) : NULL
;
50 consumer_src
= consumer_set
? SET_SRC (consumer_set
) : NULL
;
52 if (producer_src
&& consumer_src
53 && GET_CODE (producer_src
) == UNSPEC
&& GET_CODE (consumer_src
) == UNSPEC
54 && ((XINT (producer_src
, 1) == UNSPEC_AESE
55 && XINT (consumer_src
, 1) == UNSPEC_AESMC
)
56 || (XINT (producer_src
, 1) == UNSPEC_AESD
57 && XINT (consumer_src
, 1) == UNSPEC_AESIMC
)))
59 unsigned int regno
= REGNO (SET_DEST (producer_set
));
61 /* Before reload the registers are virtual, so the destination of
62 consumer_set doesn't need to match. */
64 return (REGNO (SET_DEST (consumer_set
)) == regno
|| !reload_completed
)
65 && REGNO (XVECEXP (consumer_src
, 0, 0)) == regno
;
71 /* Return TRUE if X is either an arithmetic shift left, or
72 is a multiplication by a power of two. */
74 arm_rtx_shift_left_p (rtx x
)
76 enum rtx_code code
= GET_CODE (x
);
78 if (code
== MULT
&& CONST_INT_P (XEXP (x
, 1))
79 && exact_log2 (INTVAL (XEXP (x
, 1))) > 0)
88 static rtx_code shift_rtx_codes
[] =
89 { ASHIFT
, ROTATE
, ASHIFTRT
, LSHIFTRT
,
90 ROTATERT
, ZERO_EXTEND
, SIGN_EXTEND
};
92 /* Traverse PATTERN looking for a sub-rtx with RTX_CODE CODE.
93 If FIND_ANY_SHIFT then we are interested in anything which can
94 reasonably be described as a SHIFT RTX. */
96 arm_find_sub_rtx_with_code (rtx pattern
, rtx_code code
, bool find_any_shift
)
98 subrtx_var_iterator::array_type array
;
99 FOR_EACH_SUBRTX_VAR (iter
, array
, pattern
, NONCONST
)
104 /* Left shifts might have been canonicalized to a MULT of some
105 power of two. Make sure we catch them. */
106 if (arm_rtx_shift_left_p (x
))
109 for (unsigned int i
= 0; i
< ARRAY_SIZE (shift_rtx_codes
); i
++)
110 if (GET_CODE (x
) == shift_rtx_codes
[i
])
114 if (GET_CODE (x
) == code
)
120 /* Traverse PATTERN looking for any sub-rtx which looks like a shift. */
122 arm_find_shift_sub_rtx (rtx pattern
)
124 return arm_find_sub_rtx_with_code (pattern
, ASHIFT
, true);
127 /* PRODUCER and CONSUMER are two potentially dependant RTX. PRODUCER
128 (possibly) contains a SET which will provide a result we can access
129 using the SET_DEST macro. We will place the RTX which would be
130 written by PRODUCER in SET_SOURCE.
131 Similarly, CONSUMER (possibly) contains a SET which has an operand
132 we can access using SET_SRC. We place this operand in
135 Return nonzero if we found the SET RTX we expected. */
137 arm_get_set_operands (rtx producer
, rtx consumer
,
138 rtx
*set_source
, rtx
*set_destination
)
140 rtx set_producer
= arm_find_sub_rtx_with_code (PATTERN (producer
),
142 rtx set_consumer
= arm_find_sub_rtx_with_code (PATTERN (consumer
),
145 if (set_producer
&& set_consumer
)
147 *set_source
= SET_DEST (set_producer
);
148 *set_destination
= SET_SRC (set_consumer
);
155 aarch_rev16_shright_mask_imm_p (rtx val
, machine_mode mode
)
157 return CONST_INT_P (val
)
159 == trunc_int_for_mode (HOST_WIDE_INT_C (0xff00ff00ff00ff),
164 aarch_rev16_shleft_mask_imm_p (rtx val
, machine_mode mode
)
166 return CONST_INT_P (val
)
168 == trunc_int_for_mode (HOST_WIDE_INT_C (0xff00ff00ff00ff00),
174 aarch_rev16_p_1 (rtx lhs
, rtx rhs
, machine_mode mode
)
176 if (GET_CODE (lhs
) == AND
177 && GET_CODE (XEXP (lhs
, 0)) == ASHIFT
178 && CONST_INT_P (XEXP (XEXP (lhs
, 0), 1))
179 && INTVAL (XEXP (XEXP (lhs
, 0), 1)) == 8
180 && REG_P (XEXP (XEXP (lhs
, 0), 0))
181 && CONST_INT_P (XEXP (lhs
, 1))
182 && GET_CODE (rhs
) == AND
183 && GET_CODE (XEXP (rhs
, 0)) == LSHIFTRT
184 && REG_P (XEXP (XEXP (rhs
, 0), 0))
185 && CONST_INT_P (XEXP (XEXP (rhs
, 0), 1))
186 && INTVAL (XEXP (XEXP (rhs
, 0), 1)) == 8
187 && CONST_INT_P (XEXP (rhs
, 1))
188 && REGNO (XEXP (XEXP (rhs
, 0), 0)) == REGNO (XEXP (XEXP (lhs
, 0), 0)))
191 rtx lhs_mask
= XEXP (lhs
, 1);
192 rtx rhs_mask
= XEXP (rhs
, 1);
194 return aarch_rev16_shright_mask_imm_p (rhs_mask
, mode
)
195 && aarch_rev16_shleft_mask_imm_p (lhs_mask
, mode
);
201 /* Recognise a sequence of bitwise operations corresponding to a rev16 operation.
202 These will be of the form:
203 ((x >> 8) & 0x00ff00ff)
204 | ((x << 8) & 0xff00ff00)
205 for SImode and with similar but wider bitmasks for DImode.
206 The two sub-expressions of the IOR can appear on either side so check both
207 permutations with the help of aarch_rev16_p_1 above. */
210 aarch_rev16_p (rtx x
)
212 rtx left_sub_rtx
, right_sub_rtx
;
215 if (GET_CODE (x
) != IOR
)
218 left_sub_rtx
= XEXP (x
, 0);
219 right_sub_rtx
= XEXP (x
, 1);
221 /* There are no canonicalisation rules for the position of the two shifts
222 involved in a rev, so try both permutations. */
223 is_rev
= aarch_rev16_p_1 (left_sub_rtx
, right_sub_rtx
, GET_MODE (x
));
226 is_rev
= aarch_rev16_p_1 (right_sub_rtx
, left_sub_rtx
, GET_MODE (x
));
231 /* Return nonzero if the CONSUMER instruction (a load) does need
232 PRODUCER's value to calculate the address. */
234 arm_early_load_addr_dep (rtx producer
, rtx consumer
)
238 if (!arm_get_set_operands (producer
, consumer
, &value
, &addr
))
241 return reg_overlap_mentioned_p (value
, addr
);
244 /* Return nonzero if the CONSUMER instruction (a load) does need
245 a Pmode PRODUCER's value to calculate the address. */
248 arm_early_load_addr_dep_ptr (rtx producer
, rtx consumer
)
250 rtx value
= arm_find_sub_rtx_with_code (PATTERN (producer
), SET
, false);
251 rtx addr
= arm_find_sub_rtx_with_code (PATTERN (consumer
), SET
, false);
253 if (!value
|| !addr
|| !MEM_P (SET_SRC (value
)))
256 value
= SET_DEST (value
);
257 addr
= SET_SRC (addr
);
259 return GET_MODE (value
) == Pmode
&& reg_overlap_mentioned_p (value
, addr
);
262 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
263 have an early register shift value or amount dependency on the
264 result of PRODUCER. */
266 arm_no_early_alu_shift_dep (rtx producer
, rtx consumer
)
271 if (!arm_get_set_operands (producer
, consumer
, &value
, &op
))
274 if ((early_op
= arm_find_shift_sub_rtx (op
)))
276 if (REG_P (early_op
))
279 return !reg_overlap_mentioned_p (value
, early_op
);
285 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
286 have an early register shift value dependency on the result of
289 arm_no_early_alu_shift_value_dep (rtx producer
, rtx consumer
)
294 if (!arm_get_set_operands (producer
, consumer
, &value
, &op
))
297 if ((early_op
= arm_find_shift_sub_rtx (op
)))
298 /* We want to check the value being shifted. */
299 if (!reg_overlap_mentioned_p (value
, XEXP (early_op
, 0)))
305 /* Return nonzero if the CONSUMER (a mul or mac op) does not
306 have an early register mult dependency on the result of
309 arm_no_early_mul_dep (rtx producer
, rtx consumer
)
313 if (!arm_get_set_operands (producer
, consumer
, &value
, &op
))
316 if (GET_CODE (op
) == PLUS
|| GET_CODE (op
) == MINUS
)
318 if (GET_CODE (XEXP (op
, 0)) == MULT
)
319 return !reg_overlap_mentioned_p (value
, XEXP (op
, 0));
321 return !reg_overlap_mentioned_p (value
, XEXP (op
, 1));
327 /* Return nonzero if the CONSUMER instruction (a store) does not need
328 PRODUCER's value to calculate the address. */
331 arm_no_early_store_addr_dep (rtx producer
, rtx consumer
)
333 rtx value
= arm_find_sub_rtx_with_code (PATTERN (producer
), SET
, false);
334 rtx addr
= arm_find_sub_rtx_with_code (PATTERN (consumer
), SET
, false);
337 value
= SET_DEST (value
);
340 addr
= SET_DEST (addr
);
345 return !reg_overlap_mentioned_p (value
, addr
);
348 /* Return nonzero if the CONSUMER instruction (a store) does need
349 PRODUCER's value to calculate the address. */
352 arm_early_store_addr_dep (rtx producer
, rtx consumer
)
354 return !arm_no_early_store_addr_dep (producer
, consumer
);
357 /* Return nonzero if the CONSUMER instruction (a store) does need
358 a Pmode PRODUCER's value to calculate the address. */
361 arm_early_store_addr_dep_ptr (rtx producer
, rtx consumer
)
363 rtx value
= arm_find_sub_rtx_with_code (PATTERN (producer
), SET
, false);
364 rtx addr
= arm_find_sub_rtx_with_code (PATTERN (consumer
), SET
, false);
366 if (!value
|| !addr
|| !MEM_P (SET_SRC (value
)))
369 value
= SET_DEST (value
);
370 addr
= SET_DEST (addr
);
372 return GET_MODE (value
) == Pmode
&& reg_overlap_mentioned_p (value
, addr
);
375 /* Return non-zero iff the consumer (a multiply-accumulate or a
376 multiple-subtract instruction) has an accumulator dependency on the
377 result of the producer and no other dependency on that result. It
378 does not check if the producer is multiply-accumulate instruction. */
380 arm_mac_accumulator_is_result (rtx producer
, rtx consumer
)
385 producer
= PATTERN (producer
);
386 consumer
= PATTERN (consumer
);
388 if (GET_CODE (producer
) == COND_EXEC
)
389 producer
= COND_EXEC_CODE (producer
);
390 if (GET_CODE (consumer
) == COND_EXEC
)
391 consumer
= COND_EXEC_CODE (consumer
);
393 if (GET_CODE (producer
) != SET
)
396 result
= XEXP (producer
, 0);
398 if (GET_CODE (consumer
) != SET
)
401 /* Check that the consumer is of the form
402 (set (...) (plus (mult ...) (...)))
404 (set (...) (minus (...) (mult ...))). */
405 if (GET_CODE (XEXP (consumer
, 1)) == PLUS
)
407 if (GET_CODE (XEXP (XEXP (consumer
, 1), 0)) != MULT
)
410 op0
= XEXP (XEXP (XEXP (consumer
, 1), 0), 0);
411 op1
= XEXP (XEXP (XEXP (consumer
, 1), 0), 1);
412 acc
= XEXP (XEXP (consumer
, 1), 1);
414 else if (GET_CODE (XEXP (consumer
, 1)) == MINUS
)
416 if (GET_CODE (XEXP (XEXP (consumer
, 1), 1)) != MULT
)
419 op0
= XEXP (XEXP (XEXP (consumer
, 1), 1), 0);
420 op1
= XEXP (XEXP (XEXP (consumer
, 1), 1), 1);
421 acc
= XEXP (XEXP (consumer
, 1), 0);
426 return (reg_overlap_mentioned_p (result
, acc
)
427 && !reg_overlap_mentioned_p (result
, op0
)
428 && !reg_overlap_mentioned_p (result
, op1
));
431 /* Return non-zero if the destination of PRODUCER feeds the accumulator
432 operand of an MLA-like operation. */
435 aarch_accumulator_forwarding (rtx_insn
*producer
, rtx_insn
*consumer
)
437 rtx producer_set
= single_set (producer
);
438 rtx consumer_set
= single_set (consumer
);
440 /* We are looking for a SET feeding a SET. */
441 if (!producer_set
|| !consumer_set
)
444 rtx dest
= SET_DEST (producer_set
);
445 rtx mla
= SET_SRC (consumer_set
);
447 /* We're looking for a register SET. */
453 /* Strip a zero_extend. */
454 if (GET_CODE (mla
) == ZERO_EXTEND
)
457 switch (GET_CODE (mla
))
460 /* Possibly an MADD. */
461 if (GET_CODE (XEXP (mla
, 0)) == MULT
)
462 accumulator
= XEXP (mla
, 1);
467 /* Possibly an MSUB. */
468 if (GET_CODE (XEXP (mla
, 1)) == MULT
)
469 accumulator
= XEXP (mla
, 0);
475 /* Possibly an FMADD/FMSUB/FNMADD/FNMSUB. */
476 if (REG_P (XEXP (mla
, 1))
477 && REG_P (XEXP (mla
, 2))
478 && (REG_P (XEXP (mla
, 0))
479 || GET_CODE (XEXP (mla
, 0)) == NEG
))
483 accumulator
= XEXP (mla
, 2);
485 else if (REG_P (XEXP (mla
, 1))
486 && GET_CODE (XEXP (mla
, 2)) == NEG
487 && (REG_P (XEXP (mla
, 0))
488 || GET_CODE (XEXP (mla
, 0)) == NEG
))
491 accumulator
= XEXP (XEXP (mla
, 2), 0);
498 /* Not an MLA-like operation. */
502 if (GET_CODE (accumulator
) == SUBREG
)
503 accumulator
= SUBREG_REG (accumulator
);
505 if (!REG_P (accumulator
))
508 return (REGNO (dest
) == REGNO (accumulator
));
511 /* Return nonzero if the CONSUMER instruction is some sort of
512 arithmetic or logic + shift operation, and the register we are
513 writing in PRODUCER is not used in a register shift by register
517 aarch_forward_to_shift_is_not_shifted_reg (rtx_insn
*producer
,
523 if (!arm_get_set_operands (producer
, consumer
, &value
, &op
))
526 if ((early_op
= arm_find_shift_sub_rtx (op
)))
528 if (REG_P (early_op
))
531 /* Any other canonicalisation of a shift is a shift-by-constant
533 if (GET_CODE (early_op
) == ASHIFT
)
534 return (!REG_P (XEXP (early_op
, 0))
535 || !REG_P (XEXP (early_op
, 1)));
543 /* Return non-zero if the consumer (a multiply-accumulate instruction)
544 has an accumulator dependency on the result of the producer (a
545 multiplication instruction) and no other dependency on that result. */
547 arm_mac_accumulator_is_mul_result (rtx producer
, rtx consumer
)
549 rtx mul
= PATTERN (producer
);
550 rtx mac
= PATTERN (consumer
);
552 rtx mac_op0
, mac_op1
, mac_acc
;
554 if (GET_CODE (mul
) == COND_EXEC
)
555 mul
= COND_EXEC_CODE (mul
);
556 if (GET_CODE (mac
) == COND_EXEC
)
557 mac
= COND_EXEC_CODE (mac
);
559 /* Check that mul is of the form (set (...) (mult ...))
560 and mla is of the form (set (...) (plus (mult ...) (...))). */
561 if ((GET_CODE (mul
) != SET
|| GET_CODE (XEXP (mul
, 1)) != MULT
)
562 || (GET_CODE (mac
) != SET
|| GET_CODE (XEXP (mac
, 1)) != PLUS
563 || GET_CODE (XEXP (XEXP (mac
, 1), 0)) != MULT
))
566 mul_result
= XEXP (mul
, 0);
567 mac_op0
= XEXP (XEXP (XEXP (mac
, 1), 0), 0);
568 mac_op1
= XEXP (XEXP (XEXP (mac
, 1), 0), 1);
569 mac_acc
= XEXP (XEXP (mac
, 1), 1);
571 return (reg_overlap_mentioned_p (mul_result
, mac_acc
)
572 && !reg_overlap_mentioned_p (mul_result
, mac_op0
)
573 && !reg_overlap_mentioned_p (mul_result
, mac_op1
));