/* Dependency checks for instruction scheduling, shared between ARM and
   AARCH64.

   Copyright (C) 1991-2017 Free Software Foundation, Inc.
   Contributed by ARM Ltd.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
26 #include "coretypes.h"
31 /* In ARMv8-A there's a general expectation that AESE/AESMC
32 and AESD/AESIMC sequences of the form:
37 will issue both instructions in a single cycle on super-scalar
38 implementations. This function identifies such pairs. */
41 aarch_crypto_can_dual_issue (rtx_insn
*producer_insn
, rtx_insn
*consumer_insn
)
43 rtx producer_set
, consumer_set
;
44 rtx producer_src
, consumer_src
;
46 producer_set
= single_set (producer_insn
);
47 consumer_set
= single_set (consumer_insn
);
49 producer_src
= producer_set
? SET_SRC (producer_set
) : NULL
;
50 consumer_src
= consumer_set
? SET_SRC (consumer_set
) : NULL
;
52 if (producer_src
&& consumer_src
53 && GET_CODE (producer_src
) == UNSPEC
&& GET_CODE (consumer_src
) == UNSPEC
54 && ((XINT (producer_src
, 1) == UNSPEC_AESE
55 && XINT (consumer_src
, 1) == UNSPEC_AESMC
)
56 || (XINT (producer_src
, 1) == UNSPEC_AESD
57 && XINT (consumer_src
, 1) == UNSPEC_AESIMC
)))
59 unsigned int regno
= REGNO (SET_DEST (producer_set
));
61 /* Before reload the registers are virtual, so the destination of
62 consumer_set doesn't need to match. */
64 return (REGNO (SET_DEST (consumer_set
)) == regno
|| !reload_completed
)
65 && REGNO (XVECEXP (consumer_src
, 0, 0)) == regno
;
71 /* Return TRUE if X is either an arithmetic shift left, or
72 is a multiplication by a power of two. */
74 arm_rtx_shift_left_p (rtx x
)
76 enum rtx_code code
= GET_CODE (x
);
78 if (code
== MULT
&& CONST_INT_P (XEXP (x
, 1))
79 && exact_log2 (INTVAL (XEXP (x
, 1))) > 0)
88 static rtx_code shift_rtx_codes
[] =
89 { ASHIFT
, ROTATE
, ASHIFTRT
, LSHIFTRT
,
90 ROTATERT
, ZERO_EXTEND
, SIGN_EXTEND
};
92 /* Traverse PATTERN looking for a sub-rtx with RTX_CODE CODE.
93 If FIND_ANY_SHIFT then we are interested in anything which can
94 reasonably be described as a SHIFT RTX. */
96 arm_find_sub_rtx_with_code (rtx pattern
, rtx_code code
, bool find_any_shift
)
98 subrtx_var_iterator::array_type array
;
99 FOR_EACH_SUBRTX_VAR (iter
, array
, pattern
, NONCONST
)
104 /* Left shifts might have been canonicalized to a MULT of some
105 power of two. Make sure we catch them. */
106 if (arm_rtx_shift_left_p (x
))
109 for (unsigned int i
= 0; i
< ARRAY_SIZE (shift_rtx_codes
); i
++)
110 if (GET_CODE (x
) == shift_rtx_codes
[i
])
114 if (GET_CODE (x
) == code
)
120 /* Traverse PATTERN looking for any sub-rtx which looks like a shift. */
122 arm_find_shift_sub_rtx (rtx pattern
)
124 return arm_find_sub_rtx_with_code (pattern
, ASHIFT
, true);
127 /* PRODUCER and CONSUMER are two potentially dependant RTX. PRODUCER
128 (possibly) contains a SET which will provide a result we can access
129 using the SET_DEST macro. We will place the RTX which would be
130 written by PRODUCER in SET_SOURCE.
131 Similarly, CONSUMER (possibly) contains a SET which has an operand
132 we can access using SET_SRC. We place this operand in
135 Return nonzero if we found the SET RTX we expected. */
137 arm_get_set_operands (rtx producer
, rtx consumer
,
138 rtx
*set_source
, rtx
*set_destination
)
140 rtx set_producer
= arm_find_sub_rtx_with_code (PATTERN (producer
),
142 rtx set_consumer
= arm_find_sub_rtx_with_code (PATTERN (consumer
),
145 if (set_producer
&& set_consumer
)
147 *set_source
= SET_DEST (set_producer
);
148 *set_destination
= SET_SRC (set_consumer
);
155 aarch_rev16_shright_mask_imm_p (rtx val
, machine_mode mode
)
157 return CONST_INT_P (val
)
159 == trunc_int_for_mode (HOST_WIDE_INT_C (0xff00ff00ff00ff),
164 aarch_rev16_shleft_mask_imm_p (rtx val
, machine_mode mode
)
166 return CONST_INT_P (val
)
168 == trunc_int_for_mode (HOST_WIDE_INT_C (0xff00ff00ff00ff00),
174 aarch_rev16_p_1 (rtx lhs
, rtx rhs
, machine_mode mode
)
176 if (GET_CODE (lhs
) == AND
177 && GET_CODE (XEXP (lhs
, 0)) == ASHIFT
178 && CONST_INT_P (XEXP (XEXP (lhs
, 0), 1))
179 && INTVAL (XEXP (XEXP (lhs
, 0), 1)) == 8
180 && REG_P (XEXP (XEXP (lhs
, 0), 0))
181 && CONST_INT_P (XEXP (lhs
, 1))
182 && GET_CODE (rhs
) == AND
183 && GET_CODE (XEXP (rhs
, 0)) == LSHIFTRT
184 && REG_P (XEXP (XEXP (rhs
, 0), 0))
185 && CONST_INT_P (XEXP (XEXP (rhs
, 0), 1))
186 && INTVAL (XEXP (XEXP (rhs
, 0), 1)) == 8
187 && CONST_INT_P (XEXP (rhs
, 1))
188 && REGNO (XEXP (XEXP (rhs
, 0), 0)) == REGNO (XEXP (XEXP (lhs
, 0), 0)))
191 rtx lhs_mask
= XEXP (lhs
, 1);
192 rtx rhs_mask
= XEXP (rhs
, 1);
194 return aarch_rev16_shright_mask_imm_p (rhs_mask
, mode
)
195 && aarch_rev16_shleft_mask_imm_p (lhs_mask
, mode
);
201 /* Recognise a sequence of bitwise operations corresponding to a rev16 operation.
202 These will be of the form:
203 ((x >> 8) & 0x00ff00ff)
204 | ((x << 8) & 0xff00ff00)
205 for SImode and with similar but wider bitmasks for DImode.
206 The two sub-expressions of the IOR can appear on either side so check both
207 permutations with the help of aarch_rev16_p_1 above. */
210 aarch_rev16_p (rtx x
)
212 rtx left_sub_rtx
, right_sub_rtx
;
215 if (GET_CODE (x
) != IOR
)
218 left_sub_rtx
= XEXP (x
, 0);
219 right_sub_rtx
= XEXP (x
, 1);
221 /* There are no canonicalisation rules for the position of the two shifts
222 involved in a rev, so try both permutations. */
223 is_rev
= aarch_rev16_p_1 (left_sub_rtx
, right_sub_rtx
, GET_MODE (x
));
226 is_rev
= aarch_rev16_p_1 (right_sub_rtx
, left_sub_rtx
, GET_MODE (x
));
231 /* Return nonzero if the CONSUMER instruction (a load) does need
232 PRODUCER's value to calculate the address. */
234 arm_early_load_addr_dep (rtx producer
, rtx consumer
)
238 if (!arm_get_set_operands (producer
, consumer
, &value
, &addr
))
241 return reg_overlap_mentioned_p (value
, addr
);
244 /* Return nonzero if the CONSUMER instruction (a load) does need
245 a Pmode PRODUCER's value to calculate the address. */
248 arm_early_load_addr_dep_ptr (rtx producer
, rtx consumer
)
250 rtx value
= arm_find_sub_rtx_with_code (PATTERN (producer
), SET
, false);
251 rtx addr
= arm_find_sub_rtx_with_code (PATTERN (consumer
), SET
, false);
253 if (!value
|| !addr
|| !MEM_P (SET_SRC (value
)))
256 value
= SET_DEST (value
);
257 addr
= SET_SRC (addr
);
259 return GET_MODE (value
) == Pmode
&& reg_overlap_mentioned_p (value
, addr
);
262 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
263 have an early register shift value or amount dependency on the
264 result of PRODUCER. */
266 arm_no_early_alu_shift_dep (rtx producer
, rtx consumer
)
271 if (!arm_get_set_operands (producer
, consumer
, &value
, &op
))
274 if ((early_op
= arm_find_shift_sub_rtx (op
)))
275 return !reg_overlap_mentioned_p (value
, early_op
);
280 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
281 have an early register shift value dependency on the result of
284 arm_no_early_alu_shift_value_dep (rtx producer
, rtx consumer
)
289 if (!arm_get_set_operands (producer
, consumer
, &value
, &op
))
292 if ((early_op
= arm_find_shift_sub_rtx (op
)))
293 /* We want to check the value being shifted. */
294 if (!reg_overlap_mentioned_p (value
, XEXP (early_op
, 0)))
300 /* Return nonzero if the CONSUMER (a mul or mac op) does not
301 have an early register mult dependency on the result of
304 arm_no_early_mul_dep (rtx producer
, rtx consumer
)
308 if (!arm_get_set_operands (producer
, consumer
, &value
, &op
))
311 if (GET_CODE (op
) == PLUS
|| GET_CODE (op
) == MINUS
)
313 if (GET_CODE (XEXP (op
, 0)) == MULT
)
314 return !reg_overlap_mentioned_p (value
, XEXP (op
, 0));
316 return !reg_overlap_mentioned_p (value
, XEXP (op
, 1));
322 /* Return nonzero if the CONSUMER instruction (a store) does not need
323 PRODUCER's value to calculate the address. */
326 arm_no_early_store_addr_dep (rtx producer
, rtx consumer
)
328 rtx value
= arm_find_sub_rtx_with_code (PATTERN (producer
), SET
, false);
329 rtx addr
= arm_find_sub_rtx_with_code (PATTERN (consumer
), SET
, false);
332 value
= SET_DEST (value
);
335 addr
= SET_DEST (addr
);
340 return !reg_overlap_mentioned_p (value
, addr
);
343 /* Return nonzero if the CONSUMER instruction (a store) does need
344 PRODUCER's value to calculate the address. */
347 arm_early_store_addr_dep (rtx producer
, rtx consumer
)
349 return !arm_no_early_store_addr_dep (producer
, consumer
);
352 /* Return nonzero if the CONSUMER instruction (a store) does need
353 a Pmode PRODUCER's value to calculate the address. */
356 arm_early_store_addr_dep_ptr (rtx producer
, rtx consumer
)
358 rtx value
= arm_find_sub_rtx_with_code (PATTERN (producer
), SET
, false);
359 rtx addr
= arm_find_sub_rtx_with_code (PATTERN (consumer
), SET
, false);
361 if (!value
|| !addr
|| !MEM_P (SET_SRC (value
)))
364 value
= SET_DEST (value
);
365 addr
= SET_DEST (addr
);
367 return GET_MODE (value
) == Pmode
&& reg_overlap_mentioned_p (value
, addr
);
370 /* Return non-zero iff the consumer (a multiply-accumulate or a
371 multiple-subtract instruction) has an accumulator dependency on the
372 result of the producer and no other dependency on that result. It
373 does not check if the producer is multiply-accumulate instruction. */
375 arm_mac_accumulator_is_result (rtx producer
, rtx consumer
)
380 producer
= PATTERN (producer
);
381 consumer
= PATTERN (consumer
);
383 if (GET_CODE (producer
) == COND_EXEC
)
384 producer
= COND_EXEC_CODE (producer
);
385 if (GET_CODE (consumer
) == COND_EXEC
)
386 consumer
= COND_EXEC_CODE (consumer
);
388 if (GET_CODE (producer
) != SET
)
391 result
= XEXP (producer
, 0);
393 if (GET_CODE (consumer
) != SET
)
396 /* Check that the consumer is of the form
397 (set (...) (plus (mult ...) (...)))
399 (set (...) (minus (...) (mult ...))). */
400 if (GET_CODE (XEXP (consumer
, 1)) == PLUS
)
402 if (GET_CODE (XEXP (XEXP (consumer
, 1), 0)) != MULT
)
405 op0
= XEXP (XEXP (XEXP (consumer
, 1), 0), 0);
406 op1
= XEXP (XEXP (XEXP (consumer
, 1), 0), 1);
407 acc
= XEXP (XEXP (consumer
, 1), 1);
409 else if (GET_CODE (XEXP (consumer
, 1)) == MINUS
)
411 if (GET_CODE (XEXP (XEXP (consumer
, 1), 1)) != MULT
)
414 op0
= XEXP (XEXP (XEXP (consumer
, 1), 1), 0);
415 op1
= XEXP (XEXP (XEXP (consumer
, 1), 1), 1);
416 acc
= XEXP (XEXP (consumer
, 1), 0);
421 return (reg_overlap_mentioned_p (result
, acc
)
422 && !reg_overlap_mentioned_p (result
, op0
)
423 && !reg_overlap_mentioned_p (result
, op1
));
426 /* Return non-zero if the destination of PRODUCER feeds the accumulator
427 operand of an MLA-like operation. */
430 aarch_accumulator_forwarding (rtx_insn
*producer
, rtx_insn
*consumer
)
432 rtx producer_set
= single_set (producer
);
433 rtx consumer_set
= single_set (consumer
);
435 /* We are looking for a SET feeding a SET. */
436 if (!producer_set
|| !consumer_set
)
439 rtx dest
= SET_DEST (producer_set
);
440 rtx mla
= SET_SRC (consumer_set
);
442 /* We're looking for a register SET. */
448 /* Strip a zero_extend. */
449 if (GET_CODE (mla
) == ZERO_EXTEND
)
452 switch (GET_CODE (mla
))
455 /* Possibly an MADD. */
456 if (GET_CODE (XEXP (mla
, 0)) == MULT
)
457 accumulator
= XEXP (mla
, 1);
462 /* Possibly an MSUB. */
463 if (GET_CODE (XEXP (mla
, 1)) == MULT
)
464 accumulator
= XEXP (mla
, 0);
470 /* Possibly an FMADD/FMSUB/FNMADD/FNMSUB. */
471 if (REG_P (XEXP (mla
, 1))
472 && REG_P (XEXP (mla
, 2))
473 && (REG_P (XEXP (mla
, 0))
474 || GET_CODE (XEXP (mla
, 0)) == NEG
))
478 accumulator
= XEXP (mla
, 2);
480 else if (REG_P (XEXP (mla
, 1))
481 && GET_CODE (XEXP (mla
, 2)) == NEG
482 && (REG_P (XEXP (mla
, 0))
483 || GET_CODE (XEXP (mla
, 0)) == NEG
))
486 accumulator
= XEXP (XEXP (mla
, 2), 0);
493 /* Not an MLA-like operation. */
497 if (GET_CODE (accumulator
) == SUBREG
)
498 accumulator
= SUBREG_REG (accumulator
);
500 if (!REG_P (accumulator
))
503 return (REGNO (dest
) == REGNO (accumulator
));
506 /* Return non-zero if the consumer (a multiply-accumulate instruction)
507 has an accumulator dependency on the result of the producer (a
508 multiplication instruction) and no other dependency on that result. */
510 arm_mac_accumulator_is_mul_result (rtx producer
, rtx consumer
)
512 rtx mul
= PATTERN (producer
);
513 rtx mac
= PATTERN (consumer
);
515 rtx mac_op0
, mac_op1
, mac_acc
;
517 if (GET_CODE (mul
) == COND_EXEC
)
518 mul
= COND_EXEC_CODE (mul
);
519 if (GET_CODE (mac
) == COND_EXEC
)
520 mac
= COND_EXEC_CODE (mac
);
522 /* Check that mul is of the form (set (...) (mult ...))
523 and mla is of the form (set (...) (plus (mult ...) (...))). */
524 if ((GET_CODE (mul
) != SET
|| GET_CODE (XEXP (mul
, 1)) != MULT
)
525 || (GET_CODE (mac
) != SET
|| GET_CODE (XEXP (mac
, 1)) != PLUS
526 || GET_CODE (XEXP (XEXP (mac
, 1), 0)) != MULT
))
529 mul_result
= XEXP (mul
, 0);
530 mac_op0
= XEXP (XEXP (XEXP (mac
, 1), 0), 0);
531 mac_op1
= XEXP (XEXP (XEXP (mac
, 1), 0), 1);
532 mac_acc
= XEXP (XEXP (mac
, 1), 1);
534 return (reg_overlap_mentioned_p (mul_result
, mac_acc
)
535 && !reg_overlap_mentioned_p (mul_result
, mac_op0
)
536 && !reg_overlap_mentioned_p (mul_result
, mac_op1
));