/* Dependency checks for instruction scheduling, shared between ARM and
   AARCH64.

   Copyright (C) 1991-2018 Free Software Foundation, Inc.
   Contributed by ARM Ltd.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
24 #define IN_TARGET_CODE 1
28 #include "coretypes.h"
33 /* In ARMv8-A there's a general expectation that AESE/AESMC
34 and AESD/AESIMC sequences of the form:
39 will issue both instructions in a single cycle on super-scalar
40 implementations. This function identifies such pairs. */
43 aarch_crypto_can_dual_issue (rtx_insn
*producer_insn
, rtx_insn
*consumer_insn
)
45 rtx producer_set
, consumer_set
;
46 rtx producer_src
, consumer_src
;
48 producer_set
= single_set (producer_insn
);
49 consumer_set
= single_set (consumer_insn
);
51 producer_src
= producer_set
? SET_SRC (producer_set
) : NULL
;
52 consumer_src
= consumer_set
? SET_SRC (consumer_set
) : NULL
;
54 if (producer_src
&& consumer_src
55 && GET_CODE (producer_src
) == UNSPEC
&& GET_CODE (consumer_src
) == UNSPEC
56 && ((XINT (producer_src
, 1) == UNSPEC_AESE
57 && XINT (consumer_src
, 1) == UNSPEC_AESMC
)
58 || (XINT (producer_src
, 1) == UNSPEC_AESD
59 && XINT (consumer_src
, 1) == UNSPEC_AESIMC
)))
61 unsigned int regno
= REGNO (SET_DEST (producer_set
));
63 /* Before reload the registers are virtual, so the destination of
64 consumer_set doesn't need to match. */
66 return (REGNO (SET_DEST (consumer_set
)) == regno
|| !reload_completed
)
67 && REGNO (XVECEXP (consumer_src
, 0, 0)) == regno
;
73 /* Return TRUE if X is either an arithmetic shift left, or
74 is a multiplication by a power of two. */
76 arm_rtx_shift_left_p (rtx x
)
78 enum rtx_code code
= GET_CODE (x
);
80 if (code
== MULT
&& CONST_INT_P (XEXP (x
, 1))
81 && exact_log2 (INTVAL (XEXP (x
, 1))) > 0)
90 static rtx_code shift_rtx_codes
[] =
91 { ASHIFT
, ROTATE
, ASHIFTRT
, LSHIFTRT
,
92 ROTATERT
, ZERO_EXTEND
, SIGN_EXTEND
};
94 /* Traverse PATTERN looking for a sub-rtx with RTX_CODE CODE.
95 If FIND_ANY_SHIFT then we are interested in anything which can
96 reasonably be described as a SHIFT RTX. */
98 arm_find_sub_rtx_with_code (rtx pattern
, rtx_code code
, bool find_any_shift
)
100 subrtx_var_iterator::array_type array
;
101 FOR_EACH_SUBRTX_VAR (iter
, array
, pattern
, NONCONST
)
106 /* Left shifts might have been canonicalized to a MULT of some
107 power of two. Make sure we catch them. */
108 if (arm_rtx_shift_left_p (x
))
111 for (unsigned int i
= 0; i
< ARRAY_SIZE (shift_rtx_codes
); i
++)
112 if (GET_CODE (x
) == shift_rtx_codes
[i
])
116 if (GET_CODE (x
) == code
)
122 /* Traverse PATTERN looking for any sub-rtx which looks like a shift. */
124 arm_find_shift_sub_rtx (rtx pattern
)
126 return arm_find_sub_rtx_with_code (pattern
, ASHIFT
, true);
129 /* PRODUCER and CONSUMER are two potentially dependant RTX. PRODUCER
130 (possibly) contains a SET which will provide a result we can access
131 using the SET_DEST macro. We will place the RTX which would be
132 written by PRODUCER in SET_SOURCE.
133 Similarly, CONSUMER (possibly) contains a SET which has an operand
134 we can access using SET_SRC. We place this operand in
137 Return nonzero if we found the SET RTX we expected. */
139 arm_get_set_operands (rtx producer
, rtx consumer
,
140 rtx
*set_source
, rtx
*set_destination
)
142 rtx set_producer
= arm_find_sub_rtx_with_code (PATTERN (producer
),
144 rtx set_consumer
= arm_find_sub_rtx_with_code (PATTERN (consumer
),
147 if (set_producer
&& set_consumer
)
149 *set_source
= SET_DEST (set_producer
);
150 *set_destination
= SET_SRC (set_consumer
);
157 aarch_rev16_shright_mask_imm_p (rtx val
, machine_mode mode
)
159 return CONST_INT_P (val
)
161 == trunc_int_for_mode (HOST_WIDE_INT_C (0xff00ff00ff00ff),
166 aarch_rev16_shleft_mask_imm_p (rtx val
, machine_mode mode
)
168 return CONST_INT_P (val
)
170 == trunc_int_for_mode (HOST_WIDE_INT_C (0xff00ff00ff00ff00),
176 aarch_rev16_p_1 (rtx lhs
, rtx rhs
, machine_mode mode
)
178 if (GET_CODE (lhs
) == AND
179 && GET_CODE (XEXP (lhs
, 0)) == ASHIFT
180 && CONST_INT_P (XEXP (XEXP (lhs
, 0), 1))
181 && INTVAL (XEXP (XEXP (lhs
, 0), 1)) == 8
182 && REG_P (XEXP (XEXP (lhs
, 0), 0))
183 && CONST_INT_P (XEXP (lhs
, 1))
184 && GET_CODE (rhs
) == AND
185 && GET_CODE (XEXP (rhs
, 0)) == LSHIFTRT
186 && REG_P (XEXP (XEXP (rhs
, 0), 0))
187 && CONST_INT_P (XEXP (XEXP (rhs
, 0), 1))
188 && INTVAL (XEXP (XEXP (rhs
, 0), 1)) == 8
189 && CONST_INT_P (XEXP (rhs
, 1))
190 && REGNO (XEXP (XEXP (rhs
, 0), 0)) == REGNO (XEXP (XEXP (lhs
, 0), 0)))
193 rtx lhs_mask
= XEXP (lhs
, 1);
194 rtx rhs_mask
= XEXP (rhs
, 1);
196 return aarch_rev16_shright_mask_imm_p (rhs_mask
, mode
)
197 && aarch_rev16_shleft_mask_imm_p (lhs_mask
, mode
);
203 /* Recognise a sequence of bitwise operations corresponding to a rev16 operation.
204 These will be of the form:
205 ((x >> 8) & 0x00ff00ff)
206 | ((x << 8) & 0xff00ff00)
207 for SImode and with similar but wider bitmasks for DImode.
208 The two sub-expressions of the IOR can appear on either side so check both
209 permutations with the help of aarch_rev16_p_1 above. */
212 aarch_rev16_p (rtx x
)
214 rtx left_sub_rtx
, right_sub_rtx
;
217 if (GET_CODE (x
) != IOR
)
220 left_sub_rtx
= XEXP (x
, 0);
221 right_sub_rtx
= XEXP (x
, 1);
223 /* There are no canonicalisation rules for the position of the two shifts
224 involved in a rev, so try both permutations. */
225 is_rev
= aarch_rev16_p_1 (left_sub_rtx
, right_sub_rtx
, GET_MODE (x
));
228 is_rev
= aarch_rev16_p_1 (right_sub_rtx
, left_sub_rtx
, GET_MODE (x
));
233 /* Return nonzero if the CONSUMER instruction (a load) does need
234 PRODUCER's value to calculate the address. */
236 arm_early_load_addr_dep (rtx producer
, rtx consumer
)
240 if (!arm_get_set_operands (producer
, consumer
, &value
, &addr
))
243 return reg_overlap_mentioned_p (value
, addr
);
246 /* Return nonzero if the CONSUMER instruction (a load) does need
247 a Pmode PRODUCER's value to calculate the address. */
250 arm_early_load_addr_dep_ptr (rtx producer
, rtx consumer
)
252 rtx value
= arm_find_sub_rtx_with_code (PATTERN (producer
), SET
, false);
253 rtx addr
= arm_find_sub_rtx_with_code (PATTERN (consumer
), SET
, false);
255 if (!value
|| !addr
|| !MEM_P (SET_SRC (value
)))
258 value
= SET_DEST (value
);
259 addr
= SET_SRC (addr
);
261 return GET_MODE (value
) == Pmode
&& reg_overlap_mentioned_p (value
, addr
);
264 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
265 have an early register shift value or amount dependency on the
266 result of PRODUCER. */
268 arm_no_early_alu_shift_dep (rtx producer
, rtx consumer
)
273 if (!arm_get_set_operands (producer
, consumer
, &value
, &op
))
276 if ((early_op
= arm_find_shift_sub_rtx (op
)))
277 return !reg_overlap_mentioned_p (value
, early_op
);
282 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
283 have an early register shift value dependency on the result of
286 arm_no_early_alu_shift_value_dep (rtx producer
, rtx consumer
)
291 if (!arm_get_set_operands (producer
, consumer
, &value
, &op
))
294 if ((early_op
= arm_find_shift_sub_rtx (op
)))
295 /* We want to check the value being shifted. */
296 if (!reg_overlap_mentioned_p (value
, XEXP (early_op
, 0)))
302 /* Return nonzero if the CONSUMER (a mul or mac op) does not
303 have an early register mult dependency on the result of
306 arm_no_early_mul_dep (rtx producer
, rtx consumer
)
310 if (!arm_get_set_operands (producer
, consumer
, &value
, &op
))
313 if (GET_CODE (op
) == PLUS
|| GET_CODE (op
) == MINUS
)
315 if (GET_CODE (XEXP (op
, 0)) == MULT
)
316 return !reg_overlap_mentioned_p (value
, XEXP (op
, 0));
318 return !reg_overlap_mentioned_p (value
, XEXP (op
, 1));
324 /* Return nonzero if the CONSUMER instruction (a store) does not need
325 PRODUCER's value to calculate the address. */
328 arm_no_early_store_addr_dep (rtx producer
, rtx consumer
)
330 rtx value
= arm_find_sub_rtx_with_code (PATTERN (producer
), SET
, false);
331 rtx addr
= arm_find_sub_rtx_with_code (PATTERN (consumer
), SET
, false);
334 value
= SET_DEST (value
);
337 addr
= SET_DEST (addr
);
342 return !reg_overlap_mentioned_p (value
, addr
);
345 /* Return nonzero if the CONSUMER instruction (a store) does need
346 PRODUCER's value to calculate the address. */
349 arm_early_store_addr_dep (rtx producer
, rtx consumer
)
351 return !arm_no_early_store_addr_dep (producer
, consumer
);
354 /* Return nonzero if the CONSUMER instruction (a store) does need
355 a Pmode PRODUCER's value to calculate the address. */
358 arm_early_store_addr_dep_ptr (rtx producer
, rtx consumer
)
360 rtx value
= arm_find_sub_rtx_with_code (PATTERN (producer
), SET
, false);
361 rtx addr
= arm_find_sub_rtx_with_code (PATTERN (consumer
), SET
, false);
363 if (!value
|| !addr
|| !MEM_P (SET_SRC (value
)))
366 value
= SET_DEST (value
);
367 addr
= SET_DEST (addr
);
369 return GET_MODE (value
) == Pmode
&& reg_overlap_mentioned_p (value
, addr
);
372 /* Return non-zero iff the consumer (a multiply-accumulate or a
373 multiple-subtract instruction) has an accumulator dependency on the
374 result of the producer and no other dependency on that result. It
375 does not check if the producer is multiply-accumulate instruction. */
377 arm_mac_accumulator_is_result (rtx producer
, rtx consumer
)
382 producer
= PATTERN (producer
);
383 consumer
= PATTERN (consumer
);
385 if (GET_CODE (producer
) == COND_EXEC
)
386 producer
= COND_EXEC_CODE (producer
);
387 if (GET_CODE (consumer
) == COND_EXEC
)
388 consumer
= COND_EXEC_CODE (consumer
);
390 if (GET_CODE (producer
) != SET
)
393 result
= XEXP (producer
, 0);
395 if (GET_CODE (consumer
) != SET
)
398 /* Check that the consumer is of the form
399 (set (...) (plus (mult ...) (...)))
401 (set (...) (minus (...) (mult ...))). */
402 if (GET_CODE (XEXP (consumer
, 1)) == PLUS
)
404 if (GET_CODE (XEXP (XEXP (consumer
, 1), 0)) != MULT
)
407 op0
= XEXP (XEXP (XEXP (consumer
, 1), 0), 0);
408 op1
= XEXP (XEXP (XEXP (consumer
, 1), 0), 1);
409 acc
= XEXP (XEXP (consumer
, 1), 1);
411 else if (GET_CODE (XEXP (consumer
, 1)) == MINUS
)
413 if (GET_CODE (XEXP (XEXP (consumer
, 1), 1)) != MULT
)
416 op0
= XEXP (XEXP (XEXP (consumer
, 1), 1), 0);
417 op1
= XEXP (XEXP (XEXP (consumer
, 1), 1), 1);
418 acc
= XEXP (XEXP (consumer
, 1), 0);
423 return (reg_overlap_mentioned_p (result
, acc
)
424 && !reg_overlap_mentioned_p (result
, op0
)
425 && !reg_overlap_mentioned_p (result
, op1
));
428 /* Return non-zero if the destination of PRODUCER feeds the accumulator
429 operand of an MLA-like operation. */
432 aarch_accumulator_forwarding (rtx_insn
*producer
, rtx_insn
*consumer
)
434 rtx producer_set
= single_set (producer
);
435 rtx consumer_set
= single_set (consumer
);
437 /* We are looking for a SET feeding a SET. */
438 if (!producer_set
|| !consumer_set
)
441 rtx dest
= SET_DEST (producer_set
);
442 rtx mla
= SET_SRC (consumer_set
);
444 /* We're looking for a register SET. */
450 /* Strip a zero_extend. */
451 if (GET_CODE (mla
) == ZERO_EXTEND
)
454 switch (GET_CODE (mla
))
457 /* Possibly an MADD. */
458 if (GET_CODE (XEXP (mla
, 0)) == MULT
)
459 accumulator
= XEXP (mla
, 1);
464 /* Possibly an MSUB. */
465 if (GET_CODE (XEXP (mla
, 1)) == MULT
)
466 accumulator
= XEXP (mla
, 0);
472 /* Possibly an FMADD/FMSUB/FNMADD/FNMSUB. */
473 if (REG_P (XEXP (mla
, 1))
474 && REG_P (XEXP (mla
, 2))
475 && (REG_P (XEXP (mla
, 0))
476 || GET_CODE (XEXP (mla
, 0)) == NEG
))
480 accumulator
= XEXP (mla
, 2);
482 else if (REG_P (XEXP (mla
, 1))
483 && GET_CODE (XEXP (mla
, 2)) == NEG
484 && (REG_P (XEXP (mla
, 0))
485 || GET_CODE (XEXP (mla
, 0)) == NEG
))
488 accumulator
= XEXP (XEXP (mla
, 2), 0);
495 /* Not an MLA-like operation. */
499 if (GET_CODE (accumulator
) == SUBREG
)
500 accumulator
= SUBREG_REG (accumulator
);
502 if (!REG_P (accumulator
))
505 return (REGNO (dest
) == REGNO (accumulator
));
508 /* Return non-zero if the consumer (a multiply-accumulate instruction)
509 has an accumulator dependency on the result of the producer (a
510 multiplication instruction) and no other dependency on that result. */
512 arm_mac_accumulator_is_mul_result (rtx producer
, rtx consumer
)
514 rtx mul
= PATTERN (producer
);
515 rtx mac
= PATTERN (consumer
);
517 rtx mac_op0
, mac_op1
, mac_acc
;
519 if (GET_CODE (mul
) == COND_EXEC
)
520 mul
= COND_EXEC_CODE (mul
);
521 if (GET_CODE (mac
) == COND_EXEC
)
522 mac
= COND_EXEC_CODE (mac
);
524 /* Check that mul is of the form (set (...) (mult ...))
525 and mla is of the form (set (...) (plus (mult ...) (...))). */
526 if ((GET_CODE (mul
) != SET
|| GET_CODE (XEXP (mul
, 1)) != MULT
)
527 || (GET_CODE (mac
) != SET
|| GET_CODE (XEXP (mac
, 1)) != PLUS
528 || GET_CODE (XEXP (XEXP (mac
, 1), 0)) != MULT
))
531 mul_result
= XEXP (mul
, 0);
532 mac_op0
= XEXP (XEXP (XEXP (mac
, 1), 0), 0);
533 mac_op1
= XEXP (XEXP (XEXP (mac
, 1), 0), 1);
534 mac_acc
= XEXP (XEXP (mac
, 1), 1);
536 return (reg_overlap_mentioned_p (mul_result
, mac_acc
)
537 && !reg_overlap_mentioned_p (mul_result
, mac_op0
)
538 && !reg_overlap_mentioned_p (mul_result
, mac_op1
));