Code scheduling for Cortex-A53 isn't as good as it could be. It turns out
[official-gcc.git] / gcc / config / arm / aarch-common.c
blob6a04711335db292037ede5a801bdbaa830a3e524
1 /* Dependency checks for instruction scheduling, shared between ARM and
2 AARCH64.
4 Copyright (C) 1991-2017 Free Software Foundation, Inc.
5 Contributed by ARM Ltd.
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "tm.h"
28 #include "rtl.h"
29 #include "rtl-iter.h"
31 /* In ARMv8-A there's a general expectation that AESE/AESMC
32 and AESD/AESIMC sequences of the form:
34 AESE Vn, _
35 AESMC Vn, Vn
37 will issue both instructions in a single cycle on super-scalar
38 implementations. This function identifies such pairs. */
40 int
41 aarch_crypto_can_dual_issue (rtx_insn *producer_insn, rtx_insn *consumer_insn)
43 rtx producer_set, consumer_set;
44 rtx producer_src, consumer_src;
46 producer_set = single_set (producer_insn);
47 consumer_set = single_set (consumer_insn);
49 producer_src = producer_set ? SET_SRC (producer_set) : NULL;
50 consumer_src = consumer_set ? SET_SRC (consumer_set) : NULL;
52 if (producer_src && consumer_src
53 && GET_CODE (producer_src) == UNSPEC && GET_CODE (consumer_src) == UNSPEC
54 && ((XINT (producer_src, 1) == UNSPEC_AESE
55 && XINT (consumer_src, 1) == UNSPEC_AESMC)
56 || (XINT (producer_src, 1) == UNSPEC_AESD
57 && XINT (consumer_src, 1) == UNSPEC_AESIMC)))
59 unsigned int regno = REGNO (SET_DEST (producer_set));
61 /* Before reload the registers are virtual, so the destination of
62 consumer_set doesn't need to match. */
64 return (REGNO (SET_DEST (consumer_set)) == regno || !reload_completed)
65 && REGNO (XVECEXP (consumer_src, 0, 0)) == regno;
68 return 0;
71 /* Return TRUE if X is either an arithmetic shift left, or
72 is a multiplication by a power of two. */
73 bool
74 arm_rtx_shift_left_p (rtx x)
76 enum rtx_code code = GET_CODE (x);
78 if (code == MULT && CONST_INT_P (XEXP (x, 1))
79 && exact_log2 (INTVAL (XEXP (x, 1))) > 0)
80 return true;
82 if (code == ASHIFT)
83 return true;
85 return false;
88 static rtx_code shift_rtx_codes[] =
89 { ASHIFT, ROTATE, ASHIFTRT, LSHIFTRT,
90 ROTATERT, ZERO_EXTEND, SIGN_EXTEND };
92 /* Traverse PATTERN looking for a sub-rtx with RTX_CODE CODE.
93 If FIND_ANY_SHIFT then we are interested in anything which can
94 reasonably be described as a SHIFT RTX. */
95 static rtx
96 arm_find_sub_rtx_with_code (rtx pattern, rtx_code code, bool find_any_shift)
98 subrtx_var_iterator::array_type array;
99 FOR_EACH_SUBRTX_VAR (iter, array, pattern, NONCONST)
101 rtx x = *iter;
102 if (find_any_shift)
104 /* Left shifts might have been canonicalized to a MULT of some
105 power of two. Make sure we catch them. */
106 if (arm_rtx_shift_left_p (x))
107 return x;
108 else
109 for (unsigned int i = 0; i < ARRAY_SIZE (shift_rtx_codes); i++)
110 if (GET_CODE (x) == shift_rtx_codes[i])
111 return x;
114 if (GET_CODE (x) == code)
115 return x;
117 return NULL_RTX;
120 /* Traverse PATTERN looking for any sub-rtx which looks like a shift. */
121 static rtx
122 arm_find_shift_sub_rtx (rtx pattern)
124 return arm_find_sub_rtx_with_code (pattern, ASHIFT, true);
127 /* PRODUCER and CONSUMER are two potentially dependant RTX. PRODUCER
128 (possibly) contains a SET which will provide a result we can access
129 using the SET_DEST macro. We will place the RTX which would be
130 written by PRODUCER in SET_SOURCE.
131 Similarly, CONSUMER (possibly) contains a SET which has an operand
132 we can access using SET_SRC. We place this operand in
133 SET_DESTINATION.
135 Return nonzero if we found the SET RTX we expected. */
136 static int
137 arm_get_set_operands (rtx producer, rtx consumer,
138 rtx *set_source, rtx *set_destination)
140 rtx set_producer = arm_find_sub_rtx_with_code (PATTERN (producer),
141 SET, false);
142 rtx set_consumer = arm_find_sub_rtx_with_code (PATTERN (consumer),
143 SET, false);
145 if (set_producer && set_consumer)
147 *set_source = SET_DEST (set_producer);
148 *set_destination = SET_SRC (set_consumer);
149 return 1;
151 return 0;
154 bool
155 aarch_rev16_shright_mask_imm_p (rtx val, machine_mode mode)
157 return CONST_INT_P (val)
158 && INTVAL (val)
159 == trunc_int_for_mode (HOST_WIDE_INT_C (0xff00ff00ff00ff),
160 mode);
163 bool
164 aarch_rev16_shleft_mask_imm_p (rtx val, machine_mode mode)
166 return CONST_INT_P (val)
167 && INTVAL (val)
168 == trunc_int_for_mode (HOST_WIDE_INT_C (0xff00ff00ff00ff00),
169 mode);
173 static bool
174 aarch_rev16_p_1 (rtx lhs, rtx rhs, machine_mode mode)
176 if (GET_CODE (lhs) == AND
177 && GET_CODE (XEXP (lhs, 0)) == ASHIFT
178 && CONST_INT_P (XEXP (XEXP (lhs, 0), 1))
179 && INTVAL (XEXP (XEXP (lhs, 0), 1)) == 8
180 && REG_P (XEXP (XEXP (lhs, 0), 0))
181 && CONST_INT_P (XEXP (lhs, 1))
182 && GET_CODE (rhs) == AND
183 && GET_CODE (XEXP (rhs, 0)) == LSHIFTRT
184 && REG_P (XEXP (XEXP (rhs, 0), 0))
185 && CONST_INT_P (XEXP (XEXP (rhs, 0), 1))
186 && INTVAL (XEXP (XEXP (rhs, 0), 1)) == 8
187 && CONST_INT_P (XEXP (rhs, 1))
188 && REGNO (XEXP (XEXP (rhs, 0), 0)) == REGNO (XEXP (XEXP (lhs, 0), 0)))
191 rtx lhs_mask = XEXP (lhs, 1);
192 rtx rhs_mask = XEXP (rhs, 1);
194 return aarch_rev16_shright_mask_imm_p (rhs_mask, mode)
195 && aarch_rev16_shleft_mask_imm_p (lhs_mask, mode);
198 return false;
201 /* Recognise a sequence of bitwise operations corresponding to a rev16 operation.
202 These will be of the form:
203 ((x >> 8) & 0x00ff00ff)
204 | ((x << 8) & 0xff00ff00)
205 for SImode and with similar but wider bitmasks for DImode.
206 The two sub-expressions of the IOR can appear on either side so check both
207 permutations with the help of aarch_rev16_p_1 above. */
209 bool
210 aarch_rev16_p (rtx x)
212 rtx left_sub_rtx, right_sub_rtx;
213 bool is_rev = false;
215 if (GET_CODE (x) != IOR)
216 return false;
218 left_sub_rtx = XEXP (x, 0);
219 right_sub_rtx = XEXP (x, 1);
221 /* There are no canonicalisation rules for the position of the two shifts
222 involved in a rev, so try both permutations. */
223 is_rev = aarch_rev16_p_1 (left_sub_rtx, right_sub_rtx, GET_MODE (x));
225 if (!is_rev)
226 is_rev = aarch_rev16_p_1 (right_sub_rtx, left_sub_rtx, GET_MODE (x));
228 return is_rev;
231 /* Return nonzero if the CONSUMER instruction (a load) does need
232 PRODUCER's value to calculate the address. */
234 arm_early_load_addr_dep (rtx producer, rtx consumer)
236 rtx value, addr;
238 if (!arm_get_set_operands (producer, consumer, &value, &addr))
239 return 0;
241 return reg_overlap_mentioned_p (value, addr);
244 /* Return nonzero if the CONSUMER instruction (a load) does need
245 a Pmode PRODUCER's value to calculate the address. */
248 arm_early_load_addr_dep_ptr (rtx producer, rtx consumer)
250 rtx value = arm_find_sub_rtx_with_code (PATTERN (producer), SET, false);
251 rtx addr = arm_find_sub_rtx_with_code (PATTERN (consumer), SET, false);
253 if (!value || !addr || !MEM_P (SET_SRC (value)))
254 return 0;
256 value = SET_DEST (value);
257 addr = SET_SRC (addr);
259 return GET_MODE (value) == Pmode && reg_overlap_mentioned_p (value, addr);
262 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
263 have an early register shift value or amount dependency on the
264 result of PRODUCER. */
266 arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
268 rtx value, op;
269 rtx early_op;
271 if (!arm_get_set_operands (producer, consumer, &value, &op))
272 return 0;
274 if ((early_op = arm_find_shift_sub_rtx (op)))
276 if (REG_P (early_op))
277 early_op = op;
279 return !reg_overlap_mentioned_p (value, early_op);
282 return 0;
285 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
286 have an early register shift value dependency on the result of
287 PRODUCER. */
289 arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
291 rtx value, op;
292 rtx early_op;
294 if (!arm_get_set_operands (producer, consumer, &value, &op))
295 return 0;
297 if ((early_op = arm_find_shift_sub_rtx (op)))
298 /* We want to check the value being shifted. */
299 if (!reg_overlap_mentioned_p (value, XEXP (early_op, 0)))
300 return 1;
302 return 0;
305 /* Return nonzero if the CONSUMER (a mul or mac op) does not
306 have an early register mult dependency on the result of
307 PRODUCER. */
309 arm_no_early_mul_dep (rtx producer, rtx consumer)
311 rtx value, op;
313 if (!arm_get_set_operands (producer, consumer, &value, &op))
314 return 0;
316 if (GET_CODE (op) == PLUS || GET_CODE (op) == MINUS)
318 if (GET_CODE (XEXP (op, 0)) == MULT)
319 return !reg_overlap_mentioned_p (value, XEXP (op, 0));
320 else
321 return !reg_overlap_mentioned_p (value, XEXP (op, 1));
324 return 0;
327 /* Return nonzero if the CONSUMER instruction (a store) does not need
328 PRODUCER's value to calculate the address. */
331 arm_no_early_store_addr_dep (rtx producer, rtx consumer)
333 rtx value = arm_find_sub_rtx_with_code (PATTERN (producer), SET, false);
334 rtx addr = arm_find_sub_rtx_with_code (PATTERN (consumer), SET, false);
336 if (value)
337 value = SET_DEST (value);
339 if (addr)
340 addr = SET_DEST (addr);
342 if (!value || !addr)
343 return 0;
345 return !reg_overlap_mentioned_p (value, addr);
348 /* Return nonzero if the CONSUMER instruction (a store) does need
349 PRODUCER's value to calculate the address. */
352 arm_early_store_addr_dep (rtx producer, rtx consumer)
354 return !arm_no_early_store_addr_dep (producer, consumer);
357 /* Return nonzero if the CONSUMER instruction (a store) does need
358 a Pmode PRODUCER's value to calculate the address. */
361 arm_early_store_addr_dep_ptr (rtx producer, rtx consumer)
363 rtx value = arm_find_sub_rtx_with_code (PATTERN (producer), SET, false);
364 rtx addr = arm_find_sub_rtx_with_code (PATTERN (consumer), SET, false);
366 if (!value || !addr || !MEM_P (SET_SRC (value)))
367 return 0;
369 value = SET_DEST (value);
370 addr = SET_DEST (addr);
372 return GET_MODE (value) == Pmode && reg_overlap_mentioned_p (value, addr);
375 /* Return non-zero iff the consumer (a multiply-accumulate or a
376 multiple-subtract instruction) has an accumulator dependency on the
377 result of the producer and no other dependency on that result. It
378 does not check if the producer is multiply-accumulate instruction. */
380 arm_mac_accumulator_is_result (rtx producer, rtx consumer)
382 rtx result;
383 rtx op0, op1, acc;
385 producer = PATTERN (producer);
386 consumer = PATTERN (consumer);
388 if (GET_CODE (producer) == COND_EXEC)
389 producer = COND_EXEC_CODE (producer);
390 if (GET_CODE (consumer) == COND_EXEC)
391 consumer = COND_EXEC_CODE (consumer);
393 if (GET_CODE (producer) != SET)
394 return 0;
396 result = XEXP (producer, 0);
398 if (GET_CODE (consumer) != SET)
399 return 0;
401 /* Check that the consumer is of the form
402 (set (...) (plus (mult ...) (...)))
404 (set (...) (minus (...) (mult ...))). */
405 if (GET_CODE (XEXP (consumer, 1)) == PLUS)
407 if (GET_CODE (XEXP (XEXP (consumer, 1), 0)) != MULT)
408 return 0;
410 op0 = XEXP (XEXP (XEXP (consumer, 1), 0), 0);
411 op1 = XEXP (XEXP (XEXP (consumer, 1), 0), 1);
412 acc = XEXP (XEXP (consumer, 1), 1);
414 else if (GET_CODE (XEXP (consumer, 1)) == MINUS)
416 if (GET_CODE (XEXP (XEXP (consumer, 1), 1)) != MULT)
417 return 0;
419 op0 = XEXP (XEXP (XEXP (consumer, 1), 1), 0);
420 op1 = XEXP (XEXP (XEXP (consumer, 1), 1), 1);
421 acc = XEXP (XEXP (consumer, 1), 0);
423 else
424 return 0;
426 return (reg_overlap_mentioned_p (result, acc)
427 && !reg_overlap_mentioned_p (result, op0)
428 && !reg_overlap_mentioned_p (result, op1));
431 /* Return non-zero if the destination of PRODUCER feeds the accumulator
432 operand of an MLA-like operation. */
435 aarch_accumulator_forwarding (rtx_insn *producer, rtx_insn *consumer)
437 rtx producer_set = single_set (producer);
438 rtx consumer_set = single_set (consumer);
440 /* We are looking for a SET feeding a SET. */
441 if (!producer_set || !consumer_set)
442 return 0;
444 rtx dest = SET_DEST (producer_set);
445 rtx mla = SET_SRC (consumer_set);
447 /* We're looking for a register SET. */
448 if (!REG_P (dest))
449 return 0;
451 rtx accumulator;
453 /* Strip a zero_extend. */
454 if (GET_CODE (mla) == ZERO_EXTEND)
455 mla = XEXP (mla, 0);
457 switch (GET_CODE (mla))
459 case PLUS:
460 /* Possibly an MADD. */
461 if (GET_CODE (XEXP (mla, 0)) == MULT)
462 accumulator = XEXP (mla, 1);
463 else
464 return 0;
465 break;
466 case MINUS:
467 /* Possibly an MSUB. */
468 if (GET_CODE (XEXP (mla, 1)) == MULT)
469 accumulator = XEXP (mla, 0);
470 else
471 return 0;
472 break;
473 case FMA:
475 /* Possibly an FMADD/FMSUB/FNMADD/FNMSUB. */
476 if (REG_P (XEXP (mla, 1))
477 && REG_P (XEXP (mla, 2))
478 && (REG_P (XEXP (mla, 0))
479 || GET_CODE (XEXP (mla, 0)) == NEG))
482 /* FMADD/FMSUB. */
483 accumulator = XEXP (mla, 2);
485 else if (REG_P (XEXP (mla, 1))
486 && GET_CODE (XEXP (mla, 2)) == NEG
487 && (REG_P (XEXP (mla, 0))
488 || GET_CODE (XEXP (mla, 0)) == NEG))
490 /* FNMADD/FNMSUB. */
491 accumulator = XEXP (XEXP (mla, 2), 0);
493 else
494 return 0;
495 break;
497 default:
498 /* Not an MLA-like operation. */
499 return 0;
502 if (GET_CODE (accumulator) == SUBREG)
503 accumulator = SUBREG_REG (accumulator);
505 if (!REG_P (accumulator))
506 return 0;
508 return (REGNO (dest) == REGNO (accumulator));
511 /* Return nonzero if the CONSUMER instruction is some sort of
512 arithmetic or logic + shift operation, and the register we are
513 writing in PRODUCER is not used in a register shift by register
514 operation. */
517 aarch_forward_to_shift_is_not_shifted_reg (rtx_insn *producer,
518 rtx_insn *consumer)
520 rtx value, op;
521 rtx early_op;
523 if (!arm_get_set_operands (producer, consumer, &value, &op))
524 return 0;
526 if ((early_op = arm_find_shift_sub_rtx (op)))
528 if (REG_P (early_op))
529 early_op = op;
531 /* Any other canonicalisation of a shift is a shift-by-constant
532 so we don't care. */
533 if (GET_CODE (early_op) == ASHIFT)
534 return (!REG_P (XEXP (early_op, 0))
535 || !REG_P (XEXP (early_op, 1)));
536 else
537 return 1;
540 return 0;
543 /* Return non-zero if the consumer (a multiply-accumulate instruction)
544 has an accumulator dependency on the result of the producer (a
545 multiplication instruction) and no other dependency on that result. */
547 arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
549 rtx mul = PATTERN (producer);
550 rtx mac = PATTERN (consumer);
551 rtx mul_result;
552 rtx mac_op0, mac_op1, mac_acc;
554 if (GET_CODE (mul) == COND_EXEC)
555 mul = COND_EXEC_CODE (mul);
556 if (GET_CODE (mac) == COND_EXEC)
557 mac = COND_EXEC_CODE (mac);
559 /* Check that mul is of the form (set (...) (mult ...))
560 and mla is of the form (set (...) (plus (mult ...) (...))). */
561 if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT)
562 || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS
563 || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT))
564 return 0;
566 mul_result = XEXP (mul, 0);
567 mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0);
568 mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1);
569 mac_acc = XEXP (XEXP (mac, 1), 1);
571 return (reg_overlap_mentioned_p (mul_result, mac_acc)
572 && !reg_overlap_mentioned_p (mul_result, mac_op0)
573 && !reg_overlap_mentioned_p (mul_result, mac_op1));