From c44b93a4101f6e626e14bb9017661645e2bfea4e Mon Sep 17 00:00:00 2001 From: pinskia Date: Wed, 21 Jun 2017 15:58:12 +0000 Subject: [PATCH] 2017-06-21 Andrew Pinski * config/aarch64/aarch64-cost-tables.h (thunderx_extra_costs): Increment Arith_shift and Arith_shift_reg by 1. * config/aarch64/aarch64-tuning-flags.def (cheap_shift_extend): New tuning flag. * config/aarch64/aarch64.c (thunderx_tunings): Enable AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND. (aarch64_strip_extend): Add new argument and test for it. (aarch64_cheap_mult_shift_p): New function. (aarch64_rtx_mult_cost): Call aarch64_cheap_mult_shift_p and don't add a cost if it is true. Update calls to aarch64_strip_extend. (aarch64_rtx_costs): Update calls to aarch64_strip_extend. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@249459 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/ChangeLog | 15 ++++++++ gcc/config/aarch64/aarch64-cost-tables.h | 4 +- gcc/config/aarch64/aarch64-tuning-flags.def | 6 +++ gcc/config/aarch64/aarch64.c | 60 ++++++++++++++++++++++++----- 4 files changed, 73 insertions(+), 12 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 1b41a7f6643..ea97b0badff 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,5 +1,20 @@ 2017-06-21 Andrew Pinski + * config/aarch64/aarch64-cost-tables.h (thunderx_extra_costs): + Increment Arith_shift and Arith_shift_reg by 1. + * config/aarch64/aarch64-tuning-flags.def (cheap_shift_extend): + New tuning flag. + * config/aarch64/aarch64.c (thunderx_tunings): Enable + AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND. + (aarch64_strip_extend): Add new argument and test for it. + (aarch64_cheap_mult_shift_p): New function. + (aarch64_rtx_mult_cost): Call aarch64_cheap_mult_shift_p and don't + add a cost if it is true. + Update calls to aarch64_strip_extend. + (aarch64_rtx_costs): Update calls to aarch64_strip_extend. + +2017-06-21 Andrew Pinski + * config/aarch64/aarch64-cores.def (thunderxt88p1): Use thunderxt88 tunings. (thunderxt88): Likewise. diff --git a/gcc/config/aarch64/aarch64-cost-tables.h b/gcc/config/aarch64/aarch64-cost-tables.h index 070c083beb3..5d149bd09af 100644 --- a/gcc/config/aarch64/aarch64-cost-tables.h +++ b/gcc/config/aarch64/aarch64-cost-tables.h @@ -136,8 +136,8 @@ const struct cpu_cost_table thunderx_extra_costs = 0, /* Logical. */ 0, /* Shift. */ 0, /* Shift_reg. */ - COSTS_N_INSNS (1), /* Arith_shift. */ - COSTS_N_INSNS (1), /* Arith_shift_reg. */ + COSTS_N_INSNS (1)+1, /* Arith_shift. */ + COSTS_N_INSNS (1)+1, /* Arith_shift_reg. */ COSTS_N_INSNS (1), /* UNUSED: Log_shift. */ COSTS_N_INSNS (1), /* UNUSED: Log_shift_reg. */ 0, /* Extend. */ diff --git a/gcc/config/aarch64/aarch64-tuning-flags.def b/gcc/config/aarch64/aarch64-tuning-flags.def index 9f980e3e498..f48642c2808 100644 --- a/gcc/config/aarch64/aarch64-tuning-flags.def +++ b/gcc/config/aarch64/aarch64-tuning-flags.def @@ -35,4 +35,10 @@ two load/stores are not at least 8 byte aligned don't create load/store pairs. */ AARCH64_EXTRA_TUNING_OPTION ("slow_unaligned_ldpw", SLOW_UNALIGNED_LDPW) +/* Some of the optional shift to some arthematic instructions are + considered cheap. Logical shift left <=4 with or without a + zero extend are considered cheap. Sign extend; non logical shift left + are not considered cheap. */ +AARCH64_EXTRA_TUNING_OPTION ("cheap_shift_extend", CHEAP_SHIFT_EXTEND) + #undef AARCH64_EXTRA_TUNING_OPTION diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 97937dd74cf..3364a02e89c 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -809,7 +809,8 @@ static const struct tune_params thunderx_tunings = 2, /* min_div_recip_mul_df. */ 0, /* max_case_values. */ tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model. */ - (AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW), /* tune_flags. */ + (AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW + | AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND), /* tune_flags. */ &thunderx_prefetch_tune }; @@ -6120,9 +6121,10 @@ aarch64_strip_shift (rtx x) /* Helper function for rtx cost calculation. Strip an extend expression from X. Returns the inner operand if successful, or the original expression on failure. We deal with a number of possible - canonicalization variations here. */ + canonicalization variations here. If STRIP_SHIFT is true, then + we can strip off a shift also. */ static rtx -aarch64_strip_extend (rtx x) +aarch64_strip_extend (rtx x, bool strip_shift) { rtx op = x; @@ -6146,7 +6148,8 @@ aarch64_strip_extend (rtx x) /* Now handle extended register, as this may also have an optional left shift by 1..4. */ - if (GET_CODE (op) == ASHIFT + if (strip_shift + && GET_CODE (op) == ASHIFT && CONST_INT_P (XEXP (op, 1)) && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4) op = XEXP (op, 0); @@ -6170,6 +6173,39 @@ aarch64_shift_p (enum rtx_code code) return code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT; } + +/* Return true iff X is a cheap shift without a sign extend. */ + +static bool +aarch64_cheap_mult_shift_p (rtx x) +{ + rtx op0, op1; + + op0 = XEXP (x, 0); + op1 = XEXP (x, 1); + + if (!(aarch64_tune_params.extra_tuning_flags + & AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND)) + return false; + + if (GET_CODE (op0) == SIGN_EXTEND) + return false; + + if (GET_CODE (x) == ASHIFT && CONST_INT_P (op1) + && UINTVAL (op1) <= 4) + return true; + + if (GET_CODE (x) != MULT || !CONST_INT_P (op1)) + return false; + + HOST_WIDE_INT l2 = exact_log2 (INTVAL (op1)); + + if (l2 > 0 && l2 <= 4) + return true; + + return false; +} + /* Helper function for rtx cost calculation. Calculate the cost of a MULT or ASHIFT, which may be part of a compound PLUS/MINUS rtx. Return the calculated cost of the expression, recursing manually in to @@ -6207,7 +6243,11 @@ aarch64_rtx_mult_cost (rtx x, enum rtx_code code, int outer, bool speed) { if (compound_p) { - if (REG_P (op1)) + /* If the shift is considered cheap, + then don't add any cost. */ + if (aarch64_cheap_mult_shift_p (x)) + ; + else if (REG_P (op1)) /* ARITH + shift-by-register. */ cost += extra_cost->alu.arith_shift_reg; else if (is_extend) @@ -6225,7 +6265,7 @@ aarch64_rtx_mult_cost (rtx x, enum rtx_code code, int outer, bool speed) } /* Strip extends as we will have costed them in the case above. */ if (is_extend) - op0 = aarch64_strip_extend (op0); + op0 = aarch64_strip_extend (op0, true); cost += rtx_cost (op0, VOIDmode, code, 0, speed); @@ -7069,13 +7109,13 @@ cost_minus: if (speed) *cost += extra_cost->alu.extend_arith; - op1 = aarch64_strip_extend (op1); + op1 = aarch64_strip_extend (op1, true); *cost += rtx_cost (op1, VOIDmode, (enum rtx_code) GET_CODE (op1), 0, speed); return true; } - rtx new_op1 = aarch64_strip_extend (op1); + rtx new_op1 = aarch64_strip_extend (op1, false); /* Cost this as an FMA-alike operation. */ if ((GET_CODE (new_op1) == MULT @@ -7148,7 +7188,7 @@ cost_plus: if (speed) *cost += extra_cost->alu.extend_arith; - op0 = aarch64_strip_extend (op0); + op0 = aarch64_strip_extend (op0, true); *cost += rtx_cost (op0, VOIDmode, (enum rtx_code) GET_CODE (op0), 0, speed); return true; @@ -7156,7 +7196,7 @@ cost_plus: /* Strip any extend, leave shifts behind as we will cost them through mult_cost. */ - new_op0 = aarch64_strip_extend (op0); + new_op0 = aarch64_strip_extend (op0, false); if (GET_CODE (new_op0) == MULT || aarch64_shift_p (GET_CODE (new_op0))) -- 2.11.4.GIT