From a1aff58fb98dcba080629c73933b51850d1f0e1e Mon Sep 17 00:00:00 2001 From: Jan Hubicka Date: Tue, 20 Jan 2015 00:58:19 +0100 Subject: [PATCH] re PR lto/45375 ([meta-bug] Issues with building Mozilla (i.e. Firefox) with LTO) PR lto/45375 * i386.c (gate): Check flag_expensive_optimizations and optimize_size. (ix86_option_override_internal): Drop optimize_size condition on MASK_ACCUMULATE_OUTGOING_ARGS, MASK_VZEROUPPER, MASK_AVX256_SPLIT_UNALIGNED_LOAD, MASK_AVX256_SPLIT_UNALIGNED_STORE, MASK_PREFER_AVX128. (ix86_avx256_split_vector_move_misalign, ix86_avx256_split_vector_move_misalign): Check optimize_insn_for_speed_p. * sse.md (all uses of TARGET_PREFER_AVX128): Add optimize_insn_for_speed_p check. From-SVN: r219871 --- gcc/ChangeLog | 14 ++++++++++++++ gcc/config/i386/i386.c | 48 +++++++++++++++++++++++------------------------- gcc/config/i386/sse.md | 10 +++++----- 3 files changed, 42 insertions(+), 30 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 7e182f3c3f2..9a76dd70a12 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,17 @@ +2015-01-19 Jan Hubicka + + PR lto/45375 + * i386.c (gate): Check flag_expensive_optimizations and + optimize_size. + (ix86_option_override_internal): Drop optimize_size condition + on MASK_ACCUMULATE_OUTGOING_ARGS, MASK_VZEROUPPER, + MASK_AVX256_SPLIT_UNALIGNED_LOAD, MASK_AVX256_SPLIT_UNALIGNED_STORE, + MASK_PREFER_AVX128. + (ix86_avx256_split_vector_move_misalign, + ix86_avx256_split_vector_move_misalign): Check optimize_insn_for_speed_p. + * sse.md (all uses of TARGET_PREFER_AVX128): Add + optimize_insn_for_speed_p check. + 2015-01-19 Matthew Fortune * config/mips/mips.h (FP_ASM_SPEC): New define. 
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 3fa7842c68c..ef1b2883cad 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -2578,7 +2578,9 @@ public: /* opt_pass methods: */ virtual bool gate (function *) { - return TARGET_AVX && !TARGET_AVX512F && TARGET_VZEROUPPER; + return TARGET_AVX && !TARGET_AVX512F + && TARGET_VZEROUPPER && flag_expensive_optimizations + && !optimize_size; } virtual unsigned int execute (function *) @@ -3874,6 +3876,8 @@ ix86_option_override_internal (bool main_args_p, } ix86_tune_cost = processor_target_table[ix86_tune].cost; + /* TODO: ix86_cost should be chosen at instruction or function granularity + so for cold code we use size_cost even in !optimize_size compilation. */ if (opts->x_optimize_size) ix86_cost = &ix86_size_cost; else @@ -4113,8 +4117,7 @@ ix86_option_override_internal (bool main_args_p, } if (ix86_tune_features [X86_TUNE_ACCUMULATE_OUTGOING_ARGS] - && !(opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS) - && !opts->x_optimize_size) + && !(opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)) opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS; /* If stack probes are required, the space used for large function @@ -4244,26 +4247,19 @@ ix86_option_override_internal (bool main_args_p, #endif } - /* When not opts->x_optimize for size, enable vzeroupper optimization for - TARGET_AVX with -fexpensive-optimizations and split 32-byte - AVX unaligned load/store. 
*/ - if (!opts->x_optimize_size) - { - if (flag_expensive_optimizations - && !(opts_set->x_target_flags & MASK_VZEROUPPER)) - opts->x_target_flags |= MASK_VZEROUPPER; - if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL] - && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD)) - opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD; - if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL] - && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE)) - opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE; - /* Enable 128-bit AVX instruction generation - for the auto-vectorizer. */ - if (TARGET_AVX128_OPTIMAL - && !(opts_set->x_target_flags & MASK_PREFER_AVX128)) - opts->x_target_flags |= MASK_PREFER_AVX128; - } + if (!(opts_set->x_target_flags & MASK_VZEROUPPER)) + opts->x_target_flags |= MASK_VZEROUPPER; + if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL] + && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD)) + opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD; + if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL] + && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE)) + opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE; + /* Enable 128-bit AVX instruction generation + for the auto-vectorizer. 
*/ + if (TARGET_AVX128_OPTIMAL + && !(opts_set->x_target_flags & MASK_PREFER_AVX128)) + opts->x_target_flags |= MASK_PREFER_AVX128; if (opts->x_ix86_recip_name) { @@ -17469,7 +17465,8 @@ ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1) if (MEM_P (op1)) { - if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD) + if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD + && optimize_insn_for_speed_p ()) { rtx r = gen_reg_rtx (mode); m = adjust_address (op1, mode, 0); @@ -17489,7 +17486,8 @@ ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1) } else if (MEM_P (op0)) { - if (TARGET_AVX256_SPLIT_UNALIGNED_STORE) + if (TARGET_AVX256_SPLIT_UNALIGNED_STORE + && optimize_insn_for_speed_p ()) { m = adjust_address (op0, mode, 0); emit_insn (extract (m, op1, const0_rtx)); diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 41de832ebba..ee2d93bdbcb 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -5434,7 +5434,7 @@ { rtx tmp0, tmp1; - if (TARGET_AVX && !TARGET_PREFER_AVX128) + if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ()) { tmp0 = gen_reg_rtx (V4DFmode); tmp1 = force_reg (V2DFmode, operands[1]); @@ -5496,7 +5496,7 @@ { rtx tmp0, tmp1, tmp2; - if (TARGET_AVX && !TARGET_PREFER_AVX128) + if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ()) { tmp0 = gen_reg_rtx (V4DFmode); tmp1 = force_reg (V2DFmode, operands[1]); @@ -5593,7 +5593,7 @@ { rtx tmp0, tmp1, tmp2; - if (TARGET_AVX && !TARGET_PREFER_AVX128) + if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ()) { tmp0 = gen_reg_rtx (V4DFmode); tmp1 = force_reg (V2DFmode, operands[1]); @@ -14472,7 +14472,7 @@ rtx tmp0, tmp1; if (mode == V2DFmode - && TARGET_AVX && !TARGET_PREFER_AVX128) + && TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ()) { rtx tmp2 = gen_reg_rtx (V4DFmode); @@ -14579,7 +14579,7 @@ rtx tmp0, tmp1; if (mode == V2DFmode - && TARGET_AVX && !TARGET_PREFER_AVX128) + && TARGET_AVX && !TARGET_PREFER_AVX128 && 
optimize_insn_for_speed_p ()) { rtx tmp2 = gen_reg_rtx (V4DFmode); -- 2.11.4.GIT