From 78040ae8d3ef07c86234b771afa9da47018a6341 Mon Sep 17 00:00:00 2001 From: marxin Date: Mon, 17 Sep 2018 08:47:38 +0000 Subject: [PATCH] i386: move alignment defaults to processor_costs. 2018-09-17 Martin Liska * common/config/i386/i386-common.c (ix86_get_valid_option_values): Use processor_names table. * config/i386/i386.c (ix86_default_align): Use processor_cost_table for alignment values. (ix86_option_override_internal): Use processor_names. (ix86_function_specific_print): Likewise. * config/i386/i386.h (struct processor_costs): Add alignment values. (struct ptt): Remove and replace with const char *. * config/i386/x86-tune-costs.h (struct processor_costs): Declare default alignments for all costs. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@264359 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/ChangeLog | 14 +++++ gcc/common/config/i386/i386-common.c | 82 +++++++++++++-------------- gcc/config/i386/i386.c | 15 +++-- gcc/config/i386/i386.h | 22 +++----- gcc/config/i386/x86-tune-costs.h | 104 +++++++++++++++++++++++++++++++++++ 5 files changed, 173 insertions(+), 64 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 59b73ed0898..db4c06541b8 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,17 @@ +2018-09-17 Martin Liska + + * common/config/i386/i386-common.c (ix86_get_valid_option_values): + Use processor_names table. + * config/i386/i386.c (ix86_default_align): Use + processor_cost_table for alignment values. + (ix86_option_override_internal): Use processor_names. + (ix86_function_specific_print): Likewise. + * config/i386/i386.h (struct processor_costs): + Add alignment values. + (struct ptt): Remove and replace with const char *. + * config/i386/x86-tune-costs.h (struct processor_costs): + Declare default alignments for all costs. + 2018-09-17 Aldy Hernandez * tree-vrp.c (extract_range_from_unary_expr): Do not special case diff --git a/gcc/common/config/i386/i386-common.c b/gcc/common/config/i386/i386-common.c index c7eb859e1c1..3b5312d7250 100644 --- a/gcc/common/config/i386/i386-common.c +++ b/gcc/common/config/i386/i386-common.c @@ -1461,49 +1461,45 @@ i386_except_unwind_info (struct gcc_options *opts) #define TARGET_SUPPORTS_SPLIT_STACK ix86_supports_split_stack /* This table must be in sync with enum processor_type in i386.h. */ -const struct ptt processor_target_table[PROCESSOR_max] = +const char *const processor_names[PROCESSOR_max] = { - /* The "0:0:8" label alignment specified for some processors generates - secondary 8-byte alignment only for those label/jump/loop targets - which have primary alignment. */ - - {"generic", "16:11:8", "16:11:8", "0:0:8", "16"}, - {"i386", "4", "4", NULL, "4" }, - {"i486", "16", "16", "0:0:8", "16"}, - {"pentium", "16:8:8", "16:8:8", "0:0:8", "16"}, - {"lakemont", "16:8:8", "16:8:8", "0:0:8", "16"}, - {"pentiumpro", "16", "16:11:8", "0:0:8", "16"}, - {"pentium4", NULL, NULL, NULL, NULL}, - {"nocona", NULL, NULL, NULL, NULL}, - {"core2", "16:11:8", "16:11:8", "0:0:8", "16"}, - {"nehalem", "16:11:8", "16:11:8", "0:0:8", "16"}, - {"sandybridge", "16:11:8", "16:11:8", "0:0:8", "16"}, - {"haswell", "16:11:8", "16:11:8", "0:0:8", "16"}, - {"bonnell", "16", "16:8:8", "0:0:8", "16"}, - {"silvermont", "16", "16:8:8", "0:0:8", "16"}, - {"goldmont", "16", "16:8:8", "0:0:8", "16"}, - {"goldmont-plus", "16", "16:8:8", "0:0:8", "16"}, - {"tremont", "16", "16:8:8", "0:0:8", "16"}, - {"knl", "16", "16:8:8", "0:0:8", "16"}, - {"knm", "16", "16:8:8", "0:0:8", "16"}, - {"skylake", "16:11:8", "16:11:8", "0:0:8", "16"}, - {"skylake-avx512", "16:11:8", "16:11:8", "0:0:8", "16"}, - {"cannonlake", "16:11:8", "16:11:8", "0:0:8", "16"}, - {"icelake-client", "16:11:8", "16:11:8", "0:0:8", "16"}, - {"icelake-server", "16:11:8", "16:11:8", "0:0:8", "16"}, - {"intel", "16", "16:8:8", "0:0:8", "16"}, - {"geode", NULL, NULL, NULL, NULL}, - {"k6", "32:8:8", "32:8:8", "0:0:8", "32"}, - {"athlon", "16:8:8", "16:8:8", "0:0:8", "16"}, - {"k8", "16:8:8", "16:8:8", "0:0:8", "16"}, - {"amdfam10", "32:25:8", "32:8:8", "0:0:8", "32"}, - {"bdver1", "16:11:8", "16:8:8", "0:0:8", "11"}, - {"bdver2", "16:11:8", "16:8:8", "0:0:8", "11"}, - {"bdver3", "16:11:8", "16:8:8", "0:0:8", "11"}, - {"bdver4", "16:11:8", "16:8:8", "0:0:8", "11"}, - {"btver1", "16:11:8", "16:8:8", "0:0:8", "11"}, - {"btver2", "16:11:8", "16:8:8", "0:0:8", "11"}, - {"znver1", "16", "16", "0:0:8", "16"} + "generic", + "i386", + "i486", + "pentium", + "lakemont", + "pentiumpro", + "pentium4", + "nocona", + "core2", + "nehalem", + "sandybridge", + "haswell", + "bonnell", + "silvermont", + "goldmont", + "goldmont-plus", + "tremont", + "knl", + "knm", + "skylake", + "skylake-avx512", + "cannonlake", + "icelake-client", + "icelake-server", + "intel", + "geode", + "k6", + "athlon", + "k8", + "amdfam10", + "bdver1", + "bdver2", + "bdver3", + "bdver4", + "btver1", + "btver2", + "znver1" }; const pta processor_alias_table[] = @@ -1715,7 +1711,7 @@ ix86_get_valid_option_values (int option_code, break; case OPT_mtune_: for (unsigned i = 0; i < PROCESSOR_max; i++) - v.safe_push (processor_target_table[i].name); + v.safe_push (processor_names[i]); break; default: break; diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 48e484b3d62..96759de49bc 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -3352,13 +3352,13 @@ ix86_default_align (struct gcc_options *opts) { /* -falign-foo without argument: supply one. */ if (opts->x_flag_align_loops && !opts->x_str_align_loops) - opts->x_str_align_loops = processor_target_table[ix86_tune].align_loop; + opts->x_str_align_loops = processor_cost_table[ix86_tune]->align_loop; if (opts->x_flag_align_jumps && !opts->x_str_align_jumps) - opts->x_str_align_jumps = processor_target_table[ix86_tune].align_jump; + opts->x_str_align_jumps = processor_cost_table[ix86_tune]->align_jump; if (opts->x_flag_align_labels && !opts->x_str_align_labels) - opts->x_str_align_labels = processor_target_table[ix86_tune].align_label; + opts->x_str_align_labels = processor_cost_table[ix86_tune]->align_label; if (opts->x_flag_align_functions && !opts->x_str_align_functions) - opts->x_str_align_functions = processor_target_table[ix86_tune].align_func; + opts->x_str_align_functions = processor_cost_table[ix86_tune]->align_func; } /* Implement TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE hook. */ @@ -3488,8 +3488,7 @@ ix86_option_override_internal (bool main_args_p, opts->x_ix86_tune_string = opts->x_ix86_arch_string; if (!opts->x_ix86_tune_string) { - opts->x_ix86_tune_string - = processor_target_table[TARGET_CPU_DEFAULT].name; + opts->x_ix86_tune_string = processor_names[TARGET_CPU_DEFAULT]; ix86_tune_defaulted = 1; } @@ -4940,12 +4939,12 @@ ix86_function_specific_print (FILE *file, int indent, gcc_assert (ptr->arch < PROCESSOR_max); fprintf (file, "%*sarch = %d (%s)\n", indent, "", - ptr->arch, processor_target_table[ptr->arch].name); + ptr->arch, processor_names[ptr->arch]); gcc_assert (ptr->tune < PROCESSOR_max); fprintf (file, "%*stune = %d (%s)\n", indent, "", - ptr->tune, processor_target_table[ptr->tune].name); + ptr->tune, processor_names[ptr->tune]); fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost); diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 382323d385b..01eba5dd01f 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -311,6 +311,14 @@ struct processor_costs { cost model. */ const int cond_not_taken_branch_cost;/* Cost of not taken branch for vectorizer cost model. */ + + /* The "0:0:8" label alignment specified for some processors generates + secondary 8-byte alignment only for those label/jump/loop targets + which have primary alignment. */ + const char *const align_loop; /* Loop alignment. */ + const char *const align_jump; /* Jump alignment. */ + const char *const align_label; /* Label alignment. */ + const char *const align_func; /* Function alignment. */ }; extern const struct processor_costs *ix86_cost; @@ -2278,19 +2286,7 @@ enum processor_type }; #if !defined(IN_LIBGCC2) && !defined(IN_TARGET_LIBS) && !defined(IN_RTS) -/* Processor target table, indexed by processor number */ -struct ptt -{ - const char *const name; /* processor name */ - - /* Default alignments. */ - const char *const align_loop; - const char *const align_jump; - const char *const align_label; - const char *const align_func; -}; - -extern const struct ptt processor_target_table[PROCESSOR_max]; +extern const char *const processor_names[PROCESSOR_max]; #include "wide-int-bitmask.h" diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h index ff289342e4f..71a5854c09a 100644 --- a/gcc/config/i386/x86-tune-costs.h +++ b/gcc/config/i386/x86-tune-costs.h @@ -111,6 +111,10 @@ struct processor_costs ix86_size_cost = {/* costs for tuning for size */ ix86_size_memset, COSTS_N_BYTES (1), /* cond_taken_branch_cost. */ COSTS_N_BYTES (1), /* cond_not_taken_branch_cost. */ + NULL, /* Loop alignment. */ + NULL, /* Jump alignment. */ + NULL, /* Label alignment. */ + NULL, /* Func alignment. */ }; /* Processor costs (relative to an add) */ @@ -197,6 +201,10 @@ struct processor_costs i386_cost = { /* 386 specific costs */ i386_memset, COSTS_N_INSNS (3), /* cond_taken_branch_cost. */ COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */ + "4", /* Loop alignment. */ + "4", /* Jump alignment. */ + NULL, /* Label alignment. */ + "4", /* Func alignment. */ }; static stringop_algs i486_memcpy[2] = { @@ -284,6 +292,10 @@ struct processor_costs i486_cost = { /* 486 specific costs */ i486_memset, COSTS_N_INSNS (3), /* cond_taken_branch_cost. */ COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */ + "16", /* Loop alignment. */ + "16", /* Jump alignment. */ + "0:0:8", /* Label alignment. */ + "16", /* Func alignment. */ }; static stringop_algs pentium_memcpy[2] = { @@ -369,6 +381,10 @@ struct processor_costs pentium_cost = { pentium_memset, COSTS_N_INSNS (3), /* cond_taken_branch_cost. */ COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */ + "16:8:8", /* Loop alignment. */ + "16:8:8", /* Jump alignment. */ + "0:0:8", /* Label alignment. */ + "16", /* Func alignment. */ }; static const @@ -447,6 +463,10 @@ struct processor_costs lakemont_cost = { pentium_memset, COSTS_N_INSNS (3), /* cond_taken_branch_cost. */ COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */ + "16:8:8", /* Loop alignment. */ + "16:8:8", /* Jump alignment. */ + "0:0:8", /* Label alignment. */ + "16", /* Func alignment. */ }; /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes @@ -540,6 +560,10 @@ struct processor_costs pentiumpro_cost = { pentiumpro_memset, COSTS_N_INSNS (3), /* cond_taken_branch_cost. */ COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */ + "16", /* Loop alignment. */ + "16:11:8", /* Jump alignment. */ + "0:0:8", /* Label alignment. */ + "16", /* Func alignment. */ }; static stringop_algs geode_memcpy[2] = { @@ -625,6 +649,10 @@ struct processor_costs geode_cost = { geode_memset, COSTS_N_INSNS (3), /* cond_taken_branch_cost. */ COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */ + NULL, /* Loop alignment. */ + NULL, /* Jump alignment. */ + NULL, /* Label alignment. */ + NULL, /* Func alignment. */ }; static stringop_algs k6_memcpy[2] = { @@ -712,6 +740,10 @@ struct processor_costs k6_cost = { k6_memset, COSTS_N_INSNS (3), /* cond_taken_branch_cost. */ COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */ + "32:8:8", /* Loop alignment. */ + "32:8:8", /* Jump alignment. */ + "0:0:8", /* Label alignment. */ + "32", /* Func alignment. */ }; /* For some reason, Athlon deals better with REP prefix (relative to loops) @@ -800,6 +832,10 @@ struct processor_costs athlon_cost = { athlon_memset, COSTS_N_INSNS (3), /* cond_taken_branch_cost. */ COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */ + "16:8:8", /* Loop alignment. */ + "16:8:8", /* Jump alignment. */ + "0:0:8", /* Label alignment. */ + "16", /* Func alignment. */ }; /* K8 has optimized REP instruction for medium sized blocks, but for very @@ -897,6 +933,10 @@ struct processor_costs k8_cost = { k8_memset, COSTS_N_INSNS (3), /* cond_taken_branch_cost. */ COSTS_N_INSNS (2), /* cond_not_taken_branch_cost. */ + "16:8:8", /* Loop alignment. */ + "16:8:8", /* Jump alignment. */ + "0:0:8", /* Label alignment. */ + "16", /* Func alignment. */ }; /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for @@ -1001,6 +1041,10 @@ struct processor_costs amdfam10_cost = { amdfam10_memset, COSTS_N_INSNS (2), /* cond_taken_branch_cost. */ COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */ + "32:25:8", /* Loop alignment. */ + "32:8:8", /* Jump alignment. */ + "0:0:8", /* Label alignment. */ + "32", /* Func alignment. */ }; /* BDVER1 has optimized REP instruction for medium sized blocks, but for @@ -1099,6 +1143,10 @@ const struct processor_costs bdver1_cost = { bdver1_memset, COSTS_N_INSNS (4), /* cond_taken_branch_cost. */ COSTS_N_INSNS (2), /* cond_not_taken_branch_cost. */ + "16:11:8", /* Loop alignment. */ + "16:8:8", /* Jump alignment. */ + "0:0:8", /* Label alignment. */ + "11", /* Func alignment. */ }; /* BDVER2 has optimized REP instruction for medium sized blocks, but for @@ -1198,6 +1246,10 @@ const struct processor_costs bdver2_cost = { bdver2_memset, COSTS_N_INSNS (4), /* cond_taken_branch_cost. */ COSTS_N_INSNS (2), /* cond_not_taken_branch_cost. */ + "16:11:8", /* Loop alignment. */ + "16:8:8", /* Jump alignment. */ + "0:0:8", /* Label alignment. */ + "11", /* Func alignment. */ }; @@ -1296,6 +1348,10 @@ struct processor_costs bdver3_cost = { bdver3_memset, COSTS_N_INSNS (4), /* cond_taken_branch_cost. */ COSTS_N_INSNS (2), /* cond_not_taken_branch_cost. */ + "16:11:8", /* Loop alignment. */ + "16:8:8", /* Jump alignment. */ + "0:0:8", /* Label alignment. */ + "11", /* Func alignment. */ }; /* BDVER4 has optimized REP instruction for medium sized blocks, but for @@ -1393,6 +1449,10 @@ struct processor_costs bdver4_cost = { bdver4_memset, COSTS_N_INSNS (4), /* cond_taken_branch_cost. */ COSTS_N_INSNS (2), /* cond_not_taken_branch_cost. */ + "16:11:8", /* Loop alignment. */ + "16:8:8", /* Jump alignment. */ + "0:0:8", /* Label alignment. */ + "11", /* Func alignment. */ }; @@ -1513,6 +1573,10 @@ struct processor_costs znver1_cost = { znver1_memset, COSTS_N_INSNS (4), /* cond_taken_branch_cost. */ COSTS_N_INSNS (2), /* cond_not_taken_branch_cost. */ + "16", /* Loop alignment. */ + "16", /* Jump alignment. */ + "0:0:8", /* Label alignment. */ + "16", /* Func alignment. */ }; /* skylake_cost should produce code tuned for Skylake familly of CPUs. */ @@ -1605,6 +1669,10 @@ struct processor_costs skylake_cost = { skylake_memset, COSTS_N_INSNS (3), /* cond_taken_branch_cost. */ COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */ + "16:11:8", /* Loop alignment. */ + "16:11:8", /* Jump alignment. */ + "0:0:8", /* Label alignment. */ + "16", /* Func alignment. */ }; /* BTVER1 has optimized REP instruction for medium sized blocks, but for very small blocks it is better to use loop. For large blocks, libcall can @@ -1694,6 +1762,10 @@ const struct processor_costs btver1_cost = { btver1_memset, COSTS_N_INSNS (2), /* cond_taken_branch_cost. */ COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */ + "16:11:8", /* Loop alignment. */ + "16:8:8", /* Jump alignment. */ + "0:0:8", /* Label alignment. */ + "11", /* Func alignment. */ }; static stringop_algs btver2_memcpy[2] = { @@ -1781,6 +1853,10 @@ const struct processor_costs btver2_cost = { btver2_memset, COSTS_N_INSNS (2), /* cond_taken_branch_cost. */ COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */ + "16:11:8", /* Loop alignment. */ + "16:8:8", /* Jump alignment. */ + "0:0:8", /* Label alignment. */ + "11", /* Func alignment. */ }; static stringop_algs pentium4_memcpy[2] = { @@ -1867,6 +1943,10 @@ struct processor_costs pentium4_cost = { pentium4_memset, COSTS_N_INSNS (3), /* cond_taken_branch_cost. */ COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */ + NULL, /* Loop alignment. */ + NULL, /* Jump alignment. */ + NULL, /* Label alignment. */ + NULL, /* Func alignment. */ }; static stringop_algs nocona_memcpy[2] = { @@ -1956,6 +2036,10 @@ struct processor_costs nocona_cost = { nocona_memset, COSTS_N_INSNS (3), /* cond_taken_branch_cost. */ COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */ + NULL, /* Loop alignment. */ + NULL, /* Jump alignment. */ + NULL, /* Label alignment. */ + NULL, /* Func alignment. */ }; static stringop_algs atom_memcpy[2] = { @@ -2043,6 +2127,10 @@ struct processor_costs atom_cost = { atom_memset, COSTS_N_INSNS (3), /* cond_taken_branch_cost. */ COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */ + "16", /* Loop alignment. */ + "16:8:8", /* Jump alignment. */ + "0:0:8", /* Label alignment. */ + "16", /* Func alignment. */ }; static stringop_algs slm_memcpy[2] = { @@ -2130,6 +2218,10 @@ struct processor_costs slm_cost = { slm_memset, COSTS_N_INSNS (3), /* cond_taken_branch_cost. */ COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */ + "16", /* Loop alignment. */ + "16:8:8", /* Jump alignment. */ + "0:0:8", /* Label alignment. */ + "16", /* Func alignment. */ }; static stringop_algs intel_memcpy[2] = { @@ -2217,6 +2309,10 @@ struct processor_costs intel_cost = { intel_memset, COSTS_N_INSNS (3), /* cond_taken_branch_cost. */ COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */ + "16", /* Loop alignment. */ + "16:8:8", /* Jump alignment. */ + "0:0:8", /* Label alignment. */ + "16", /* Func alignment. */ }; /* Generic should produce code tuned for Core-i7 (and newer chips) @@ -2313,6 +2409,10 @@ struct processor_costs generic_cost = { generic_memset, COSTS_N_INSNS (4), /* cond_taken_branch_cost. */ COSTS_N_INSNS (2), /* cond_not_taken_branch_cost. */ + "16:11:8", /* Loop alignment. */ + "16:11:8", /* Jump alignment. */ + "0:0:8", /* Label alignment. */ + "16", /* Func alignment. */ }; /* core_cost should produce code tuned for Core familly of CPUs. */ @@ -2416,5 +2516,9 @@ struct processor_costs core_cost = { core_memset, COSTS_N_INSNS (3), /* cond_taken_branch_cost. */ COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */ + "16:11:8", /* Loop alignment. */ + "16:11:8", /* Jump alignment. */ + "0:0:8", /* Label alignment. */ + "16", /* Func alignment. */ }; -- 2.11.4.GIT