From be917ea20a1b06840552a599b099eb96f855cb8e Mon Sep 17 00:00:00 2001 From: vmakarov Date: Mon, 3 Mar 2008 23:20:00 +0000 Subject: [PATCH] 2008-03-03 Vladimir Makarov * ira-conflicts.c (allocno_conflict_p): Rename to allocno_live_ranges_intersect_p. (allocno_reg_conflict_p): Rename to pseudo_live_ranges_intersect_p. * ira-lives.c (high_pressure_start_point): New variable. (update_allocno_pressure_excess_length): New function. (make_regno_dead): Call it. (set_allocno_live): Udpate high_pressure_start_point. (clear_allocno_live, mark_reg_store, mark_reg_death): Ditto. (process_bb_node_lives): Initialize high_pressure_start_point. Consider only allocatable hard regs for liveness. * ira-costs.c (record_reg_classes): Multiply cost by frequency. Use half frequency when memory is allowed. Remove check for prefferred small register classes of pseudo-registers. (scan_one_insn): Continue processing operands for moves of pseudo-registers with equivalence. Don't multiply cost by frequency. (init_ira_costs): Use bigger initial cost value. * ira-int.h: Ditto. (allocno): New members somewhere_renamed_p and excess_pressure_points_num. (ALLOCNO_SOMEWHERE_RENAMED_P, ALLOCNO_EXCESS_PRESSURE_POINTS_NUM): New macros. (allocno_reg_conflict_p): Rename to pseudo_live_ranges_intersect_p. (allocno_conflict_p): Rename to allocno_live_ranges_intersect_p. * ira.h (sort_regnos_for_alter_reg, better_spill_reload_regno_p): New function prototypes. * ira-color.c (merge_allocnos): Remove mode check. (coalesce_allocnos): Add parameter and modify to use it for coalescing spilled allocnos. (color_allocnos): Pass parameter to coalesce_allocnos. (regno_coalesced_allocno_freq, regno_coalesced_allocno_num): New variables. (coalesced_pseudo_reg_compare, calculate_spill_cost, sort_regnos_for_alter_reg, better_spill_reload_regno_p): New functions. (mark_allocation_change, allocno_reload_assign, reassign_pseudos ): Use ALLOCNO_MEMORY_COST instead of ALLOCNO_UPDATED_MEMORY_COST. (reuse_stack_slot): Use pseudo_live_ranges_intersect_p instead of allocno_reg_conflict_p. * ira-emit.c (set_allocno_somewhere_renamed_p): New function. (renamed_regno_bitmap): New static variable. (change_loop): Check register pressure for creating new pseudo. Set up ALLOCNO_SOMEWHERE_RENAMED_P and renamed_regno_bitmap. (ira_emit): Allocate/deallocate renamed_regno_bitmap. Call set_allocno_somewhere_renamed_p. * ira-build.c (ira_loops): Move to ira.c. (create_allocno): Initialize ALLOCNO_SOMEWHERE_RENAMED_P and ALLOCNO_MEMORY_COST. (create_cap_allocno): Set up ALLOCNO_MEMORY_COST. (check_and_add_conflicts): Use allocno_live_ranges_intersect_p instead of allocno_conflict_p. (ira_flattening): Check conflicts of allocno renamed somewhere locally. Update ALLOCNO_EXCESS_PRESSURE_POINTS_NUM. (ira_build): Move code for finding loops to ira.c. (ira_destroy): Move code for freeing loops to ira.c. * ira.c (setup_reg_class_intersect_union): Fix calculation of reg_class_union. (calculate_allocation_cost): Use ALLOCNO_MEMORY_COST instead of ALLOCNO_UPDATED_MEMORY_COST. (ira_loops): Move from ira-build.c. (ira): Move code for finding and freeing loops from ira-build.c. Don't use regional RA when there are too many loops. * reload1.c (pseudo_reg_compare): Remove. (reload): Use sort_regnos_for_alter_reg. (hard_regno_to_pseudo_regno): New variable. (count_pseudo): Set up hard_regno_to_pseudo_regno. (order_regs_for_reload): Initialize hard_regno_to_pseudo_regno. (count_spilled_pseudo): Update hard_regno_to_pseudo_regno. (find_reg): Use better_spill_reload_regno_p. Check hard_regno_to_pseudo_regno. * cfgloopanal.c: Include params.h. (estimate_reg_pressure_cost): Check number of loops. * params.def (PARAM_IRA_MAX_LOOPS_NUM): New parameter. * params.h (IRA_MAX_LOOPS_NUM): New macro. * Makefile.in (cfgloopanal.o): Add PARAMS_H. * doc/invoke.texi (ira-max-loops-num): Describe. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/ira@132845 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/ChangeLog | 95 +++++++++++++++++++++ gcc/Makefile.in | 2 +- gcc/cfgloopanal.c | 4 +- gcc/doc/invoke.texi | 7 ++ gcc/ira-build.c | 42 +++++----- gcc/ira-color.c | 234 ++++++++++++++++++++++++++++++++++++++++++++++++---- gcc/ira-conflicts.c | 16 ++-- gcc/ira-costs.c | 62 ++++++-------- gcc/ira-emit.c | 44 ++++++++-- gcc/ira-int.h | 12 ++- gcc/ira-lives.c | 68 ++++++++++++++- gcc/ira.c | 45 +++++----- gcc/ira.h | 3 + gcc/params.def | 5 ++ gcc/params.h | 2 + gcc/reload1.c | 76 +++++++++++------ 16 files changed, 576 insertions(+), 141 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 6a9619068b9..00f2d927ffc 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,98 @@ +2008-03-03 Vladimir Makarov + + * ira-conflicts.c (allocno_conflict_p): Rename to + allocno_live_ranges_intersect_p. + (allocno_reg_conflict_p): Rename to + pseudo_live_ranges_intersect_p. + + * ira-lives.c (high_pressure_start_point): New variable. + (update_allocno_pressure_excess_length): New function. + (make_regno_dead): Call it. + (set_allocno_live): Udpate high_pressure_start_point. + (clear_allocno_live, mark_reg_store, mark_reg_death): Ditto. + (process_bb_node_lives): Initialize high_pressure_start_point. + Consider only allocatable hard regs for liveness. + + * ira-costs.c (record_reg_classes): Multiply cost by frequency. + Use half frequency when memory is allowed. Remove check for + prefferred small register classes of pseudo-registers. + (scan_one_insn): Continue processing operands for moves of + pseudo-registers with equivalence. Don't multiply cost by + frequency. + (init_ira_costs): Use bigger initial cost value. + + * ira-int.h: Ditto. + (allocno): New members somewhere_renamed_p and + excess_pressure_points_num. + (ALLOCNO_SOMEWHERE_RENAMED_P, ALLOCNO_EXCESS_PRESSURE_POINTS_NUM): + New macros. + (allocno_reg_conflict_p): Rename to + pseudo_live_ranges_intersect_p. + (allocno_conflict_p): Rename to allocno_live_ranges_intersect_p. + + * ira.h (sort_regnos_for_alter_reg, better_spill_reload_regno_p): + New function prototypes. + + * ira-color.c (merge_allocnos): Remove mode check. + (coalesce_allocnos): Add parameter and modify to use it for + coalescing spilled allocnos. + (color_allocnos): Pass parameter to coalesce_allocnos. + (regno_coalesced_allocno_freq, regno_coalesced_allocno_num): New + variables. + (coalesced_pseudo_reg_compare, calculate_spill_cost, + sort_regnos_for_alter_reg, better_spill_reload_regno_p): New + functions. + (mark_allocation_change, allocno_reload_assign, reassign_pseudos + ): Use ALLOCNO_MEMORY_COST instead of ALLOCNO_UPDATED_MEMORY_COST. + (reuse_stack_slot): Use pseudo_live_ranges_intersect_p instead of + allocno_reg_conflict_p. + + * ira-emit.c (set_allocno_somewhere_renamed_p): New function. + (renamed_regno_bitmap): New static variable. + (change_loop): Check register pressure for creating new pseudo. + Set up ALLOCNO_SOMEWHERE_RENAMED_P and renamed_regno_bitmap. + (ira_emit): Allocate/deallocate renamed_regno_bitmap. + Call set_allocno_somewhere_renamed_p. + + * ira-build.c (ira_loops): Move to ira.c. + (create_allocno): Initialize ALLOCNO_SOMEWHERE_RENAMED_P and + ALLOCNO_MEMORY_COST. + (create_cap_allocno): Set up ALLOCNO_MEMORY_COST. + (check_and_add_conflicts): Use allocno_live_ranges_intersect_p + instead of allocno_conflict_p. + (ira_flattening): Check conflicts of allocno renamed somewhere + locally. Update ALLOCNO_EXCESS_PRESSURE_POINTS_NUM. + (ira_build): Move code for finding loops to ira.c. + (ira_destroy): Move code for freeing loops to ira.c. + + * ira.c (setup_reg_class_intersect_union): Fix calculation of + reg_class_union. + (calculate_allocation_cost): Use ALLOCNO_MEMORY_COST instead of + ALLOCNO_UPDATED_MEMORY_COST. + (ira_loops): Move from ira-build.c. + (ira): Move code for finding and freeing loops from ira-build.c. + Don't use regional RA when there are too many loops. + + * reload1.c (pseudo_reg_compare): Remove. + (reload): Use sort_regnos_for_alter_reg. + (hard_regno_to_pseudo_regno): New variable. + (count_pseudo): Set up hard_regno_to_pseudo_regno. + (order_regs_for_reload): Initialize hard_regno_to_pseudo_regno. + (count_spilled_pseudo): Update hard_regno_to_pseudo_regno. + (find_reg): Use better_spill_reload_regno_p. Check + hard_regno_to_pseudo_regno. + + * cfgloopanal.c: Include params.h. + (estimate_reg_pressure_cost): Check number of loops. + + * params.def (PARAM_IRA_MAX_LOOPS_NUM): New parameter. + + * params.h (IRA_MAX_LOOPS_NUM): New macro. + + * Makefile.in (cfgloopanal.o): Add PARAMS_H. + + * doc/invoke.texi (ira-max-loops-num): Describe. + 2008-02-20 Vladimir Makarov * ira.h, ira-emit.c: Add year 2008 to the copyright. diff --git a/gcc/Makefile.in b/gcc/Makefile.in index 311d50c51d7..2a02f90ce8f 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -2672,7 +2672,7 @@ cfgloop.o : cfgloop.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) coretypes.h $(TM_H) \ $(GGC_H) cfgloopanal.o : cfgloopanal.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) \ $(BASIC_BLOCK_H) hard-reg-set.h $(CFGLOOP_H) $(EXPR_H) coretypes.h $(TM_H) \ - $(OBSTACK_H) output.h graphds.h + $(OBSTACK_H) output.h graphds.h $(PARAMS_H) graphds.o : graphds.c graphds.h $(CONFIG_H) $(SYSTEM_H) bitmap.h $(OBSTACK_H) \ coretypes.h vec.h vecprim.h struct-equiv.o : struct-equiv.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ diff --git a/gcc/cfgloopanal.c b/gcc/cfgloopanal.c index 3efad776aa0..4f5811558f1 100644 --- a/gcc/cfgloopanal.c +++ b/gcc/cfgloopanal.c @@ -29,6 +29,7 @@ along with GCC; see the file COPYING3. If not see #include "expr.h" #include "output.h" #include "graphds.h" +#include "params.h" /* Checks whether BB is executed exactly once in each LOOP iteration. */ @@ -375,7 +376,8 @@ estimate_reg_pressure_cost (unsigned n_new, unsigned n_old) unsigned regs_needed = n_new + n_old; if (flag_ira && (flag_ira_algorithm == IRA_ALGORITHM_REGIONAL - || flag_ira_algorithm == IRA_ALGORITHM_MIXED)) + || flag_ira_algorithm == IRA_ALGORITHM_MIXED) + && number_of_loops () <= (unsigned) IRA_MAX_LOOPS_NUM) /* Let IRA itself to deal with high register pressure. */ return 0; diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 355ed818ce1..5df8d22ef6f 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -7361,6 +7361,13 @@ processing. If this limit is hit, SCCVN processing for the whole function will not be done and optimizations depending on it will be disabled. The default maximum SCC size is 10000. +@item ira-max-loops-num +IRA uses a regional register allocation by default. If a function +contains loops more than number given by the parameter, non-regional +register allocator will be used even when option +@option{-fira-algorithm} is given. The default value of the parameter +is 20. + @end table @end table diff --git a/gcc/ira-build.c b/gcc/ira-build.c index e2ca6a368c7..a0c59acbda7 100644 --- a/gcc/ira-build.c +++ b/gcc/ira-build.c @@ -89,9 +89,6 @@ static void check_and_add_conflicts (allocno_t, allocno_t *); static void add_conflict_with_underlying_allocnos (allocno_t, loop_tree_node_t, int); -/* All natural loops. */ -struct loops ira_loops; - /* The root of the loop tree corresponding to the all function. */ loop_tree_node_t ira_loop_tree_root; @@ -514,6 +511,7 @@ create_allocno (int regno, int cap_p, loop_tree_node_t loop_tree_node) #endif ALLOCNO_MEM_OPTIMIZED_DEST (a) = NULL; ALLOCNO_MEM_OPTIMIZED_DEST_P (a) = FALSE; + ALLOCNO_SOMEWHERE_RENAMED_P (a) = FALSE; ALLOCNO_DONT_REASSIGN_P (a) = FALSE; ALLOCNO_IN_GRAPH_P (a) = FALSE; ALLOCNO_ASSIGNED_P (a) = FALSE; @@ -528,7 +526,9 @@ create_allocno (int regno, int cap_p, loop_tree_node_t loop_tree_node) ALLOCNO_COVER_CLASS (a) = NO_REGS; ALLOCNO_BEST_CLASS (a) = NO_REGS; ALLOCNO_COVER_CLASS_COST (a) = 0; + ALLOCNO_MEMORY_COST (a) = 0; ALLOCNO_UPDATED_MEMORY_COST (a) = 0; + ALLOCNO_EXCESS_PRESSURE_POINTS_NUM (a) = 0; ALLOCNO_NEXT_BUCKET_ALLOCNO (a) = NULL; ALLOCNO_PREV_BUCKET_ALLOCNO (a) = NULL; ALLOCNO_FIRST_COALESCED_ALLOCNO (a) = a; @@ -689,6 +689,7 @@ create_cap_allocno (allocno_t a) ALLOCNO_CAP (a) = cap; ALLOCNO_COVER_CLASS_COST (cap) = ALLOCNO_COVER_CLASS_COST (a); /* ??? memory_cost */ + ALLOCNO_MEMORY_COST (cap) = ALLOCNO_MEMORY_COST (a); ALLOCNO_UPDATED_MEMORY_COST (cap) = ALLOCNO_UPDATED_MEMORY_COST (a); if (internal_flag_ira_verbose > 2 && ira_dump_file != NULL) { @@ -1519,7 +1520,7 @@ check_and_add_conflicts (allocno_t a, allocno_t *conflict_vec) { conflict_a = regno_top_level_allocno_map [REGNO (ALLOCNO_REG (conflict_a))]; - if (allocno_conflict_p (conflict_a, a)) + if (allocno_live_ranges_intersect_p (conflict_a, a)) { add_to_allocno_conflict_vec (conflict_a, a); add_to_allocno_conflict_vec (a, conflict_a); @@ -1571,7 +1572,8 @@ add_conflict_with_underlying_allocnos (allocno_t a, void ira_flattening (int max_regno_before_emit, int max_point_before_emit) { - int i, j, k, free, propagate_p, stop_p, keep_p, hard_regs_num, new_allocnos_p; + int i, j, k, free, propagate_p, stop_p, keep_p; + int hard_regs_num, new_allocnos_p, renamed_p, start; unsigned int n; enum reg_class cover_class; rtx call, *allocno_calls; @@ -1587,7 +1589,7 @@ ira_flattening (int max_regno_before_emit, int max_point_before_emit) = ira_allocate (max_reg_num () * sizeof (allocno_t)); memset (regno_top_level_allocno_map, 0, max_reg_num () * sizeof (allocno_t)); expand_calls (); - new_allocnos_p = FALSE; + new_allocnos_p = renamed_p = FALSE; /* Updating accumulated attributes. */ for (i = max_regno_before_emit - 1; i >= FIRST_PSEUDO_REGISTER; i--) { @@ -1598,6 +1600,8 @@ ira_flattening (int max_regno_before_emit, int max_point_before_emit) { if (ALLOCNO_CAP_MEMBER (a) != NULL) continue; + if (ALLOCNO_SOMEWHERE_RENAMED_P (a)) + renamed_p = TRUE; if ((unsigned int) ALLOCNO_REGNO (a) != REGNO (ALLOCNO_REG (a)) && ALLOCNO_CALLS_CROSSED_NUM (a) != 0) { @@ -1681,6 +1685,8 @@ ira_flattening (int max_regno_before_emit, int max_point_before_emit) ALLOCNO_COVER_CLASS_COST (father_a) -= ALLOCNO_COVER_CLASS_COST (a); ALLOCNO_MEMORY_COST (father_a) -= ALLOCNO_MEMORY_COST (a); + ALLOCNO_EXCESS_PRESSURE_POINTS_NUM (father_a) + -= ALLOCNO_EXCESS_PRESSURE_POINTS_NUM (a); if ((father = ALLOCNO_LOOP_TREE_NODE (father_a)->father) == NULL || (father_a = (father->regno_allocno_map [ALLOCNO_REGNO (father_a)])) == NULL) @@ -1728,7 +1734,8 @@ ira_flattening (int max_regno_before_emit, int max_point_before_emit) regno_top_level_allocno_map [REGNO (ALLOCNO_REG (a))] = a; } } - ira_assert (new_allocnos_p || max_point_before_emit == max_point); + ira_assert (new_allocnos_p || renamed_p + || max_point_before_emit == max_point); if (new_allocnos_p) { /* Fix final allocnos attributes concerning calls. */ @@ -1858,15 +1865,17 @@ ira_flattening (int max_regno_before_emit, int max_point_before_emit) continue; } ALLOCNO_CAP (a) = NULL; - if (new_allocnos_p) + if (new_allocnos_p || ALLOCNO_SOMEWHERE_RENAMED_P (a)) { /* Remove conflicts. */ allocno_vec = ALLOCNO_CONFLICT_ALLOCNO_VEC (a); - for (k = j = ALLOCNO_CONFLICT_ALLOCNOS_NUM (a); + start = (ALLOCNO_SOMEWHERE_RENAMED_P (a) + ? 0 : ALLOCNO_CONFLICT_ALLOCNOS_NUM (a)); + for (k = j = start; (conflict_a = allocno_vec [j]) != NULL; j++) { - if (allocno_conflict_p (a, conflict_a)) + if (allocno_live_ranges_intersect_p (a, conflict_a)) allocno_vec [k++] = conflict_a; else { @@ -1997,9 +2006,6 @@ ira_build (int loops_p) initiate_calls (); initiate_allocnos (); initiate_copies (); - ira_assert (current_loops == NULL); - flow_loops_find (&ira_loops); - current_loops = &ira_loops; create_loop_tree_nodes (loops_p); form_loop_tree (); create_allocnos (); @@ -2051,17 +2057,7 @@ ira_build (int loops_p) void ira_destroy (void) { - basic_block bb; - finish_loop_tree_nodes (); -#if 0 - ira_assert (current_loops == &ira_loops); -#endif - flow_loops_free (&ira_loops); - free_dominance_info (CDI_DOMINATORS); - FOR_ALL_BB (bb) - bb->loop_father = NULL; - current_loops = NULL; finish_copies (); finish_allocnos (); finish_calls (); diff --git a/gcc/ira-color.c b/gcc/ira-color.c index 899ae705c24..6e0c59511eb 100644 --- a/gcc/ira-color.c +++ b/gcc/ira-color.c @@ -65,7 +65,7 @@ static void put_allocno_into_bucket (allocno_t); static int copy_freq_compare_func (const void *, const void *); static void merge_allocnos (allocno_t, allocno_t); static int coalesced_allocno_conflict_p (allocno_t, allocno_t); -static void coalesce_allocnos (void); +static void coalesce_allocnos (int); static void color_allocnos (void); static void print_loop_title (loop_tree_node_t); @@ -76,6 +76,14 @@ static void do_coloring (void); static void move_spill_restore (void); +static int coalesced_pseudo_reg_compare (const void *, const void *); + +static void setup_curr_costs (allocno_t); +static int pseudo_reg_compare (const void *, const void *); + +static int calculate_spill_cost (int *, rtx, rtx, rtx, + int *, int *, int *, int*); + /* Bitmap of allocnos which should be colored. */ static bitmap coloring_allocno_bitmap; @@ -931,7 +939,7 @@ pop_allocnos_from_stack (void) } } -/* Set up number of avaliable hard registers for ALLOCNO. */ +/* Set up number of available hard registers for ALLOCNO. */ static void setup_allocno_available_regs_num (allocno_t allocno) { @@ -1096,7 +1104,6 @@ merge_allocnos (allocno_t a1, allocno_t a2) { allocno_t a, first, last, next; - ira_assert (ALLOCNO_MODE (a1) == ALLOCNO_MODE (a2)); first = ALLOCNO_FIRST_COALESCED_ALLOCNO (a1); if (first == ALLOCNO_FIRST_COALESCED_ALLOCNO (a2)) return; @@ -1151,7 +1158,7 @@ coalesced_allocno_conflict_p (allocno_t a1, allocno_t a2) /* The major function for aggressive coalescing. */ static void -coalesce_allocnos (void) +coalesce_allocnos (int reload_p) { allocno_t a; copy_t cp, next_cp, *sorted_copies; @@ -1168,7 +1175,9 @@ coalesce_allocnos (void) EXECUTE_IF_SET_IN_BITMAP (coloring_allocno_bitmap, 0, j, bi) { a = allocnos [j]; - if (ALLOCNO_ASSIGNED_P (a)) + if ((! reload_p && ALLOCNO_ASSIGNED_P (a)) + || (reload_p + && (! ALLOCNO_ASSIGNED_P (a) || ALLOCNO_HARD_REGNO (a) >= 0))) continue; cover_class = ALLOCNO_COVER_CLASS (a); mode = ALLOCNO_MODE (a); @@ -1177,9 +1186,14 @@ coalesce_allocnos (void) if (cp->first == a) { next_cp = cp->next_first_allocno_copy; - if (ALLOCNO_COVER_CLASS (cp->second) == cover_class - && ALLOCNO_MODE (cp->second) == mode - && cp->insn != NULL && ! ALLOCNO_ASSIGNED_P (cp->second)) + if ((reload_p + || (ALLOCNO_COVER_CLASS (cp->second) == cover_class + && ALLOCNO_MODE (cp->second) == mode)) + && (reload_p || cp->insn != NULL) + && ((! reload_p && ! ALLOCNO_ASSIGNED_P (cp->second)) + || (reload_p + && ALLOCNO_ASSIGNED_P (cp->second) + && ALLOCNO_HARD_REGNO (cp->second) < 0))) sorted_copies [cp_num++] = cp; } else if (cp->second == a) @@ -1233,7 +1247,7 @@ color_allocnos (void) allocno_coalesced_p = FALSE; processed_coalesced_allocno_bitmap = ira_allocate_bitmap (); if (flag_ira_coalesce) - coalesce_allocnos (); + coalesce_allocnos (FALSE); /* Put the allocnos into the corresponding buckets. */ colorable_allocno_bucket = NULL; uncolorable_allocno_bucket = NULL; @@ -1364,8 +1378,8 @@ color_pass (loop_tree_node_t loop_tree_node) if (subloop_allocno == NULL) continue; if ((flag_ira_algorithm == IRA_ALGORITHM_MIXED - && loop_tree_node->reg_pressure [class] - <= available_class_regs [class]) + && (loop_tree_node->reg_pressure [class] + <= available_class_regs [class])) || (hard_regno < 0 && ! bitmap_bit_p (subloop_node->mentioned_allocnos, ALLOCNO_NUM (subloop_allocno)))) @@ -1643,6 +1657,94 @@ move_spill_restore (void) +/* Usage frequency and order number of coalesced allocno set to which + given pseudo register belongs to. */ +static int *regno_coalesced_allocno_freq; +static int *regno_coalesced_allocno_num; + +/* The function is used to sort pseudos according frequencies of + coalesced allocnos they belong to (putting most frequently ones + first when the frame pointer is needed or the last otherwise), and + according to coalesced allocno numbers. */ +static int +coalesced_pseudo_reg_compare (const void *v1p, const void *v2p) +{ + int regno1 = *(int *) v1p; + int regno2 = *(int *) v2p; + int diff; + + if ((diff = (regno_coalesced_allocno_freq [regno2] + - regno_coalesced_allocno_freq [regno1])) != 0) + return frame_pointer_needed ? diff : -diff; + if ((diff = (regno_coalesced_allocno_num [regno1] + - regno_coalesced_allocno_num [regno2])) != 0) + return frame_pointer_needed ? diff : -diff; + return frame_pointer_needed ? regno1 - regno2 : regno2 - regno1; +} + +/* The function sorts pseudo-register numbers in array PSEUDO_REGNOS + of length N for subsequent assigning stack slots to them in the + reload. */ +void +sort_regnos_for_alter_reg (int *pseudo_regnos, int n) +{ + int max_regno = max_reg_num (); + int i, regno, num, assign_p, freq; + allocno_t allocno, a; + + coloring_allocno_bitmap = ira_allocate_bitmap (); + for (i = 0; i < n; i++) + { + regno = pseudo_regnos [i]; + allocno = regno_allocno_map [regno]; + if (allocno != NULL) + bitmap_set_bit (coloring_allocno_bitmap, + ALLOCNO_NUM (allocno)); + } + allocno_coalesced_p = FALSE; + processed_coalesced_allocno_bitmap = ira_allocate_bitmap (); + coalesce_allocnos (TRUE); + ira_free_bitmap (processed_coalesced_allocno_bitmap); + ira_free_bitmap (coloring_allocno_bitmap); + allocno_coalesced_p = FALSE; + regno_coalesced_allocno_freq = ira_allocate (max_regno * sizeof (int)); + regno_coalesced_allocno_num = ira_allocate (max_regno * sizeof (int)); + memset (regno_coalesced_allocno_num, 0, max_regno * sizeof (int)); + for (num = i = 0; i < n; i++) + { + regno = pseudo_regnos [i]; + allocno = regno_allocno_map [regno]; + if (allocno == NULL) + { + regno_coalesced_allocno_freq [regno] = 0; + regno_coalesced_allocno_num [regno] = ++num; + continue; + } + assign_p = regno_coalesced_allocno_num [regno] == 0; + if (assign_p) + num++; + for (freq = 0, a = ALLOCNO_NEXT_COALESCED_ALLOCNO (allocno);; + a = ALLOCNO_NEXT_COALESCED_ALLOCNO (a)) + { + if (assign_p) + regno_coalesced_allocno_num [ALLOCNO_REGNO (a)] = num; + freq += ALLOCNO_FREQ (a); + if (a == allocno) + break; + } + regno_coalesced_allocno_freq [regno] = freq; + } + /* Uncoalesce allocnos which is necessary for (re)assigning during + the reload. */ + for (i = 0; i < allocnos_num; i++) + ALLOCNO_NEXT_COALESCED_ALLOCNO (allocnos [i]) = allocnos [i]; + qsort (pseudo_regnos, n, sizeof (int), coalesced_pseudo_reg_compare); + ira_free (regno_coalesced_allocno_num); + ira_free (regno_coalesced_allocno_freq); +} + + + /* Set up current hard reg costs and current conflict hard reg costs for allocno A. */ static void @@ -1785,7 +1887,7 @@ mark_allocation_change (int regno) if ((old_hard_regno = ALLOCNO_HARD_REGNO (a)) == hard_regno) return; if (old_hard_regno < 0) - cost = -ALLOCNO_UPDATED_MEMORY_COST (a); + cost = -ALLOCNO_MEMORY_COST (a); else { ira_assert (class_hard_reg_index [cover_class] [old_hard_regno] >= 0); @@ -1798,7 +1900,7 @@ mark_allocation_change (int regno) overall_cost -= cost; ALLOCNO_HARD_REGNO (a) = hard_regno; if (hard_regno < 0) - cost += ALLOCNO_UPDATED_MEMORY_COST (a); + cost += ALLOCNO_MEMORY_COST (a); else if (class_hard_reg_index [cover_class] [hard_regno] >= 0) { cost += (ALLOCNO_HARD_REG_COSTS (a) == NULL @@ -1849,7 +1951,7 @@ allocno_reload_assign (allocno_t a, HARD_REG_SET forbidden_regs) if (hard_regno >= 0) { ira_assert (class_hard_reg_index [cover_class] [hard_regno] >= 0); - overall_cost -= (ALLOCNO_UPDATED_MEMORY_COST (a) + overall_cost -= (ALLOCNO_MEMORY_COST (a) - (ALLOCNO_HARD_REG_COSTS (a) == NULL ? ALLOCNO_COVER_CLASS_COST (a) : ALLOCNO_HARD_REG_COSTS (a) @@ -1926,7 +2028,7 @@ reassign_pseudos (int *spilled_pseudo_regs, int num, if (internal_flag_ira_verbose > 3 && ira_dump_file != NULL) fprintf (ira_dump_file, " Spill %d(a%d), cost=%d", regno, ALLOCNO_NUM (a), - ALLOCNO_UPDATED_MEMORY_COST (a) + ALLOCNO_MEMORY_COST (a) - ALLOCNO_COVER_CLASS_COST (a)); allocno_reload_assign (a, forbidden_regs); if (reg_renumber [regno] >= 0) @@ -1978,7 +2080,7 @@ reassign_pseudos (int *spilled_pseudo_regs, int num, fprintf (ira_dump_file, " Try assign %d(a%d), cost=%d", regno, ALLOCNO_NUM (a), - ALLOCNO_UPDATED_MEMORY_COST (a) + ALLOCNO_MEMORY_COST (a) - ALLOCNO_COVER_CLASS_COST (a)); if (allocno_reload_assign (a, forbidden_regs)) { @@ -2039,7 +2141,7 @@ reuse_stack_slot (int regno, unsigned int inherent_size, EXECUTE_IF_SET_IN_BITMAP (&slot->spilled_regs, FIRST_PSEUDO_REGISTER, i, bi) { - if (allocno_reg_conflict_p (regno, i)) + if (pseudo_live_ranges_intersect_p (regno, i)) goto cont; } for (freq = 0, cp = ALLOCNO_COPIES (allocno); @@ -2110,6 +2212,104 @@ mark_new_stack_slot (rtx x, int regno, unsigned int total_size) fprintf (ira_dump_file, " Assigning %d a new slot\n", regno); } + +/* Return spill cost for pseudo-registers whose numbers are in array + regnos (end marker is a negative number) for reload with given IN + and OUT for INSN. Return also number points (through + EXCESS_PRESSURE_LIVE_LENGTH) where the pseudo-register lives and + the register pressure is high, number of references of the + pesudo-registers (through NREFS), number of call used + hard-registers occupied by the pseudo-registers (through + CALL_USED_COUNT), and the first hard regno occupied by the + pseudo-registers (through FIRST_HARD_REGNO). */ +static int +calculate_spill_cost (int *regnos, rtx in, rtx out, rtx insn, + int *excess_pressure_live_length, + int *nrefs, int *call_used_count, int *first_hard_regno) +{ + int i, cost, regno, hard_regno, j, count, saved_cost, nregs, in_p, out_p; + int length; + allocno_t a; + + *nrefs = 0; + for (length = count = cost = i = 0;; i++) + { + regno = regnos [i]; + *nrefs += REG_N_REFS (regno); + if (regno < 0) + break; + hard_regno = reg_renumber [regno]; + ira_assert (hard_regno >= 0); + a = regno_allocno_map [regno]; + length += ALLOCNO_EXCESS_PRESSURE_POINTS_NUM (a); + cost += ALLOCNO_MEMORY_COST (a) - ALLOCNO_COVER_CLASS_COST (a); + nregs = hard_regno_nregs [hard_regno] [ALLOCNO_MODE (a)]; + for (j = 0; j < nregs; j++) + if (! TEST_HARD_REG_BIT (call_used_reg_set, hard_regno + j)) + break; + if (j == nregs) + count++; + in_p = in && REG_P (in) && (int) REGNO (in) == hard_regno; + out_p = out && REG_P (out) && (int) REGNO (out) == hard_regno; + if ((in_p || out_p) + && find_regno_note (insn, REG_DEAD, hard_regno) != NULL_RTX) + { + saved_cost = 0; + if (in_p) + saved_cost += memory_move_cost + [ALLOCNO_MODE (a)] [ALLOCNO_COVER_CLASS (a)] [1]; + if (out_p) + saved_cost + += memory_move_cost + [ALLOCNO_MODE (a)] [ALLOCNO_COVER_CLASS (a)] [0]; + cost -= REG_FREQ_FROM_BB (BLOCK_FOR_INSN (insn)) * saved_cost; + } + } + *excess_pressure_live_length = length; + *call_used_count = count; + hard_regno = -1; + if (regnos [0] >= 0) + { + hard_regno = reg_renumber [regnos [0]]; + } + *first_hard_regno = hard_regno; + return cost; +} + +/* Return TRUE if spilling pseudo-registers whose numbers are in array + REGNOS is better spilling pseudo-registers with numbers in + OTHER_REGNOS for reload with given IN and OUT for INSN. */ +int +better_spill_reload_regno_p (int *regnos, int *other_regnos, + rtx in, rtx out, rtx insn) +{ + int cost, other_cost; + int length, other_length; + int nrefs, other_nrefs; + int call_used_count, other_call_used_count; + int hard_regno, other_hard_regno; + + cost = calculate_spill_cost (regnos, in, out, insn, + &length, &nrefs, &call_used_count, &hard_regno); + other_cost = calculate_spill_cost (other_regnos, in, out, insn, + &other_length, &other_nrefs, + &other_call_used_count, + &other_hard_regno); + if (cost != other_cost) + return cost < other_cost; + if (length != other_length) + return length > other_length; +#ifdef REG_ALLOC_ORDER + if (hard_regno >= 0 && other_hard_regno >= 0) + return (inv_reg_alloc_order [hard_regno] + < inv_reg_alloc_order [other_hard_regno]); +#else + if (call_used_count != other_call_used_count) + return call_used_count > other_call_used_count; +#endif + return FALSE; +} + /* The function returns (through CALL_CLOBBERED_REGS) hard registers diff --git a/gcc/ira-conflicts.c b/gcc/ira-conflicts.c index 7ad3e9bf526..383fea93306 100644 --- a/gcc/ira-conflicts.c +++ b/gcc/ira-conflicts.c @@ -653,10 +653,11 @@ propagate_info (void) propagate_allocno_info (a); } -/* The function returns TRUE if allocnos A1 and A2 conflict. It - checks intersection of the corresponding live ranges for this. */ +/* The function returns TRUE if live ranges of allocnos A1 and A2 + intersect. It checks intersection of the corresponding live ranges + for this. */ int -allocno_conflict_p (allocno_t a1, allocno_t a2) +allocno_live_ranges_intersect_p (allocno_t a1, allocno_t a2) { allocno_live_range_t r1, r2; @@ -679,10 +680,11 @@ allocno_conflict_p (allocno_t a1, allocno_t a2) return FALSE; } -/* The function returns TRUE if pseudo-registers REGNO1 and REGNO2 - conflict. It should be used when there is only one region. */ +/* The function returns TRUE if live ranges of pseudo-registers REGNO1 + and REGNO2 intersect. It should be used when there is only one + region. */ int -allocno_reg_conflict_p (int regno1, int regno2) +pseudo_live_ranges_intersect_p (int regno1, int regno2) { allocno_t a1, a2; @@ -693,7 +695,7 @@ allocno_reg_conflict_p (int regno1, int regno2) if ((a1 = ira_loop_tree_root->regno_allocno_map [regno1]) == NULL || (a2 = ira_loop_tree_root->regno_allocno_map [regno2]) == NULL) return FALSE; - return allocno_conflict_p (a1, a2); + return allocno_live_ranges_intersect_p (a1, a2); } /* Remove copies involving conflicting allocnos. */ diff --git a/gcc/ira-costs.c b/gcc/ira-costs.c index be40c20ac2d..4a6ac599051 100644 --- a/gcc/ira-costs.c +++ b/gcc/ira-costs.c @@ -209,7 +209,7 @@ record_reg_classes (int n_alts, int n_ops, rtx *ops, int allows_mem [MAX_RECOG_OPERANDS]; int class; int alt_fail = 0; - int alt_cost = 0; + int alt_cost = 0, op_cost_add; for (i = 0; i < n_ops; i++) { @@ -301,10 +301,10 @@ record_reg_classes (int n_alts, int n_ops, rtx *ops, pp->cost [k] = ((recog_data.operand_type [i] != OP_OUT ? register_may_move_in_cost [mode] - [class] [classes [i]] : 0) + [class] [classes [i]] * frequency : 0) + (recog_data.operand_type [i] != OP_IN ? register_may_move_out_cost [mode] - [classes [i]] [class] : 0)); + [classes [i]] [class] * frequency : 0)); } /* If the alternative actually allows memory, make @@ -312,11 +312,11 @@ record_reg_classes (int n_alts, int n_ops, rtx *ops, insn to load it. */ pp->mem_cost = ((recog_data.operand_type [i] != OP_IN - ? memory_move_cost [mode] [classes [i]] [0] - : 0) + ? memory_move_cost [mode] [classes [i]] [0] + : 0) * frequency + (recog_data.operand_type [i] != OP_OUT ? memory_move_cost [mode] [classes [i]] [1] - : 0) - allows_mem [i]); + : 0) * frequency - allows_mem [i] * frequency / 2); /* If we have assigned a class to this allocno in our first pass, add a cost to this alternative @@ -549,10 +549,10 @@ record_reg_classes (int n_alts, int n_ops, rtx *ops, pp->cost [k] = ((recog_data.operand_type [i] != OP_OUT ? register_may_move_in_cost [mode] - [class] [classes [i]] : 0) + [class] [classes [i]] * frequency : 0) + (recog_data.operand_type [i] != OP_IN ? register_may_move_out_cost [mode] - [classes [i]] [class] : 0)); + [classes [i]] [class] * frequency : 0)); } /* If the alternative actually allows memory, make @@ -561,10 +561,10 @@ record_reg_classes (int n_alts, int n_ops, rtx *ops, pp->mem_cost = ((recog_data.operand_type [i] != OP_IN ? memory_move_cost [mode] [classes [i]] [0] - : 0) + : 0) * frequency + (recog_data.operand_type [i] != OP_OUT ? memory_move_cost [mode] [classes [i]] [1] - : 0) - allows_mem [i]); + : 0) * frequency - allows_mem [i] * frequency / 2); /* If we have assigned a class to this allocno in our first pass, add a cost to this alternative @@ -627,6 +627,7 @@ record_reg_classes (int n_alts, int n_ops, rtx *ops, if (alt_fail) continue; + op_cost_add = alt_cost * frequency; /* Finally, update the costs with the information we've calculated about this alternative. */ for (i = 0; i < n_ops; i++) @@ -637,11 +638,12 @@ record_reg_classes (int n_alts, int n_ops, rtx *ops, int scale = 1 + (recog_data.operand_type [i] == OP_INOUT); pp->mem_cost = MIN (pp->mem_cost, - (qq->mem_cost + alt_cost) * scale); + (qq->mem_cost + op_cost_add) * scale); for (k = 0; k < cost_classes_num; k++) - pp->cost [k] = MIN (pp->cost [k], - (qq->cost [k] + alt_cost) * scale); + pp->cost [k] + = MIN (pp->cost [k], + (qq->cost [k] + op_cost_add) * scale); } } @@ -671,23 +673,7 @@ record_reg_classes (int n_alts, int n_ops, rtx *ops, int class; unsigned int nr; - if (regno >= FIRST_PSEUDO_REGISTER && allocno_pref != 0) - { - enum reg_class pref; - - /* We could use cover class here but it is less accurate - approximation. */ - pref - = allocno_pref [ALLOCNO_NUM (ira_curr_regno_allocno_map - [regno])]; - - if (pref != NO_REGS - && (reg_class_size [pref] - == (unsigned) CLASS_MAX_NREGS (pref, mode)) - && register_move_cost [mode] [pref] [pref] < 10 * 2) - op_costs [i]->cost [cost_class_nums [pref]] = -1; - } - else if (regno < FIRST_PSEUDO_REGISTER) + if (regno < FIRST_PSEUDO_REGISTER) for (k = 0; k < cost_classes_num; k++) { class = cost_classes [k]; @@ -696,7 +682,7 @@ record_reg_classes (int n_alts, int n_ops, rtx *ops, == (unsigned) CLASS_MAX_NREGS (class, mode))) { if (reg_class_size [class] == 1) - op_costs [i]->cost [k] = -1; + op_costs [i]->cost [k] = -frequency; else { for (nr = 0; @@ -707,7 +693,7 @@ record_reg_classes (int n_alts, int n_ops, rtx *ops, break; if (nr == (unsigned) hard_regno_nregs [regno] [mode]) - op_costs [i]->cost [k] = -1; + op_costs [i]->cost [k] = -frequency; } } } @@ -1032,7 +1018,6 @@ scan_one_insn (rtx insn) * frequency); record_address_regs (GET_MODE (SET_SRC (set)), XEXP (SET_SRC (set), 0), 0, MEM, SCRATCH, frequency * 2); - return insn; } record_operand_costs (insn, op_costs, allocno_pref); @@ -1050,9 +1035,9 @@ scan_one_insn (rtx insn) [regno])); struct costs *q = op_costs [i]; - p->mem_cost += q->mem_cost * frequency; + p->mem_cost += q->mem_cost; for (k = 0; k < cost_classes_num; k++) - p->cost [k] += q->cost [k] * frequency; + p->cost [k] += q->cost [k]; } return insn; @@ -1534,9 +1519,9 @@ init_ira_costs (void) = sizeof (struct costs) + sizeof (int) * (important_classes_num - 1); /* Don't use ira_allocate because vectors live through several IRA calls. */ init_cost = xmalloc (max_struct_costs_size); - init_cost->mem_cost = 10000; + init_cost->mem_cost = 1000000; for (i = 0; i < important_classes_num; i++) - init_cost->cost [i] = 10000; + init_cost->cost [i] = 1000000; for (i = 0; i < MAX_RECOG_OPERANDS; i++) { op_costs [i] = xmalloc (max_struct_costs_size); @@ -1635,7 +1620,8 @@ tune_allocno_costs_and_cover_classes (void) get_call_invalidated_used_regs (call, &clobbered_regs, FALSE); /* ??? If only part is call clobbered. */ - if (! hard_reg_not_in_set_p (regno, mode, clobbered_regs)) + if (! hard_reg_not_in_set_p (regno, mode, + clobbered_regs)) cost += freq * (memory_move_cost [mode] [class] [0] + memory_move_cost [mode] [class] [1]); diff --git a/gcc/ira-emit.c b/gcc/ira-emit.c index 65458d0f2e5..edfea7708ed 100644 --- a/gcc/ira-emit.c +++ b/gcc/ira-emit.c @@ -59,6 +59,7 @@ static void set_allocno_reg (allocno_t, rtx); static int not_modified_p (allocno_t, allocno_t); static void generate_edge_moves (edge); static void change_loop (loop_tree_node_t); +static void set_allocno_somewhere_renamed_p (void); static int eq_edge_move_lists_p (VEC(edge,gc) *); static void unify_moves (basic_block, int); static void traverse_moves (struct move *); @@ -340,6 +341,10 @@ static bitmap local_allocno_bitmap; regno. */ static bitmap used_regno_bitmap; +/* This bitmap contains regnos of allocnos which were renamed locally + because the allocnos correspond to disjoint live ranges. */ +static bitmap renamed_regno_bitmap; + /* The following function changes (if necessary) pseudo-registers inside loop given by loop tree node NODE. */ static void @@ -350,7 +355,9 @@ change_loop (loop_tree_node_t node) int regno, used_p; allocno_t allocno, father_allocno, *map; rtx insn, original_reg; - + enum reg_class cover_class; + loop_tree_node_t father; + if (node != ira_loop_tree_root) { @@ -370,12 +377,14 @@ change_loop (loop_tree_node_t node) " Changing RTL for loop %d (header bb%d)\n", node->loop->num, node->loop->header->index); - map = ira_curr_loop_tree_node->father->regno_allocno_map; + father = ira_curr_loop_tree_node->father; + map = father->regno_allocno_map; EXECUTE_IF_SET_IN_REG_SET (ira_curr_loop_tree_node->border_allocnos, 0, i, bi) { allocno = allocnos [i]; regno = ALLOCNO_REGNO (allocno); + cover_class = ALLOCNO_COVER_CLASS (allocno); father_allocno = map [regno]; /* We generate the same register move because the reload can put a allocno into memory in this case we will have live @@ -387,12 +396,14 @@ change_loop (loop_tree_node_t node) && (ALLOCNO_HARD_REGNO (allocno) == ALLOCNO_HARD_REGNO (father_allocno)) && (ALLOCNO_HARD_REGNO (allocno) < 0 + || (father->reg_pressure [cover_class] + 1 + <= available_class_regs [cover_class]) || TEST_HARD_REG_BIT (prohibited_mode_move_regs [ALLOCNO_MODE (allocno)], ALLOCNO_HARD_REGNO (allocno)) - /* don't create copies because reload can spill a - allocno set by copy although allocno will not get - memory slot. */ + /* don't create copies because reload can spill an + allocno set by copy although the allocno will not + get memory slot. */ || reg_equiv_invariant_p [regno] || reg_equiv_const [regno] != NULL_RTX)) continue; @@ -422,12 +433,32 @@ change_loop (loop_tree_node_t node) continue; used_p = bitmap_bit_p (used_regno_bitmap, regno); bitmap_set_bit (used_regno_bitmap, regno); + ALLOCNO_SOMEWHERE_RENAMED_P (allocno) = TRUE; if (! used_p) continue; + bitmap_set_bit (renamed_regno_bitmap, regno); set_allocno_reg (allocno, create_new_reg (ALLOCNO_REG (allocno))); } } +/* The function sets up flag somewhere_renamed_p. */ +static void +set_allocno_somewhere_renamed_p (void) +{ + int i; + unsigned int regno; + allocno_t allocno; + + for (i = 0; i < allocnos_num; i++) + { + allocno = allocnos [i]; + regno = ALLOCNO_REGNO (allocno); + if (bitmap_bit_p (renamed_regno_bitmap, regno) + && REGNO (ALLOCNO_REG (allocno)) == regno) + ALLOCNO_SOMEWHERE_RENAMED_P (allocno) = TRUE; + } +} + /* The function returns nonzero if move lists on all edges in vector VEC are equal. */ static int @@ -927,9 +958,12 @@ ira_emit (int loops_p) memset (at_bb_end, 0, sizeof (struct move *) * last_basic_block); local_allocno_bitmap = ira_allocate_bitmap (); used_regno_bitmap = ira_allocate_bitmap (); + renamed_regno_bitmap = ira_allocate_bitmap (); max_regno_before_changing = max_reg_num (); traverse_loop_tree (FALSE, ira_loop_tree_root, change_loop, NULL); + set_allocno_somewhere_renamed_p (); ira_free_bitmap (used_regno_bitmap); + ira_free_bitmap (renamed_regno_bitmap); ira_free_bitmap (local_allocno_bitmap); FOR_EACH_BB (bb) { diff --git a/gcc/ira-int.h b/gcc/ira-int.h index 27cea8f7ea3..da4b4b7301f 100644 --- a/gcc/ira-int.h +++ b/gcc/ira-int.h @@ -217,6 +217,9 @@ struct allocno /* Minimal accumulated, and updated costs of memory for the allocno. */ int memory_cost, updated_memory_cost; + /* Accumulated number of points where the allocno lives and there is + excess pressure for its class. */ + int excess_pressure_points_num; /* Copies to other non-conflicting allocnos. The copies can represent move insn or potential move insn usually because of two operand constraints. */ @@ -265,6 +268,9 @@ struct allocno /* TRUE if the allocno was a destination of removed move at the end of loop because the value is not changed in loop. */ unsigned int mem_optimized_dest_p : 1; + /* TRUE if the correspdonding pseudo-register has disjoint live + ranges and the other allocnos except this one changed regno. */ + unsigned int somewhere_renamed_p : 1; /* In the reload, we should not reassign a hard register to the allocno got memory if the flag value is TRUE. */ unsigned int dont_reassign_p : 1; @@ -331,6 +337,7 @@ struct allocno #define ALLOCNO_CALLS_CROSSED_NUM(A) ((A)->calls_crossed_num) #define ALLOCNO_MEM_OPTIMIZED_DEST(A) ((A)->mem_optimized_dest) #define ALLOCNO_MEM_OPTIMIZED_DEST_P(A) ((A)->mem_optimized_dest_p) +#define ALLOCNO_SOMEWHERE_RENAMED_P(A) ((A)->somewhere_renamed_p) #define ALLOCNO_DONT_REASSIGN_P(A) ((A)->dont_reassign_p) #ifdef STACK_REGS #define ALLOCNO_NO_STACK_REG_P(A) ((A)->no_stack_reg_p) @@ -353,6 +360,7 @@ struct allocno #define ALLOCNO_COVER_CLASS_COST(A) ((A)->cover_class_cost) #define ALLOCNO_MEMORY_COST(A) ((A)->memory_cost) #define ALLOCNO_UPDATED_MEMORY_COST(A) ((A)->updated_memory_cost) +#define ALLOCNO_EXCESS_PRESSURE_POINTS_NUM(A) ((A)->excess_pressure_points_num) #define ALLOCNO_AVAILABLE_REGS_NUM(A) ((A)->available_regs_num) #define ALLOCNO_NEXT_BUCKET_ALLOCNO(A) ((A)->next_bucket_allocno) #define ALLOCNO_PREV_BUCKET_ALLOCNO(A) ((A)->prev_bucket_allocno) @@ -644,8 +652,8 @@ extern void create_allocno_live_ranges (void); extern void finish_allocno_live_ranges (void); /* ira-conflicts.c */ -extern int allocno_conflict_p (allocno_t, allocno_t); -extern int allocno_reg_conflict_p (int, int); +extern int allocno_live_ranges_intersect_p (allocno_t, allocno_t); +extern int pseudo_live_ranges_intersect_p (int, int); extern void debug_conflicts (int); extern void ira_build_conflicts (void); diff --git a/gcc/ira-lives.c b/gcc/ira-lives.c index ad0b522c308..c41e3ec6239 100644 --- a/gcc/ira-lives.c +++ b/gcc/ira-lives.c @@ -41,6 +41,8 @@ Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA /* The file contains code is analogous to one in global but the code works on the allocno basis. */ +static void make_regno_born (int); +static void update_allocno_pressure_excess_length (allocno_t); static void set_allocno_live (allocno_t); static void clear_allocno_live (allocno_t); static void mark_reg_store (rtx, const_rtx, void *); @@ -63,6 +65,10 @@ allocno_live_range_t *start_point_ranges, *finish_point_ranges; /* Number of the current program point. */ static int curr_point; +/* Point where register pressure excess started of -1 if there is no + register pressure excess. */ +static int high_pressure_start_point [N_REG_CLASSES]; + /* Number of ints required to hold allocnos_num bits. */ int allocno_set_words; @@ -115,6 +121,24 @@ make_regno_born (int regno) = create_allocno_live_range (a, curr_point, -1, ALLOCNO_LIVE_RANGES (a)); } +/* Update ALLOCNO_EXCESS_PRESSURE_POINTS_NUM for allocno A. */ +static void +update_allocno_pressure_excess_length (allocno_t a) +{ + int start; + enum reg_class cover_class; + allocno_live_range_t p; + + cover_class = ALLOCNO_COVER_CLASS (a); + if (high_pressure_start_point [cover_class] < 0) + return; + p = ALLOCNO_LIVE_RANGES (a); + ira_assert (p != NULL); + start = (high_pressure_start_point [cover_class] > p->start + ? high_pressure_start_point [cover_class] : p->start); + ALLOCNO_EXCESS_PRESSURE_POINTS_NUM (a) += curr_point - start + 1; +} + /* The function processing death of register REGNO. It updates live hard regs or finish the current live range for allocno corresponding to REGNO. */ @@ -135,6 +159,7 @@ make_regno_dead (int regno) p = ALLOCNO_LIVE_RANGES (a); ira_assert (p != NULL); p->finish = curr_point; + update_allocno_pressure_excess_length (a); } /* The function processing birth and, right after then, death of @@ -153,6 +178,7 @@ static int curr_reg_pressure [N_REG_CLASSES]; static void set_allocno_live (allocno_t a) { + int nregs; enum reg_class cover_class; if (TEST_ALLOCNO_LIVE (ALLOCNO_NUM (a))) @@ -162,8 +188,11 @@ set_allocno_live (allocno_t a) IOR_HARD_REG_SET (ALLOCNO_TOTAL_CONFLICT_HARD_REGS (a), hard_regs_live); bitmap_set_bit (allocnos_live_bitmap, ALLOCNO_NUM (a)); cover_class = ALLOCNO_COVER_CLASS (a); - curr_reg_pressure [cover_class] - += reg_class_nregs [cover_class] [ALLOCNO_MODE (a)]; + nregs = reg_class_nregs [cover_class] [ALLOCNO_MODE (a)]; + curr_reg_pressure [cover_class] += nregs; + if (high_pressure_start_point [cover_class] < 0 + && curr_reg_pressure [cover_class] > available_class_regs [cover_class]) + high_pressure_start_point [cover_class] = curr_point; if (curr_bb_node->reg_pressure [cover_class] < curr_reg_pressure [cover_class]) curr_bb_node->reg_pressure [cover_class] = curr_reg_pressure [cover_class]; @@ -173,6 +202,7 @@ set_allocno_live (allocno_t a) static void clear_allocno_live (allocno_t a) { + int i; enum reg_class cover_class; if (bitmap_bit_p (allocnos_live_bitmap, ALLOCNO_NUM (a))) @@ -181,6 +211,16 @@ clear_allocno_live (allocno_t a) curr_reg_pressure [cover_class] -= reg_class_nregs [cover_class] [ALLOCNO_MODE (a)]; ira_assert (curr_reg_pressure [cover_class] >= 0); + if (high_pressure_start_point [cover_class] >= 0 + && (curr_reg_pressure [cover_class] + <= available_class_regs [cover_class])) + { + EXECUTE_IF_SET_IN_ALLOCNO_SET (allocnos_live, i, + { + update_allocno_pressure_excess_length (allocnos [i]); + }); + high_pressure_start_point [cover_class] = -1; + } } CLEAR_ALLOCNO_LIVE (ALLOCNO_NUM (a)); bitmap_clear_bit (allocnos_live_bitmap, ALLOCNO_NUM (a)); @@ -247,6 +287,10 @@ mark_reg_store (rtx reg, const_rtx setter ATTRIBUTE_UNUSED, { cover_class = class_translate [REGNO_REG_CLASS (regno)]; curr_reg_pressure [cover_class]++; + if (high_pressure_start_point [cover_class] < 0 + && (curr_reg_pressure [cover_class] + > available_class_regs [cover_class])) + high_pressure_start_point [cover_class] = curr_point; make_regno_born (regno); if (curr_bb_node->reg_pressure [cover_class] < curr_reg_pressure [cover_class]) @@ -302,6 +346,7 @@ mark_reg_conflicts (rtx reg) static void mark_reg_death (rtx reg) { + int i; int regno = REGNO (reg); if (regno >= FIRST_PSEUDO_REGISTER) @@ -327,6 +372,16 @@ mark_reg_death (rtx reg) { cover_class = class_translate [REGNO_REG_CLASS (regno)]; curr_reg_pressure [cover_class]--; + if (high_pressure_start_point [cover_class] >= 0 + && (curr_reg_pressure [cover_class] + <= available_class_regs [cover_class])) + { + EXECUTE_IF_SET_IN_ALLOCNO_SET (allocnos_live, i, + { + update_allocno_pressure_excess_length (allocnos [i]); + }); + high_pressure_start_point [cover_class] = -1; + } ira_assert (curr_reg_pressure [cover_class] >= 0); make_regno_dead (regno); } @@ -577,12 +632,17 @@ process_bb_node_lives (loop_tree_node_t loop_tree_node) if (bb != NULL) { for (i = 0; i < reg_class_cover_size; i++) - curr_reg_pressure [reg_class_cover [i]] = 0; + { + curr_reg_pressure [reg_class_cover [i]] = 0; + high_pressure_start_point [reg_class_cover [i]] = -1; + } curr_bb_node = loop_tree_node; reg_live_in = DF_LR_IN (bb); memset (allocnos_live, 0, allocno_set_words * sizeof (INT_TYPE)); REG_SET_TO_HARD_REG_SET (hard_regs_live, reg_live_in); AND_COMPL_HARD_REG_SET (hard_regs_live, eliminable_regset); + /* ??? !!!! No alloc regs for pressure */ + AND_COMPL_HARD_REG_SET (hard_regs_live, no_alloc_regs); for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) if (TEST_HARD_REG_BIT (hard_regs_live, i)) { @@ -594,6 +654,8 @@ process_bb_node_lives (loop_tree_node_t loop_tree_node) < curr_reg_pressure [cover_class]) curr_bb_node->reg_pressure [cover_class] = curr_reg_pressure [cover_class]; + ira_assert (curr_reg_pressure [cover_class] + <= available_class_regs [cover_class]); } bitmap_clear (allocnos_live_bitmap); EXECUTE_IF_SET_IN_BITMAP (reg_live_in, FIRST_PSEUDO_REGISTER, j, bi) diff --git a/gcc/ira.c b/gcc/ira.c index 4b7529cd4ee..8041a85524f 100644 --- a/gcc/ira.c +++ b/gcc/ira.c @@ -686,7 +686,7 @@ enum reg_class reg_class_union [N_REG_CLASSES] [N_REG_CLASSES]; static void setup_reg_class_intersect_union (void) { - int i, j, cl1, cl2, cl3; + int i, cl1, cl2, cl3; HARD_REG_SET intersection_set, union_set, temp_set2; for (cl1 = 0; cl1 < N_REG_CLASSES; cl1++) @@ -723,27 +723,12 @@ setup_reg_class_intersect_union (void) reg_class_contents [(int) reg_class_union [cl1] [cl2]]); AND_COMPL_HARD_REG_SET (temp_set2, no_unit_alloc_regs); - if (! hard_reg_set_subset_p (temp_hard_regset, temp_set2)) + if (hard_reg_set_subset_p (temp_set2, temp_hard_regset)) reg_class_union [cl1] [cl2] = (enum reg_class) cl3; } } } } - /* Fix reg_class_union for cover classes: prefer the first cover - class. */ - for (i = 0; i < reg_class_cover_size; i++) - { - cl1 = reg_class_cover [i]; - COPY_HARD_REG_SET (temp_hard_regset, reg_class_contents [cl1]); - AND_COMPL_HARD_REG_SET (temp_hard_regset, no_unit_alloc_regs); - if (hard_reg_set_equal_p (temp_hard_regset, zero_hard_reg_set)) - continue; - for (j = i + 1; j < reg_class_cover_size; j++) - { - cl2 = reg_class_cover [j]; - reg_class_union [cl1] [cl2] = reg_class_union [cl2] [cl1] = cl1; - } - } } #endif @@ -1269,7 +1254,7 @@ calculate_allocation_cost (void) reg_class_contents [ALLOCNO_COVER_CLASS (a)])); if (hard_regno < 0) { - cost = ALLOCNO_UPDATED_MEMORY_COST (a); + cost = ALLOCNO_MEMORY_COST (a); mem_cost += cost; } else if (ALLOCNO_HARD_REG_COSTS (a) != NULL) @@ -1530,6 +1515,8 @@ sort_insn_chain (int freq_p) +/* All natural loops. */ +struct loops ira_loops; /* This is the main entry of IRA. */ void @@ -1537,7 +1524,8 @@ ira (FILE *f) { int overall_cost_before, loops_p, allocated_reg_info_size; int max_regno_before_ira, max_point_before_emit; - int rebuild_p; + int rebuild_p, saved_flag_ira_algorithm; + basic_block bb; if (flag_ira_verbose < 10) { @@ -1599,6 +1587,14 @@ ira (FILE *f) overall_cost = reg_cost = mem_cost = 0; load_cost = store_cost = shuffle_cost = 0; move_loops_num = additional_jumps_num = 0; + + ira_assert (current_loops == NULL); + flow_loops_find (&ira_loops); + current_loops = &ira_loops; + saved_flag_ira_algorithm = flag_ira_algorithm; + if (number_of_loops () > (unsigned) IRA_MAX_LOOPS_NUM) + flag_ira_algorithm = IRA_ALGORITHM_CB; + if (internal_flag_ira_verbose > 0 && ira_dump_file != NULL) fprintf (ira_dump_file, "Building IRA IR\n"); loops_p = ira_build (flag_ira_algorithm == IRA_ALGORITHM_REGIONAL @@ -1692,6 +1688,17 @@ ira (FILE *f) ira_destroy (); +#if 0 + ira_assert (current_loops == &ira_loops); +#endif + flow_loops_free (&ira_loops); + free_dominance_info (CDI_DOMINATORS); + FOR_ALL_BB (bb) + bb->loop_father = NULL; + current_loops = NULL; + + flag_ira_algorithm = saved_flag_ira_algorithm; + cleanup_cfg (CLEANUP_EXPENSIVE); regstat_free_ri (); diff --git a/gcc/ira.h b/gcc/ira.h index 3d3fedcdf24..d53da4152f6 100644 --- a/gcc/ira.h +++ b/gcc/ira.h @@ -60,10 +60,13 @@ extern rtx ira_eliminate_regs (rtx, enum machine_mode); extern void sort_insn_chain (int); extern void ira (FILE *); +extern void sort_regnos_for_alter_reg (int *, int); extern void mark_allocation_change (int); extern void mark_memory_move_deletion (int, int); extern int reassign_pseudos (int *, int, HARD_REG_SET, HARD_REG_SET *, HARD_REG_SET *, bitmap); extern rtx reuse_stack_slot (int, unsigned int, unsigned int); extern void mark_new_stack_slot (rtx, int, unsigned int); +extern int better_spill_reload_regno_p (int *, int *, rtx, rtx, rtx); extern void collect_pseudo_call_clobbered_regs (int, HARD_REG_SET *); + diff --git a/gcc/params.def b/gcc/params.def index 0428c3120af..6c6c5e6fdb2 100644 --- a/gcc/params.def +++ b/gcc/params.def @@ -729,6 +729,11 @@ DEFPARAM (PARAM_DF_DOUBLE_QUEUE_THRESHOLD_FACTOR, "Multiplier used for determining the double-queueing threshold", 2, 0, 0) +DEFPARAM (PARAM_IRA_MAX_LOOPS_NUM, + "ira-max-loops-num", + "max loops number for regional RA", + 50, 0, 0) + /* Local variables: mode:c diff --git a/gcc/params.h b/gcc/params.h index 7c54b5da292..505affd22fa 100644 --- a/gcc/params.h +++ b/gcc/params.h @@ -171,4 +171,6 @@ typedef enum compiler_param PARAM_VALUE (PARAM_L2_CACHE_SIZE) #define USE_CANONICAL_TYPES \ PARAM_VALUE (PARAM_USE_CANONICAL_TYPES) +#define IRA_MAX_LOOPS_NUM \ + PARAM_VALUE (PARAM_IRA_MAX_LOOPS_NUM) #endif /* ! GCC_PARAMS_H */ diff --git a/gcc/reload1.c b/gcc/reload1.c index ec963f320d5..9586c9956a3 100644 --- a/gcc/reload1.c +++ b/gcc/reload1.c @@ -689,20 +689,6 @@ static int failure; /* Temporary array of pseudo-register number. */ static int *temp_pseudo_reg_arr; -/* The function is used to sort pseudos according their usage - frequencies (putting most frequently ones first). */ -static int -pseudo_reg_compare (const void *v1p, const void *v2p) -{ - int regno1 = *(int *) v1p; - int regno2 = *(int *) v2p; - int diff; - - if ((diff = REG_FREQ (regno2) - REG_FREQ (regno1)) != 0) - return diff; - return regno1 - regno2; -} - /* Main entry point for the reload pass. FIRST is the first insn of the function being compiled. @@ -911,13 +897,10 @@ reload (rtx first, int global) temp_pseudo_reg_arr [n++] = i; if (flag_ira) - qsort (temp_pseudo_reg_arr, n, sizeof (int), pseudo_reg_compare); - if (frame_pointer_needed || ! flag_ira) - for (i = 0; i < n; i++) - alter_reg (temp_pseudo_reg_arr [i], -1, false); - else - for (i = n - 1; i >= 0; i--) - alter_reg (temp_pseudo_reg_arr [i], -1, false); + sort_regnos_for_alter_reg (temp_pseudo_reg_arr, n); + + for (i = 0; i < n; i++) + alter_reg (temp_pseudo_reg_arr [i], -1, false); /* If we have some registers we think can be eliminated, scan all insns to see if there is an insn that sets one of these registers to something @@ -1723,6 +1706,10 @@ static int spill_cost[FIRST_PSEUDO_REGISTER]; only the first hard reg for a multi-reg pseudo. */ static int spill_add_cost[FIRST_PSEUDO_REGISTER]; +/* Map of hard regno to pseudo regno currently occupying the hard + reg. */ +static int hard_regno_to_pseudo_regno [FIRST_PSEUDO_REGISTER]; + /* Update the spill cost arrays, considering that pseudo REG is live. */ static void @@ -1742,10 +1729,12 @@ count_pseudo (int reg) gcc_assert (r >= 0); spill_add_cost[r] += freq; - nregs = hard_regno_nregs[r][PSEUDO_REGNO_MODE (reg)]; while (nregs-- > 0) - spill_cost[r + nregs] += freq; + { + hard_regno_to_pseudo_regno [r + nregs] = reg; + spill_cost[r + nregs] += freq; + } } /* Calculate the SPILL_COST and SPILL_ADD_COST arrays and determine the @@ -1763,6 +1752,8 @@ order_regs_for_reload (struct insn_chain *chain) memset (spill_cost, 0, sizeof spill_cost); memset (spill_add_cost, 0, sizeof spill_add_cost); + for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) + hard_regno_to_pseudo_regno [i] = -1; /* Count number of uses of each hard reg by pseudo regs allocated to it and then order them by decreasing use. First exclude hard registers @@ -1818,7 +1809,10 @@ count_spilled_pseudo (int spilled, int spilled_nregs, int reg) spill_add_cost[r] -= freq; while (nregs-- > 0) - spill_cost[r + nregs] -= freq; + { + hard_regno_to_pseudo_regno [r + nregs] = -1; + spill_cost[r + nregs] -= freq; + } } /* Find reload register to use for reload number ORDER. */ @@ -1830,11 +1824,13 @@ find_reg (struct insn_chain *chain, int order) struct reload *rl = rld + rnum; int best_cost = INT_MAX; int best_reg = -1; - unsigned int i, j; + unsigned int i, j, n; int k; HARD_REG_SET not_usable; HARD_REG_SET used_by_other_reload; reg_set_iterator rsi; + static int regno_pseudo_regs [FIRST_PSEUDO_REGISTER]; + static int best_regno_pseudo_regs [FIRST_PSEUDO_REGISTER]; COPY_HARD_REG_SET (not_usable, bad_spill_regs); IOR_HARD_REG_SET (not_usable, bad_spill_regs_global); @@ -1871,6 +1867,35 @@ find_reg (struct insn_chain *chain, int order) } if (! ok) continue; + + if (flag_ira) + { + for (n = j = 0; j < this_nregs; j++) + { + int r = hard_regno_to_pseudo_regno [regno + j]; + + if (r < 0) + continue; + if (n == 0 || regno_pseudo_regs [n - 1] != r) + regno_pseudo_regs [n++] = r; + } + regno_pseudo_regs [n++] = -1; + if (best_reg < 0 + || better_spill_reload_regno_p (regno_pseudo_regs, + best_regno_pseudo_regs, + rl->in, rl->out, chain->insn)) + { + best_reg = regno; + for (j = 0;; j++) + { + best_regno_pseudo_regs [j] = regno_pseudo_regs [j]; + if (regno_pseudo_regs [j] < 0) + break; + } + } + continue; + } + if (rl->in && REG_P (rl->in) && REGNO (rl->in) == regno) this_cost--; if (rl->out && REG_P (rl->out) && REGNO (rl->out) == regno) @@ -1918,6 +1943,7 @@ find_reg (struct insn_chain *chain, int order) { gcc_assert (spill_cost[best_reg + i] == 0); gcc_assert (spill_add_cost[best_reg + i] == 0); + gcc_assert (hard_regno_to_pseudo_regno [best_reg + i] == -1); SET_HARD_REG_BIT (used_spill_regs_local, best_reg + i); } return 1; -- 2.11.4.GIT