From 3d11a62d4c13cd9bd22d2305a661cea48900488e Mon Sep 17 00:00:00 2001 From: vmakarov Date: Tue, 9 Oct 2007 22:10:28 +0000 Subject: [PATCH] 2007-10-09 Vladimir Makarov * toplev.h (flag_ira_coalesce): New external variable. * ira-int.h (allocno): New members first_coalesced_allocno, next_coalesced_allocno. (ALLOCNO_FIRST_COALESCED_ALLOCNO, ALLOCNO_NEXT_COALESCED_ALLOCNO): New macros. (reg_class_subintersect): New external variable. * ira-color.c (processed_allocno_bitmap): New variable. (allocno_cost_compare_func, print_coalesced_allocno): New functions. (assign_hard_reg): Process coalesced allocnos. (get_coalesced_allocnos_best_class_and_freq): New function. (add_allocno_to_ordered_bucket): Use the function. (push_allocno_to_stack, push_allocnos_to_stack): Process coalesced allocnos. (remove_allocno_from_bucket_and_push, pop_allocnos_from_stack): Use print_coalesced_allocno. (setup_allocno_available_regs_num, setup_allocno_left_conflicts_num): Process coalesced allocnos. (copy_freq_compare_func, allocno_conflict_p, coalesce_allocnos): New functions. (color_allocnos): Allocate/free processed_allocno_bitmap. Call coalesce_allocnos. (priority_coloring): Allocate/free processed_allocno_bitmap. * ira-build.c (check_coalesced_allocnos): New function. (create_allocno): Initiate next_coalesced_allocno, first_coalesced_allocno. (create_cap_allocno): Check next_coalesced_allocno, first_coalesced_allocno. * common.opt (fira-coalesce): New option. * ira.c (setup_reg_class_intersect): New function. (reg_class_subintersect): New global variable. (init_ira_once): Call setup_reg_class_intersect. * doc/invoke.texi (-fira-coalescing): New option. 
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/ira@129185 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/ChangeLog | 42 ++++ gcc/common.opt | 6 +- gcc/doc/invoke.texi | 21 +- gcc/ira-build.c | 24 +++ gcc/ira-color.c | 592 ++++++++++++++++++++++++++++++++++++++++------------ gcc/ira-int.h | 10 + gcc/ira.c | 30 +++ gcc/toplev.h | 1 + 8 files changed, 587 insertions(+), 139 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index d050a1b3717..6c64d784fa6 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,45 @@ +2007-10-09 Vladimir Makarov + + * toplev.h (flag_ira_coalesce): New external variable. + + * ira-int.h (allocno): New members first_coalesced_allocno, + next_coalesced_allocno. + (ALLOCNO_FIRST_COALESCED_ALLOCNO, ALLOCNO_NEXT_COALESCED_ALLOCNO): + New macros. + (reg_class_subintersect): New external variable. + + * ira-color.c (processed_allocno_bitmap): New variable. + (allocno_cost_compare_func, print_coalesced_allocno): New + functions. + (assign_hard_reg): Process coalesced allocnos. + (get_coalesced_allocnos_best_class_and_freq): New function. + (add_allocno_to_ordered_bucket): Use the function. + (push_allocno_to_stack, push_allocnos_to_stack): Process coalesced + allocnos. + (remove_allocno_from_bucket_and_push, pop_allocnos_from_stack): + Use print_coalesced_allocno. + (setup_allocno_available_regs_num, + setup_allocno_left_conflicts_num): Process coalesced allocnos. + (copy_freq_compare_func, allocno_conflict_p, coalesce_allocnos): + New functions. + (color_allocnos): Allocate/free processed_allocno_bitmap. Call + coalesce_allocnos. + (priority_coloring): Allocate/free processed_allocno_bitmap. + + * ira-build.c (check_coalesced_allocnos): New function. + (create_allocno): Initiate next_coalesced_allocno, + first_coalesced_allocno. + (create_cap_allocno): Check next_coalesced_allocno, + first_coalesced_allocno. + + * common.opt (fira-coalesce): New option. + + * ira.c (setup_reg_class_intersect): New function. 
+ (reg_class_subintersect): New global variable. + (init_ira_once): Call setup_reg_class_intersect. + + * doc/invoke.texi (-fira-coalescing): New option. + 2007-09-20 Vladimir Makarov * ira-build.c (create_allocno): Initialize ALLOCNO_BEST_CLASS. diff --git a/gcc/common.opt b/gcc/common.opt index f7f8b09c55e..dcc8c4b2125 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -628,9 +628,13 @@ fira-biased-coloring Common Report Var(flag_ira_biased_coloring) Use biased coloring for the integrated register allocator. +fira-coalesce +Common Report Var(flag_ira_coalesce) +Do optimistic coalescing. + fira-ipra Common Report Var(flag_ira_ipra) Init(0) -Inter-procedural register allocation for IRA +Inter-procedural register allocation for IRA. fira-move-spills Common Report Var(flag_ira_move_spills) Init(1) diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 7e25cc0e84f..f1c6506f5f9 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -328,9 +328,9 @@ Objective-C and Objective-C++ Dialects}. -fgcse -fgcse-lm -fgcse-sm -fgcse-las -fgcse-after-reload @gol -fcrossjumping -fif-conversion -fif-conversion2 @gol -finline-functions -finline-functions-called-once @gol --finline-limit=@var{n} -fira -fira-alogirthm=@var{algorithm} @gol +-finline-limit=@var{n} -fira -fira-algorithm=@var{algorithm} @gol -fno-ira-assign-after-call-split @gol --fira-biased-coloring -fira-ipra -fno-ira-move-spills @gol +-fira-biased-coloring -fira-coalesce -fira-ipra -fno-ira-move-spills @gol -fira-propagate-cost @gol -fno-ira-share-spill-slots -fno-ira-share-save-slots @gol -fno-ira-split-around-calls @gol @@ -5526,12 +5526,6 @@ Enabled at levels @option{-O2}, @option{-O3}, @option{-Os}. If supported for the target machine, use the integrated register allocator (@acronym{IRA}) for the register allocation. -@item -fira-biased-coloring -@opindex fira-biased-coloring -Use biased coloring for the integrated register allocator for possible -improvement of the generated code. 
It makes register allocator -slower. - @item -fira-algorithm=@var{algorithm} Use specified algorithm for the integrated register allocator. The @var{algorithm} argument should be one of @code{regional}, @code{CB}, @@ -5554,6 +5548,17 @@ After splitting pseudos there is a chance that spilled pseudos conflicting with the new pseudos living through calls gets a hard register. +@item -fira-biased-coloring +@opindex fira-biased-coloring +Use biased coloring for the integrated register allocator for possible +improvement of the generated code. It makes register allocator +slower. + +@item -fira-coalesce +@opindex fira-coalesce +Do optimistic register coalescing. This option might be profitable for +architectures with big regular register files. + @item -fira-ipra @opindex fira-ipra Switch on a simple form of inter-procedural register allocation when diff --git a/gcc/ira-build.c b/gcc/ira-build.c index f237918272a..32ca60145de 100644 --- a/gcc/ira-build.c +++ b/gcc/ira-build.c @@ -63,6 +63,9 @@ static void create_loop_allocnos (edge); static void create_loop_tree_node_allocnos (struct ira_loop_tree_node *); static void create_allocnos (void); static void create_loop_tree_node_caps (struct ira_loop_tree_node *); +#ifdef ENABLE_IRA_CHECKING +static void check_coalesced_allocnos (void); +#endif /* All natural loops.
*/ struct loops ira_loops; @@ -394,6 +397,8 @@ create_allocno (int regno, int cap_p, struct ira_loop_tree_node *loop_tree_node) ALLOCNO_MEMORY_COST (a) = 0; ALLOCNO_NEXT_BUCKET_ALLOCNO (a) = NULL; ALLOCNO_PREV_BUCKET_ALLOCNO (a) = NULL; + ALLOCNO_FIRST_COALESCED_ALLOCNO (a) = a; + ALLOCNO_NEXT_COALESCED_ALLOCNO (a) = a; VARRAY_PUSH_GENERIC_PTR (allocno_varray, a); allocnos = (allocno_t *) &VARRAY_GENERIC_PTR (allocno_varray, 0); allocnos_num = VARRAY_ACTIVE_SIZE (allocno_varray); @@ -487,6 +492,8 @@ create_cap_allocno (allocno_t a) allocno_t *allocno_vec; struct ira_loop_tree_node *father; + ira_assert (ALLOCNO_FIRST_COALESCED_ALLOCNO (a) == a + && ALLOCNO_NEXT_COALESCED_ALLOCNO (a) == a); father = ALLOCNO_LOOP_TREE_NODE (a)->father; cap = create_allocno (ALLOCNO_REGNO (a), TRUE, father); /* We just need to set a mode giving the same size. */ @@ -888,6 +895,23 @@ create_loop_tree_node_caps (struct ira_loop_tree_node *loop_node) +#ifdef ENABLE_IRA_CHECKING +/* The function checks that there are no coalesced allocnos. */ +static void +check_coalesced_allocnos (void) +{ + int i; + allocno_t a; + + for (i = 0; i < allocnos_num; i++) + { + a = allocnos [i]; + ira_assert (ALLOCNO_FIRST_COALESCED_ALLOCNO (a) == a + && ALLOCNO_NEXT_COALESCED_ALLOCNO (a) == a); + } +} +#endif + /* This entry function creates internal representation for IRA (allocnos, copies, loop tree nodes). If LOOPS_P is zero the nodes corresponding to the loops (except the root which corresponds the diff --git a/gcc/ira-color.c b/gcc/ira-color.c index edce519de9a..06b896e1c04 100644 --- a/gcc/ira-color.c +++ b/gcc/ira-color.c @@ -79,6 +79,9 @@ static bitmap coloring_allocno_bitmap; allocnos. */ static bitmap consideration_allocno_bitmap; +/* ??? */ +static bitmap processed_allocno_bitmap; + /* All allocnos sorted accoring their priorities. 
*/ static allocno_t *sorted_allocnos; @@ -139,6 +142,40 @@ update_copy_costs (allocno_t allocno, int decr_p) } } +/* The function is used to sort allocnos according to the profit to + use a hard register instead of memory for them. */ +static int +allocno_cost_compare_func (const void *v1p, const void *v2p) +{ + allocno_t p1 = *(const allocno_t *) v1p, p2 = *(const allocno_t *) v2p; + int cost1, cost2; + + cost1 = ALLOCNO_MEMORY_COST (p1) - ALLOCNO_COVER_CLASS_COST (p1); + cost2 = ALLOCNO_MEMORY_COST (p2) - ALLOCNO_COVER_CLASS_COST (p2); + if (cost1 - cost2) + return cost1 - cost2; + + /* If regs are equally good, sort by allocnos, so that the results of + qsort leave nothing to chance. */ + return ALLOCNO_NUM (p1) - ALLOCNO_NUM (p2); +} + +/* Print all allocnos coalesced with ALLOCNO. */ +static void +print_coalesced_allocno (allocno_t allocno) +{ + allocno_t a; + + for (a = ALLOCNO_NEXT_COALESCED_ALLOCNO (allocno);; + a = ALLOCNO_NEXT_COALESCED_ALLOCNO (a)) + { + print_expanded_allocno (a); + if (a == allocno) + break; + fprintf (ira_dump_file, "+"); + } +} + /* Varray representing the stack of allocnos used during coloring. 
*/ static varray_type allocno_stack_varray; @@ -151,11 +188,11 @@ assign_hard_reg (allocno_t allocno, int retry_p) HARD_REG_SET conflicting_regs, biased_regs; int i, j, hard_regno, best_hard_regno, class_size; int cost, mem_cost, min_cost, full_cost, min_full_cost, add_cost; - int *costs; + int costs [FIRST_PSEUDO_REGISTER], *a_costs; int *conflict_costs; enum reg_class cover_class, class; enum machine_mode mode; - allocno_t conflict_allocno, conflict_allocno2; + allocno_t a, conflict_allocno, conflict_allocno2; allocno_t *allocno_vec, *allocno_vec2; allocno_t another_allocno; copy_t cp, next_cp; @@ -173,80 +210,110 @@ assign_hard_reg (allocno_t allocno, int retry_p) best_hard_regno = -1; mode = ALLOCNO_MODE (allocno); memset (full_costs, 0, sizeof (int) * class_size); - mem_cost = ALLOCNO_MEMORY_COST (allocno); - allocno_vec = ALLOCNO_CONFLICT_ALLOCNO_VEC (allocno); - IOR_HARD_REG_SET (conflicting_regs, ALLOCNO_CONFLICT_HARD_REGS (allocno)); - costs = ALLOCNO_CURR_HARD_REG_COSTS (allocno); - memcpy (full_costs, costs, sizeof (int) * class_size); + mem_cost = 0; + bitmap_clear (processed_allocno_bitmap); + memset (costs, 0, sizeof (int) * class_size); + memset (full_costs, 0, sizeof (int) * class_size); #ifdef STACK_REGS - no_stack_reg_p = ALLOCNO_NO_STACK_REG_P (allocno); + no_stack_reg_p = FALSE; #endif - for (i = 0; (conflict_allocno = allocno_vec [i]) != NULL; i++) - /* Reload can give another class so we need to check all - allocnos. 
*/ - if (retry_p - || (cover_class == ALLOCNO_COVER_CLASS (conflict_allocno) - && bitmap_bit_p (consideration_allocno_bitmap, - ALLOCNO_NUM (conflict_allocno)))) - { - if (ALLOCNO_ASSIGNED_P (conflict_allocno)) + for (a = ALLOCNO_NEXT_COALESCED_ALLOCNO (allocno);; + a = ALLOCNO_NEXT_COALESCED_ALLOCNO (a)) + { + mem_cost += ALLOCNO_MEMORY_COST (a); + allocno_vec = ALLOCNO_CONFLICT_ALLOCNO_VEC (a); + IOR_HARD_REG_SET (conflicting_regs, ALLOCNO_CONFLICT_HARD_REGS (a)); + a_costs = ALLOCNO_CURR_HARD_REG_COSTS (a); +#ifdef STACK_REGS + no_stack_reg_p = no_stack_reg_p || ALLOCNO_NO_STACK_REG_P (a); +#endif + for (i = 0; i < class_size; i++) + { + costs [i] += a_costs [i]; + full_costs [i] += a_costs [i]; + } + for (i = 0; (conflict_allocno = allocno_vec [i]) != NULL; i++) + /* Reload can give another class so we need to check all + allocnos. */ + if (retry_p + || (cover_class == ALLOCNO_COVER_CLASS (conflict_allocno) + && bitmap_bit_p (consideration_allocno_bitmap, + ALLOCNO_NUM (conflict_allocno)))) { - if ((hard_regno = ALLOCNO_HARD_REGNO (conflict_allocno)) >= 0) + if (bitmap_bit_p (processed_allocno_bitmap, + ALLOCNO_NUM (conflict_allocno))) + continue; + bitmap_set_bit (processed_allocno_bitmap, + ALLOCNO_NUM (conflict_allocno)); + if (ALLOCNO_ASSIGNED_P (conflict_allocno)) { - IOR_HARD_REG_SET - (conflicting_regs, - reg_mode_hard_regset - [hard_regno] [ALLOCNO_MODE (conflict_allocno)]); - if (hard_reg_set_subset_p (reg_class_contents [cover_class], - conflicting_regs)) - goto fail; + if ((hard_regno = ALLOCNO_HARD_REGNO (conflict_allocno)) >= 0) + { + IOR_HARD_REG_SET + (conflicting_regs, + reg_mode_hard_regset + [hard_regno] [ALLOCNO_MODE (conflict_allocno)]); + if (hard_reg_set_subset_p (reg_class_contents + [cover_class], + conflicting_regs)) + goto fail; + } + continue; } - continue; - } - else if (! 
ALLOCNO_MAY_BE_SPILLED_P (conflict_allocno)) - { - conflict_costs - = ALLOCNO_CURR_CONFLICT_HARD_REG_COSTS (conflict_allocno); - if (conflict_costs != NULL) - for (j = class_size - 1; j >= 0; j--) - full_costs [j] -= conflict_costs [j]; + else if (! ALLOCNO_MAY_BE_SPILLED_P (conflict_allocno)) + { + conflict_costs + = ALLOCNO_CURR_CONFLICT_HARD_REG_COSTS (conflict_allocno); + if (conflict_costs != NULL) + for (j = class_size - 1; j >= 0; j--) + full_costs [j] -= conflict_costs [j]; + } + if (retry_p || ! flag_ira_biased_coloring) + continue; + allocno_vec2 = ALLOCNO_CONFLICT_ALLOCNO_VEC (conflict_allocno); + for (j = 0; (conflict_allocno2 = allocno_vec2 [j]) != NULL; j++) + if (cover_class == ALLOCNO_COVER_CLASS (conflict_allocno2) + && ALLOCNO_ASSIGNED_P (conflict_allocno2) + && (hard_regno = ALLOCNO_HARD_REGNO (conflict_allocno2)) >= 0 + && (retry_p + || bitmap_bit_p (consideration_allocno_bitmap, + ALLOCNO_NUM (conflict_allocno2)))) + IOR_HARD_REG_SET (biased_regs, + reg_mode_hard_regset [hard_regno] + [ALLOCNO_MODE (conflict_allocno2)]); } - if (retry_p || ! 
flag_ira_biased_coloring) - continue; - allocno_vec2 = ALLOCNO_CONFLICT_ALLOCNO_VEC (conflict_allocno); - for (j = 0; (conflict_allocno2 = allocno_vec2 [j]) != NULL; j++) - if (cover_class == ALLOCNO_COVER_CLASS (conflict_allocno2) - && ALLOCNO_ASSIGNED_P (conflict_allocno2) - && (hard_regno = ALLOCNO_HARD_REGNO (conflict_allocno2)) >= 0 - && (retry_p - || bitmap_bit_p (consideration_allocno_bitmap, - ALLOCNO_NUM (conflict_allocno2)))) - IOR_HARD_REG_SET (biased_regs, - reg_mode_hard_regset - [hard_regno] [ALLOCNO_MODE (conflict_allocno2)]); - } - for (cp = ALLOCNO_COPIES (allocno); cp != NULL; cp = next_cp) + if (a == allocno) + break; + } + for (a = ALLOCNO_NEXT_COALESCED_ALLOCNO (allocno);; + a = ALLOCNO_NEXT_COALESCED_ALLOCNO (a)) { - if (cp->first == allocno) + for (cp = ALLOCNO_COPIES (a); cp != NULL; cp = next_cp) { - next_cp = cp->next_first_allocno_copy; - another_allocno = cp->second; - } - else if (cp->second == allocno) - { - next_cp = cp->next_second_allocno_copy; - another_allocno = cp->first; + if (cp->first == a) + { + next_cp = cp->next_first_allocno_copy; + another_allocno = cp->second; + } + else if (cp->second == a) + { + next_cp = cp->next_second_allocno_copy; + another_allocno = cp->first; + } + else + gcc_unreachable (); + if (cover_class != ALLOCNO_COVER_CLASS (another_allocno) + || ALLOCNO_ASSIGNED_P (another_allocno)) + continue; + conflict_costs + = ALLOCNO_CURR_CONFLICT_HARD_REG_COSTS (another_allocno); + if (conflict_costs != NULL + && ! ALLOCNO_MAY_BE_SPILLED_P (another_allocno)) + for (j = class_size - 1; j >= 0; j--) + full_costs [j] += conflict_costs [j]; } - else - gcc_unreachable (); - if (cover_class != ALLOCNO_COVER_CLASS (another_allocno) - || ALLOCNO_ASSIGNED_P (another_allocno)) - continue; - conflict_costs = ALLOCNO_CURR_CONFLICT_HARD_REG_COSTS (another_allocno); - if (conflict_costs != NULL - && ! 
ALLOCNO_MAY_BE_SPILLED_P (another_allocno)) - for (j = class_size - 1; j >= 0; j--) - full_costs [j] += conflict_costs [j]; + if (a == allocno) + break; } min_cost = min_full_cost = INT_MAX; /* We don't care about giving callee saved registers to allocnos no @@ -297,12 +364,44 @@ assign_hard_reg (allocno_t allocno, int retry_p) if (min_cost > mem_cost) best_hard_regno = -1; fail: + if (best_hard_regno < 0 + && ALLOCNO_NEXT_COALESCED_ALLOCNO (allocno) != allocno) + { + for (j = 0, a = ALLOCNO_NEXT_COALESCED_ALLOCNO (allocno);; + a = ALLOCNO_NEXT_COALESCED_ALLOCNO (a)) + { + sorted_allocnos [j++] = a; + if (a == allocno) + break; + } + qsort (sorted_allocnos, j, sizeof (allocno_t), + allocno_cost_compare_func); + for (i = 0; i < j; i++) + { + a = sorted_allocnos [i]; + ALLOCNO_FIRST_COALESCED_ALLOCNO (a) = a; + ALLOCNO_NEXT_COALESCED_ALLOCNO (a) = a; + VARRAY_PUSH_GENERIC_PTR (allocno_stack_varray, a); + if (ira_dump_file != NULL) + { + fprintf (ira_dump_file, " Pushing"); + print_coalesced_allocno (a); + } + } + return FALSE; + } if (best_hard_regno >= 0) allocated_hardreg_p [best_hard_regno] = TRUE; - ALLOCNO_HARD_REGNO (allocno) = best_hard_regno; - ALLOCNO_ASSIGNED_P (allocno) = TRUE; - if (best_hard_regno >= 0) - update_copy_costs (allocno, TRUE); + for (a = ALLOCNO_NEXT_COALESCED_ALLOCNO (allocno);; + a = ALLOCNO_NEXT_COALESCED_ALLOCNO (a)) + { + ALLOCNO_HARD_REGNO (a) = best_hard_regno; + ALLOCNO_ASSIGNED_P (a) = TRUE; + if (best_hard_regno >= 0) + update_copy_costs (a, TRUE); + if (a == allocno) + break; + } return best_hard_regno >= 0; } @@ -332,6 +431,28 @@ add_allocno_to_bucket (allocno_t allocno, allocno_t *bucket_ptr) *bucket_ptr = allocno; } +/* The function returns best class and frequency for allocnos + coalesced with ALLOCNO. 
*/ +static void +get_coalesced_allocnos_best_class_and_freq (allocno_t allocno, + enum reg_class *best_class, + int *freq) +{ + allocno_t a; + + *freq = 0; + *best_class = ALL_REGS; + for (a = ALLOCNO_NEXT_COALESCED_ALLOCNO (allocno);; + a = ALLOCNO_NEXT_COALESCED_ALLOCNO (a)) + { + *freq += ALLOCNO_FREQ (a); + *best_class + = reg_class_subintersect [ALLOCNO_BEST_CLASS (a)] [*best_class]; + if (a == allocno) + break; + } +} + /* Add ALLOCNO to *BUCKET_PTR bucket maintaining the order according their frequency. ALLOCNO should be not in a bucket before the call. */ @@ -344,14 +465,13 @@ add_allocno_to_ordered_bucket (allocno_t allocno, allocno_t *bucket_ptr) cover_class = ALLOCNO_COVER_CLASS (allocno); nregs = reg_class_nregs [cover_class] [ALLOCNO_MODE (allocno)]; - freq = ALLOCNO_FREQ (allocno); - best_class = ALLOCNO_BEST_CLASS (allocno); + get_coalesced_allocnos_best_class_and_freq (allocno, &best_class, &freq); for (before = *bucket_ptr, after = NULL; before != NULL; after = before, before = ALLOCNO_NEXT_BUCKET_ALLOCNO (before)) { - best_class_before = ALLOCNO_BEST_CLASS (before); - freq_before = ALLOCNO_FREQ (before); + get_coalesced_allocnos_best_class_and_freq + (before, &best_class_before, &freq_before); if (best_class != best_class_before && class_subset_p [best_class_before] [best_class]) break; @@ -398,7 +518,7 @@ static void push_allocno_to_stack (allocno_t allocno) { int i, conflicts_num, conflict_size, size; - allocno_t conflict_allocno; + allocno_t a, conflict_allocno; allocno_t *allocno_vec; enum reg_class cover_class; @@ -408,36 +528,48 @@ push_allocno_to_stack (allocno_t allocno) if (cover_class == NO_REGS) return; size = reg_class_nregs [cover_class] [ALLOCNO_MODE (allocno)]; - allocno_vec = ALLOCNO_CONFLICT_ALLOCNO_VEC (allocno); - for (i = 0; (conflict_allocno = allocno_vec [i]) != NULL; i++) - if (cover_class == ALLOCNO_COVER_CLASS (conflict_allocno) - && bitmap_bit_p (coloring_allocno_bitmap, - ALLOCNO_NUM (conflict_allocno))) - { - if 
(ALLOCNO_IN_GRAPH_P (conflict_allocno) - && ! ALLOCNO_ASSIGNED_P (conflict_allocno)) + bitmap_clear (processed_allocno_bitmap); + for (a = ALLOCNO_NEXT_COALESCED_ALLOCNO (allocno);; + a = ALLOCNO_NEXT_COALESCED_ALLOCNO (a)) + { + allocno_vec = ALLOCNO_CONFLICT_ALLOCNO_VEC (a); + for (i = 0; (conflict_allocno = allocno_vec [i]) != NULL; i++) + if (cover_class == ALLOCNO_COVER_CLASS (conflict_allocno) + && bitmap_bit_p (coloring_allocno_bitmap, + ALLOCNO_NUM (conflict_allocno))) { - conflicts_num = ALLOCNO_LEFT_CONFLICTS_NUM (conflict_allocno); - conflict_size - = (reg_class_nregs - [cover_class] [ALLOCNO_MODE (conflict_allocno)]); - ira_assert - (ALLOCNO_LEFT_CONFLICTS_NUM (conflict_allocno) >= size); - ALLOCNO_LEFT_CONFLICTS_NUM (conflict_allocno) -= size; - if (conflicts_num + conflict_size - <= ALLOCNO_AVAILABLE_REGS_NUM (conflict_allocno)) + if (bitmap_bit_p (processed_allocno_bitmap, + ALLOCNO_NUM (conflict_allocno))) continue; - conflicts_num = ALLOCNO_LEFT_CONFLICTS_NUM (conflict_allocno); - if (conflicts_num + conflict_size - <= ALLOCNO_AVAILABLE_REGS_NUM (conflict_allocno)) + bitmap_set_bit (processed_allocno_bitmap, + ALLOCNO_NUM (conflict_allocno)); + if (ALLOCNO_IN_GRAPH_P (conflict_allocno) + && ! 
ALLOCNO_ASSIGNED_P (conflict_allocno)) { - conflicts_num = ALLOCNO_LEFT_CONFLICTS_NUM (conflict_allocno); - conflict_size - = (reg_class_nregs - [cover_class] [ALLOCNO_MODE (conflict_allocno)]); - ira_assert - (ALLOCNO_LEFT_CONFLICTS_NUM (conflict_allocno) >= size); - ALLOCNO_LEFT_CONFLICTS_NUM (conflict_allocno) -= size; - if (conflicts_num + conflict_size - <= ALLOCNO_AVAILABLE_REGS_NUM (conflict_allocno)) + conflicts_num = ALLOCNO_LEFT_CONFLICTS_NUM (conflict_allocno); + conflict_size + = (reg_class_nregs + [cover_class] [ALLOCNO_MODE (conflict_allocno)]); + ira_assert + (ALLOCNO_LEFT_CONFLICTS_NUM (conflict_allocno) >= size); + ALLOCNO_LEFT_CONFLICTS_NUM (conflict_allocno) -= size; + if (conflicts_num + conflict_size + <= ALLOCNO_AVAILABLE_REGS_NUM (conflict_allocno)) + continue; + conflicts_num = ALLOCNO_LEFT_CONFLICTS_NUM (conflict_allocno); + if (conflicts_num + conflict_size + <= ALLOCNO_AVAILABLE_REGS_NUM (conflict_allocno)) { - delete_allocno_from_bucket - (conflict_allocno, &uncolorable_allocno_bucket); - add_allocno_to_ordered_bucket (conflict_allocno, - &colorable_allocno_bucket); + delete_allocno_from_bucket + (conflict_allocno, &uncolorable_allocno_bucket); + add_allocno_to_ordered_bucket (conflict_allocno, + &colorable_allocno_bucket); + } } } - } + if (a == allocno) + break; + } } /* The function puts ALLOCNO onto the coloring stack and removes it @@ -455,7 +587,7 @@ remove_allocno_from_bucket_and_push (allocno_t allocno, int colorable_p) if (ira_dump_file != NULL) { fprintf (ira_dump_file, " Pushing"); - print_expanded_allocno (allocno); + print_coalesced_allocno (allocno); fprintf (ira_dump_file, "%s\n", colorable_p ? "" : "(potential spill)"); } cover_class = ALLOCNO_COVER_CLASS (allocno); @@ -652,8 +784,21 @@ push_allocnos_to_stack (void) { i++; if (ALLOCNO_TEMP (i_allocno) == INT_MAX) - ALLOCNO_TEMP (i_allocno) - = calculate_allocno_spill_cost (i_allocno); + { + allocno_t a; + int cost = 0; + + for (a = ALLOCNO_NEXT_COALESCED_ALLOCNO (i_allocno);; + a = ALLOCNO_NEXT_COALESCED_ALLOCNO (a)) + { + cost += calculate_allocno_spill_cost (a); + if (a == i_allocno) + break; + } + /* ??? Remove cost of copies between the coalesced + allocnos.
*/ + ALLOCNO_TEMP (i_allocno) = cost; + } i_allocno_pri = ((double) ALLOCNO_TEMP (i_allocno) / (ALLOCNO_LEFT_CONFLICTS_NUM (i_allocno) @@ -696,7 +841,7 @@ pop_allocnos_from_stack (void) if (ira_dump_file != NULL) { fprintf (ira_dump_file, " Popping"); - print_expanded_allocno (allocno); + print_coalesced_allocno (allocno); fprintf (ira_dump_file, " -- "); } if (cover_class == NO_REGS) @@ -712,7 +857,7 @@ pop_allocnos_from_stack (void) fprintf (ira_dump_file, "assign reg %d\n", ALLOCNO_HARD_REGNO (allocno)); } - else + else if (ALLOCNO_ASSIGNED_P (allocno)) { if (ira_dump_file != NULL) fprintf (ira_dump_file, "spill\n"); @@ -727,13 +872,22 @@ setup_allocno_available_regs_num (allocno_t allocno) { int i, n; enum reg_class cover_class; + allocno_t a; HARD_REG_SET temp_set; cover_class = ALLOCNO_COVER_CLASS (allocno); ALLOCNO_AVAILABLE_REGS_NUM (allocno) = available_class_regs [cover_class]; if (cover_class == NO_REGS) return; - COPY_HARD_REG_SET (temp_set, ALLOCNO_CONFLICT_HARD_REGS (allocno)); + CLEAR_HARD_REG_SET (temp_set); + ira_assert (ALLOCNO_FIRST_COALESCED_ALLOCNO (allocno) == allocno); + for (a = ALLOCNO_NEXT_COALESCED_ALLOCNO (allocno);; + a = ALLOCNO_NEXT_COALESCED_ALLOCNO (a)) + { + IOR_HARD_REG_SET (temp_set, ALLOCNO_CONFLICT_HARD_REGS (a)); + if (a == allocno) + break; + } for (n = 0, i = class_hard_regs_num [cover_class] - 1; i >= 0; i--) if (TEST_HARD_REG_BIT (temp_set, class_hard_regs [cover_class] [i])) n++; @@ -748,7 +902,7 @@ static void setup_allocno_left_conflicts_num (allocno_t allocno) { int i, hard_regs_num, hard_regno, conflict_allocnos_size; - allocno_t conflict_allocno; + allocno_t a, conflict_allocno; allocno_t *allocno_vec; enum reg_class cover_class; HARD_REG_SET temp_set; @@ -763,7 +917,15 @@ setup_allocno_left_conflicts_num (allocno_t allocno) ALLOCNO_CONFLICT_HARD_REG_COSTS (allocno), sizeof (int) * hard_regs_num); } - COPY_HARD_REG_SET (temp_set, ALLOCNO_CONFLICT_HARD_REGS (allocno)); + CLEAR_HARD_REG_SET (temp_set); + ira_assert 
(ALLOCNO_FIRST_COALESCED_ALLOCNO (allocno) == allocno); + for (a = ALLOCNO_NEXT_COALESCED_ALLOCNO (allocno);; + a = ALLOCNO_NEXT_COALESCED_ALLOCNO (a)) + { + IOR_HARD_REG_SET (temp_set, ALLOCNO_CONFLICT_HARD_REGS (a)); + if (a == allocno) + break; + } AND_HARD_REG_SET (temp_set, reg_class_contents [cover_class]); AND_COMPL_HARD_REG_SET (temp_set, no_alloc_regs); conflict_allocnos_size = 0; @@ -780,34 +942,47 @@ setup_allocno_left_conflicts_num (allocno_t allocno) } } CLEAR_HARD_REG_SET (temp_set); - allocno_vec = ALLOCNO_CONFLICT_ALLOCNO_VEC (allocno); + bitmap_clear (processed_allocno_bitmap); if (cover_class != NO_REGS) - for (i = 0; (conflict_allocno = allocno_vec [i]) != NULL; i++) - if (cover_class == ALLOCNO_COVER_CLASS (conflict_allocno) - && bitmap_bit_p (consideration_allocno_bitmap, - ALLOCNO_NUM (conflict_allocno))) - { - if (! ALLOCNO_ASSIGNED_P (conflict_allocno)) - conflict_allocnos_size - += (reg_class_nregs - [cover_class] [ALLOCNO_MODE (conflict_allocno)]); - else if ((hard_regno = ALLOCNO_HARD_REGNO (conflict_allocno)) >= 0) + for (a = ALLOCNO_NEXT_COALESCED_ALLOCNO (allocno);; + a = ALLOCNO_NEXT_COALESCED_ALLOCNO (a)) + { + allocno_vec = ALLOCNO_CONFLICT_ALLOCNO_VEC (a); + for (i = 0; (conflict_allocno = allocno_vec [i]) != NULL; i++) + if (cover_class == ALLOCNO_COVER_CLASS (conflict_allocno) + && bitmap_bit_p (consideration_allocno_bitmap, + ALLOCNO_NUM (conflict_allocno))) { - int last = (hard_regno - + hard_regno_nregs - [hard_regno] [ALLOCNO_MODE (conflict_allocno)]); - - while (hard_regno < last) + if (bitmap_bit_p (processed_allocno_bitmap, + ALLOCNO_NUM (conflict_allocno))) + continue; + bitmap_set_bit (processed_allocno_bitmap, + ALLOCNO_NUM (conflict_allocno)); + if (! ALLOCNO_ASSIGNED_P (conflict_allocno)) + conflict_allocnos_size + += (reg_class_nregs + [cover_class] [ALLOCNO_MODE (conflict_allocno)]); + else if ((hard_regno = ALLOCNO_HARD_REGNO (conflict_allocno)) + >= 0) { - if (! 
TEST_HARD_REG_BIT (temp_set, hard_regno)) + int last = (hard_regno + + hard_regno_nregs + [hard_regno] [ALLOCNO_MODE (conflict_allocno)]); + + while (hard_regno < last) { - conflict_allocnos_size++; - SET_HARD_REG_BIT (temp_set, hard_regno); + if (! TEST_HARD_REG_BIT (temp_set, hard_regno)) + { + conflict_allocnos_size++; + SET_HARD_REG_BIT (temp_set, hard_regno); + } + hard_regno++; } - hard_regno++; } } - } + if (a == allocno) + break; + } ALLOCNO_LEFT_CONFLICTS_NUM (allocno) = conflict_allocnos_size; } @@ -829,6 +1004,8 @@ put_allocno_into_bucket (allocno_t allocno) ALLOCNO_CONFLICT_HARD_REG_COSTS (allocno), sizeof (int) * hard_regs_num); } + if (ALLOCNO_FIRST_COALESCED_ALLOCNO (allocno) != allocno) + return; ALLOCNO_IN_GRAPH_P (allocno) = TRUE; setup_allocno_left_conflicts_num (allocno); setup_allocno_available_regs_num (allocno); @@ -840,6 +1017,146 @@ put_allocno_into_bucket (allocno_t allocno) add_allocno_to_bucket (allocno, &uncolorable_allocno_bucket); } +/* The function is used to sort allocnos according to their + frequencies. */ +static int +copy_freq_compare_func (const void *v1p, const void *v2p) +{ + copy_t cp1 = *(const copy_t *) v1p, cp2 = *(const copy_t *) v2p; + int pri1, pri2; + + pri1 = cp1->freq; + pri2 = cp2->freq; + if (pri2 - pri1) + return pri2 - pri1; + + /* If freqencies are equal, sort by copies, so that the results of + qsort leave nothing to chance. */ + return cp1->num - cp2->num; +} + +/* The function merges two sets of coalesced allocnos given by + allocnos A1 and A2. 
*/ +static void +merge_allocnos (allocno_t a1, allocno_t a2) +{ + allocno_t a, first, last, next; + + first = ALLOCNO_FIRST_COALESCED_ALLOCNO (a1); + if (first == ALLOCNO_FIRST_COALESCED_ALLOCNO (a2)) + return; + for (last = a2, a = ALLOCNO_NEXT_COALESCED_ALLOCNO (a2);; + a = ALLOCNO_NEXT_COALESCED_ALLOCNO (a)) + { + ALLOCNO_FIRST_COALESCED_ALLOCNO (a) = first; + if (a == a2) + break; + last = a; + } + next = ALLOCNO_NEXT_COALESCED_ALLOCNO (first); + ALLOCNO_NEXT_COALESCED_ALLOCNO (first) = a2; + ALLOCNO_NEXT_COALESCED_ALLOCNO (last) = next; +} + +/* The function returns non-zero if there are conflicting allocnos + from two sets of coalesced allocnos given by allocnos A1 and + A2. */ +static int +allocno_conflict_p (allocno_t a1, allocno_t a2) +{ + allocno_t a, conflict_allocno, *allocno_vec; + int i; + + bitmap_clear (processed_allocno_bitmap); + for (a = ALLOCNO_NEXT_COALESCED_ALLOCNO (a1);; + a = ALLOCNO_NEXT_COALESCED_ALLOCNO (a)) + { + bitmap_set_bit (processed_allocno_bitmap, ALLOCNO_NUM (a)); + if (a == a1) + break; + } + for (a = ALLOCNO_NEXT_COALESCED_ALLOCNO (a2);; + a = ALLOCNO_NEXT_COALESCED_ALLOCNO (a)) + { + allocno_vec = ALLOCNO_CONFLICT_ALLOCNO_VEC (a); + for (i = 0; (conflict_allocno = allocno_vec [i]) != NULL; i++) + if (bitmap_bit_p (processed_allocno_bitmap, + ALLOCNO_NUM (conflict_allocno))) + return TRUE; + if (a == a2) + break; + } + return FALSE; +} + +/* The major function for aggressive coalescing. */ +static void +coalesce_allocnos (void) +{ + allocno_t a; + copy_t cp, next_cp, *sorted_copies; + enum reg_class cover_class; + enum machine_mode mode; + unsigned int j; + int i, n, cp_num; + bitmap_iterator bi; + + sorted_copies = ira_allocate (copies_num * sizeof (copy_t)); + cp_num = 0; + /* Collect copies. We can not use copies for this because some + copies are actually removed. 
*/ + EXECUTE_IF_SET_IN_BITMAP (coloring_allocno_bitmap, 0, j, bi) + { + a = allocnos [j]; + cover_class = ALLOCNO_COVER_CLASS (a); + mode = ALLOCNO_MODE (a); + for (cp = ALLOCNO_COPIES (a); cp != NULL; cp = next_cp) + { + if (cp->first == a) + { + next_cp = cp->next_first_allocno_copy; + if (ALLOCNO_COVER_CLASS (cp->second) == cover_class + && ALLOCNO_MODE (cp->second) == mode +#if 0 + && (1 || cover_class == SSE_REGS || cover_class == MMX_REGS) +#endif + ) + sorted_copies [cp_num++] = cp; + } + else if (cp->second == a) + next_cp = cp->next_second_allocno_copy; + else + gcc_unreachable (); + } + } + qsort (sorted_copies, cp_num, sizeof (copy_t), copy_freq_compare_func); + for (;cp_num != 0;) + { + for (i = 0; i < cp_num; i++) + { + cp = sorted_copies [i]; + if (! allocno_conflict_p (cp->first, cp->second)) + { + if (ira_dump_file != NULL) + fprintf (ira_dump_file, "Coalescing copy %d (freq=%d)\n", + cp->num, cp->freq); + merge_allocnos (cp->first, cp->second); + i++; + break; + } + } + for (n = 0; i < cp_num; i++) + { + cp = sorted_copies [i]; + if (ALLOCNO_FIRST_COALESCED_ALLOCNO (cp->first) + != ALLOCNO_FIRST_COALESCED_ALLOCNO (cp->second)) + sorted_copies [n++] = cp; + } + cp_num = n; + } + ira_free (sorted_copies); +} + /* Function implements Chaitin-Briggs coloring for allocnos in COLORING_ALLOCNO_BITMAP taking into account allocnos in CONSIDERATION_ALLOCNO_BITMAP. */ @@ -849,6 +1166,9 @@ color_allocnos (void) unsigned int i; bitmap_iterator bi; + processed_allocno_bitmap = ira_allocate_bitmap (); + if (flag_ira_coalesce) + coalesce_allocnos (); /* Put the allocnos into the corresponding buckets. */ colorable_allocno_bucket = NULL; uncolorable_allocno_bucket = NULL; @@ -856,6 +1176,16 @@ color_allocnos (void) put_allocno_into_bucket (allocnos [i]); push_allocnos_to_stack (); pop_allocnos_from_stack (); + if (flag_ira_coalesce) + /* We don't need coalesced allocnos for retry_ira_color.
*/ + EXECUTE_IF_SET_IN_BITMAP (coloring_allocno_bitmap, 0, i, bi) + { + allocno_t a = allocnos [i]; + + ALLOCNO_FIRST_COALESCED_ALLOCNO (a) = a; + ALLOCNO_NEXT_COALESCED_ALLOCNO (a) = a; + } + ira_free_bitmap (processed_allocno_bitmap); } @@ -1076,6 +1406,7 @@ priority_coloring (void) int i, hard_regs_num; allocno_t a; + processed_allocno_bitmap = ira_allocate_bitmap (); memcpy (sorted_allocnos, allocnos, allocnos_num * sizeof (allocno_t)); for (i = 0; i < allocnos_num; i++) { @@ -1115,6 +1446,7 @@ priority_coloring (void) } ALLOCNO_IN_GRAPH_P (a) = TRUE; } + ira_free_bitmap (processed_allocno_bitmap); } /* The function initialized common data for cloring and calls diff --git a/gcc/ira-int.h b/gcc/ira-int.h index ebb6fe0744c..20054b772b7 100644 --- a/gcc/ira-int.h +++ b/gcc/ira-int.h @@ -237,6 +237,11 @@ struct allocno allocno_t prev_bucket_allocno; /* Used for temporary purposes. */ int temp; + /* Coalesced allocnos form a cyclic list. One allocno given by + FIRST_COALESCED_ALLOCNO represents all coalesced allocnos. The + list is chained by NEXT_COALESCED_ALLOCNO. */ + allocno_t first_coalesced_allocno; + allocno_t next_coalesced_allocno; }; /* All members of the allocno node should be accessed only through the @@ -281,6 +286,8 @@ struct allocno #define ALLOCNO_NEXT_BUCKET_ALLOCNO(P) ((P)->next_bucket_allocno) #define ALLOCNO_PREV_BUCKET_ALLOCNO(P) ((P)->prev_bucket_allocno) #define ALLOCNO_TEMP(P) ((P)->temp) +#define ALLOCNO_FIRST_COALESCED_ALLOCNO(P) ((P)->first_coalesced_allocno) +#define ALLOCNO_NEXT_COALESCED_ALLOCNO(P) ((P)->next_coalesced_allocno) /* Map regno -> allocno for the current loop tree node. */ extern allocno_t *regno_allocno_map; @@ -381,6 +388,9 @@ extern int register_move_cost [MAX_MACHINE_MODE] [N_REG_CLASSES] /* Register class subset relation. */ extern int class_subset_p [N_REG_CLASSES] [N_REG_CLASSES]; +/* The biggest class inside of intersection of the two classes. 
*/ +extern enum reg_class reg_class_subintersect [N_REG_CLASSES] [N_REG_CLASSES]; + /* Hard registers which can be used for the allocation of given register class. */ extern short class_hard_regs [N_REG_CLASSES] [FIRST_PSEUDO_REGISTER]; diff --git a/gcc/ira.c b/gcc/ira.c index f1dd392e91b..f60126ddf4c 100644 --- a/gcc/ira.c +++ b/gcc/ira.c @@ -96,6 +96,7 @@ Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA static void setup_inner_mode (void); static void setup_reg_mode_hard_regset (void); static void setup_class_subset_and_move_costs (void); +static void setup_reg_class_intersect (void); static void setup_class_hard_regs (void); static void setup_available_class_regs (void); static void setup_alloc_regs (int); @@ -166,6 +167,9 @@ int register_move_cost [MAX_MACHINE_MODE] [N_REG_CLASSES] [N_REG_CLASSES]; 1st class is a subset of the 2nd class. */ int class_subset_p [N_REG_CLASSES] [N_REG_CLASSES]; +/* The biggest class inside of intersection of the two classes. */ +enum reg_class reg_class_subintersect [N_REG_CLASSES] [N_REG_CLASSES]; + /* Temporary hard reg set used for different calculation. */ static HARD_REG_SET temp_hard_regset; @@ -241,6 +245,31 @@ setup_class_subset_and_move_costs (void) } } +/* The function sets up REG_CLASS_SUBINTERSECT. */ +static void +setup_reg_class_intersect (void) +{ + int cl1, cl2, cl3; + HARD_REG_SET temp_set; + + for (cl1 = 0; cl1 < N_REG_CLASSES; cl1++) + { + for (cl2 = 0; cl2 < N_REG_CLASSES; cl2++) + { + reg_class_subintersect [cl1] [cl2] = NO_REGS; + COPY_HARD_REG_SET (temp_set, reg_class_contents [cl1]); + AND_HARD_REG_SET (temp_set, reg_class_contents [cl2]); + for (cl3 = 0; cl3 < N_REG_CLASSES; cl3++) + if (hard_reg_set_subset_p (reg_class_contents [cl3], temp_set) + && ! 
hard_reg_set_subset_p (reg_class_contents [cl3], + reg_class_contents + [(int) reg_class_subintersect + [cl1] [cl2]])) + reg_class_subintersect [cl1] [cl2] = (enum reg_class) cl3; + } + } +} + /* Hard registers can not be used for the register allocator for all @@ -740,6 +769,7 @@ init_ira_once (void) setup_inner_mode (); setup_reg_mode_hard_regset (); setup_class_subset_and_move_costs (); + setup_reg_class_intersect (); setup_alloc_regs (flag_omit_frame_pointer != 0); find_reg_class_closure (); setup_reg_class_nregs (); diff --git a/gcc/toplev.h b/gcc/toplev.h index 02e262e40e7..dbd5e55f187 100644 --- a/gcc/toplev.h +++ b/gcc/toplev.h @@ -138,6 +138,7 @@ extern int time_report; extern int flag_ira; extern int flag_ira_assign_after_call_split; extern int flag_ira_biased_coloring; +extern int flag_ira_coalesce; extern int flag_ira_ipra; extern int flag_ira_move_spills; extern int flag_ira_propagate_cost; -- 2.11.4.GIT