/* Matrix layout transformations.
   Copyright (C) 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
   Contributed by Razya Ladelsky <razya@il.ibm.com>
   Originally written by Revital Eres and Mustafa Hagog.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/*
   Matrix flattening optimization tries to replace an N-dimensional
   matrix with its equivalent M-dimensional matrix, where M < N.
   This first implementation focuses on global matrices defined dynamically.

   When M == 1, we actually flatten the whole matrix.
   For instance consider a two-dimensional array a [dim1] [dim2].
   The code for allocating space for it usually looks like:

     a = (int **) malloc (dim1 * sizeof (int *));
     for (i = 0; i < dim1; i++)
       a[i] = (int *) malloc (dim2 * sizeof (int));

   If the array "a" is found suitable for this optimization,
   its allocation is replaced by:

     a = (int *) malloc (dim1 * dim2 * sizeof (int));

   and all the references to a[i][j] are replaced by a[i * dim2 + j].
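
   (As an illustration only, extending the example above rather than
   quoting the pass: for a three-dimensional a [dim1] [dim2] [dim3] the
   same transformation would allocate dim1 * dim2 * dim3 * sizeof (int)
   in one malloc, and a[i][j][k] would become
   a[(i * dim2 + j) * dim3 + k].)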
   The two main phases of the optimization are the analysis
   and the transformation.
   The driver of the optimization is matrix_reorg ().

      Analysis phase:
      ===============

   We'll number the dimensions outside-in, meaning the most external
   is 0, then 1, and so on.
   The analysis part of the optimization determines K, the escape
   level of an N-dimensional matrix (K <= N), that allows flattening of
   the external dimensions 0, 1, ..., K-1.  Escape level 0 means that the
   whole matrix escapes and no flattening is possible.
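
   (For example -- an illustration, not a case taken from the pass
   itself: if a three-dimensional matrix gets escape level K == 2,
   dimensions 0 and 1 may be collapsed into a single dimension while
   the innermost dimension keeps its own allocation; K == N would
   allow flattening the whole matrix.)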
   The analysis part is implemented in analyze_matrix_allocation_site()
   and analyze_matrix_accesses().

      Transformation phase:
      =====================

   In this phase we define the new flattened matrices that replace the
   original matrices in the code.
   Implemented in transform_allocation_sites(),
   transform_access_sites().
      Matrix transposing:
      ===================

   The idea of matrix transposing is organizing the matrix in a different
   layout such that the dimensions are reordered.
   This could produce better cache behavior in some cases.

   For example, let's look at the matrix accesses in the following loop:

     for (i = 0; i < N; i++)
       for (j = 0; j < M; j++)
         access to a[i][j]

   This loop can produce good cache behavior because the elements of
   the inner dimension are accessed sequentially.

   However, if the accesses of the matrix were of the following form:

     for (i = 0; i < N; i++)
       for (j = 0; j < M; j++)
         access to a[j][i]

   In this loop we iterate the columns and not the rows.
   Therefore, swapping the rows and columns
   would give an organization with better (cache) locality.
   Replacing the dimensions of the matrix is called matrix transposing.
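
   (Worked illustration, not taken verbatim from the pass: in the loop
   above, a[j][i] flattens to a[j * dim2 + i], so the inner loop over j
   strides by dim2 elements; if the matrix is stored transposed, the
   same element lives at a[i * dim1 + j] and the inner loop touches
   consecutive elements.)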
   This example, of course, extends to matrices of more dimensions
   as well.

   Since a program could include all kinds of accesses, there is a decision
   mechanism, implemented in analyze_transpose(), which implements a
   heuristic that tries to determine whether to transpose the matrix or not,
   according to the form of the dominant accesses.
   This decision is transferred to the flattening mechanism, and whether
   the matrix was transposed or not, the matrix is flattened (if possible).

   This decision making is based on profiling information and loop
   information.  If profiling information is available, the decision-making
   mechanism is applied; otherwise the matrix is only flattened
   (if possible).
   Both optimizations are described in the paper "Matrix flattening and
   transposing in GCC", which was presented at the GCC Summit 2006.
   http://www.gccsummit.org/2006/2006-GCC-Summit-Proceedings.pdf  */
#include "coretypes.h"
#include "tree-inline.h"
#include "tree-flow.h"
#include "tree-flow-inline.h"
#include "langhooks.h"
#include "diagnostic.h"
#include "function.h"
#include "basic-block.h"
#include "tree-iterator.h"
#include "tree-pass.h"
#include "tree-data-ref.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "tree-ssa-sccvn.h"
/* We need to collect a lot of data from the original malloc,
   particularly as the gimplifier has converted:

   orig_var = (struct_type *) malloc (x * sizeof (struct_type *));

   into

   T3 = <constant> ;  ** <constant> is amount to malloc; precomputed **
   T4 = malloc (T3);
   T5 = (struct_type *) T4;
   orig_var = T5;

   The following struct fields allow us to collect all the necessary data from
   the gimplified program.  The comments in the struct below are all based
   on the gimple example above.  */

struct malloc_call_data
{
  gimple call_stmt;	/* Tree for "T4 = malloc (T3);".  */
  tree size_var;	/* Var decl for T3.  */
  tree malloc_size;	/* Tree for "<constant>", the rhs assigned to T3.  */
};
static tree can_calculate_expr_before_stmt (tree, sbitmap);
static tree can_calculate_stmt_before_stmt (gimple, sbitmap);
/* The front end of the compiler, when parsing statements of the form:

   var = (type_cast) malloc (sizeof (type));

   always converts this single statement into the following statements
   (GIMPLE form):

   T.1 = sizeof (type);
   T.2 = malloc (T.1);
   T.3 = (type_cast) T.2;
   var = T.3;

   Since we need to create new malloc statements and modify the original
   statements somewhat, we need to find all four of the above statements.
   Currently record_call_1 (called for building cgraph edges) finds and
   records the statements containing the actual call to malloc, but we
   need to find the rest of the variables/statements on our own.  That
   is what the following function does.  */
static void
collect_data_for_malloc_call (gimple stmt, struct malloc_call_data *m_data)
{
  tree size_var = NULL;
  tree malloc_fn_decl;
  tree arg1;

  gcc_assert (is_gimple_call (stmt));

  malloc_fn_decl = gimple_call_fndecl (stmt);
  if (malloc_fn_decl == NULL
      || DECL_FUNCTION_CODE (malloc_fn_decl) != BUILT_IN_MALLOC)
    return;

  arg1 = gimple_call_arg (stmt, 0);
  size_var = arg1;

  m_data->call_stmt = stmt;
  m_data->size_var = size_var;
  if (TREE_CODE (size_var) != VAR_DECL)
    m_data->malloc_size = size_var;
  else
    m_data->malloc_size = NULL_TREE;
}
/* Information about a matrix access site.
   For example: if an access site of matrix arr is arr[i][j]
   the ACCESS_SITE_INFO structure will have the address
   of arr as its stmt.  The INDEX_INFO will hold information about the
   initial address and index of each dimension.  */
struct access_site_info
{
  /* The statement (INDIRECT_REF or POINTER_PLUS_EXPR).  */
  gimple stmt;

  /* In case of POINTER_PLUS_EXPR, what is the offset.  */
  tree offset;

  /* The index which created the offset.  */
  tree index;

  /* The indirection level of this statement.  */
  int level;

  /* TRUE for allocation site, FALSE for access site.  */
  bool is_alloc;

  /* The function containing the access site.  */
  tree function_decl;

  /* This access is iterated in the innermost loop.  */
  bool iterated_by_inner_most_loop_p;
};

typedef struct access_site_info *access_site_info_p;
DEF_VEC_P (access_site_info_p);
DEF_VEC_ALLOC_P (access_site_info_p, heap);
/* Calls to free when flattening a matrix.  */

struct free_info
{
  gimple stmt;
  tree func;
};

/* Information about a matrix to flatten.  */
struct matrix_info
{
  /* Decl tree of this matrix.  */
  tree decl;

  /* Number of dimensions; number
     of "*" in the type declaration.  */
  int num_dims;

  /* Minimum indirection level that escapes, 0 means that
     the whole matrix escapes, k means that dimensions
     0 to ACTUAL_DIM - k escape.  */
  int min_indirect_level_escape;

  gimple min_indirect_level_escape_stmt;

  /* Hold the allocation site for each level (dimension).
     We can use NUM_DIMS as the upper bound and allocate the array
     once with this number of elements and no need to use realloc and
     MAX_MALLOCED_LEVEL.  */
  gimple *malloc_for_level;

  int max_malloced_level;

  /* Is the matrix transposed.  */
  bool is_transposed_p;

  /* The location of the allocation sites (they must be in one
     function).  */
  tree allocation_function_decl;

  /* The calls to free for each level of indirection.  */
  struct free_info *free_stmts;

  /* An array which holds for each dimension its size, where
     dimension 0 is the outermost (the one that contains all the
     others).  */
  tree *dimension_size;

  /* An array which holds for each dimension its original size
     (before transposing and flattening take place).  */
  tree *dimension_size_orig;

  /* An array which holds for each dimension the size of the type of
     the elements accessed in that level (in bytes).  */
  HOST_WIDE_INT *dimension_type_size;

  int dimension_type_size_len;

  /* An array collecting the count of accesses for each dimension.  */
  gcov_type *dim_hot_level;

  /* An array of the accesses to be flattened.
     Elements are of type "struct access_site_info *".  */
  VEC (access_site_info_p, heap) *access_l;

  /* A map of how the dimensions will be organized at the end of
     the analyses.  */
  int *dim_map;
};
/* In each phi node we want to record the indirection level we have when we
   get to the phi node.  Usually we will have phi nodes with more than two
   arguments, then we must assure that all of them get to the phi node with
   the same indirection level; otherwise it's not safe to do the flattening.
   So we record the information regarding the indirection level each time we
   get to the phi node in this hash table.  */

struct matrix_access_phi_node
{
  gimple phi;
  int indirection_level;
};

/* We use this structure to find if the SSA variable is accessed inside the
   tree and record the tree containing it.  */

struct ssa_acc_in_tree
{
  /* The variable whose accesses in the tree we are looking for.  */
  tree ssa_var;

  /* The tree and code inside it the ssa_var is accessed, currently
     it could be an INDIRECT_REF or CALL_EXPR.  */
  enum tree_code t_code;
  tree t_tree;

  /* The place in the containing tree.  */
  tree *tp;

  tree second_op;
  bool var_found;
};
static void analyze_matrix_accesses (struct matrix_info *, tree, int, bool,
				     sbitmap, bool);
static int transform_allocation_sites (void **, void *);
static int transform_access_sites (void **, void *);
static int analyze_transpose (void **, void *);
static int dump_matrix_reorg_analysis (void **, void *);

static bool check_transpose_p;
/* Hash function used for the phi nodes.  */

static hashval_t
mat_acc_phi_hash (const void *p)
{
  const struct matrix_access_phi_node *const ma_phi =
    (const struct matrix_access_phi_node *) p;

  return htab_hash_pointer (ma_phi->phi);
}

/* Equality means phi node pointers are the same.  */

static int
mat_acc_phi_eq (const void *p1, const void *p2)
{
  const struct matrix_access_phi_node *const phi1 =
    (const struct matrix_access_phi_node *) p1;
  const struct matrix_access_phi_node *const phi2 =
    (const struct matrix_access_phi_node *) p2;

  if (phi1->phi == phi2->phi)
    return 1;

  return 0;
}
/* Hold the PHI nodes we visit during the traversal for escaping
   analysis.  */
static htab_t htab_mat_acc_phi_nodes = NULL;

/* This hash-table holds the information about the matrices we are
   going to handle.  */
static htab_t matrices_to_reorg = NULL;
/* Return a hash for MTT, which is really a "matrix_info *".  */

static hashval_t
mtt_info_hash (const void *mtt)
{
  return htab_hash_pointer (((const struct matrix_info *) mtt)->decl);
}

/* Return true if MTT1 and MTT2 (which are really both of type
   "matrix_info *") refer to the same decl.  */

static int
mtt_info_eq (const void *mtt1, const void *mtt2)
{
  const struct matrix_info *const i1 = (const struct matrix_info *) mtt1;
  const struct matrix_info *const i2 = (const struct matrix_info *) mtt2;

  if (i1->decl == i2->decl)
    return true;

  return false;
}
/* Return false if STMT may contain a vector expression.
   In this situation, all matrices should not be flattened.  */

static bool
may_flatten_matrices_1 (gimple stmt)
{
  tree t;

  switch (gimple_code (stmt))
    {
    case GIMPLE_ASSIGN:
      if (!gimple_assign_cast_p (stmt))
	return true;

      t = gimple_assign_rhs1 (stmt);
      while (CONVERT_EXPR_P (t))
	{
	  if (TREE_TYPE (t) && POINTER_TYPE_P (TREE_TYPE (t)))
	    {
	      tree pointee;

	      pointee = TREE_TYPE (t);
	      while (POINTER_TYPE_P (pointee))
		pointee = TREE_TYPE (pointee);
	      if (TREE_CODE (pointee) == VECTOR_TYPE)
		{
		  if (dump_file)
		    fprintf (dump_file,
			     "Found vector type, don't flatten matrix\n");
		  return false;
		}
	    }
	  t = TREE_OPERAND (t, 0);
	}
      break;
    case GIMPLE_ASM:
      /* Asm code could contain vector operations.  */
      return false;
    default:
      break;
    }
  return true;
}
/* Return false if there are hand-written vectors in the program.
   We disable the flattening in such a case.  */

static bool
may_flatten_matrices (struct cgraph_node *node)
{
  tree decl;
  struct function *func;
  basic_block bb;
  gimple_stmt_iterator gsi;

  decl = node->decl;
  if (node->analyzed)
    {
      func = DECL_STRUCT_FUNCTION (decl);
      FOR_EACH_BB_FN (bb, func)
	for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	  if (!may_flatten_matrices_1 (gsi_stmt (gsi)))
	    return false;
    }
  return true;
}
/* Given a VAR_DECL, check its type to determine whether it is
   a definition of a dynamically allocated matrix and therefore is
   a suitable candidate for the matrix flattening optimization.
   Return NULL if VAR_DECL is not such a decl.  Otherwise, allocate
   a MATRIX_INFO structure, fill it with the relevant information
   and return a pointer to it.
   TODO: handle also statically defined arrays.  */

static struct matrix_info *
analyze_matrix_decl (tree var_decl)
{
  struct matrix_info *m_node, tmpmi, *mi;
  tree var_type;
  int dim_num = 0;

  gcc_assert (matrices_to_reorg);

  if (TREE_CODE (var_decl) == PARM_DECL)
    var_type = DECL_ARG_TYPE (var_decl);
  else if (TREE_CODE (var_decl) == VAR_DECL)
    var_type = TREE_TYPE (var_decl);
  else
    return NULL;

  if (!POINTER_TYPE_P (var_type))
    return NULL;

  while (POINTER_TYPE_P (var_type))
    {
      var_type = TREE_TYPE (var_type);
      dim_num++;
    }

  if (dim_num <= 1)
    return NULL;

  if (!COMPLETE_TYPE_P (var_type)
      || TREE_CODE (TYPE_SIZE_UNIT (var_type)) != INTEGER_CST)
    return NULL;

  /* Check to see if this pointer is already in there.  */
  tmpmi.decl = var_decl;
  mi = (struct matrix_info *) htab_find (matrices_to_reorg, &tmpmi);
  if (mi)
    return NULL;

  /* Record the matrix.  */

  m_node = (struct matrix_info *) xcalloc (1, sizeof (struct matrix_info));
  m_node->decl = var_decl;
  m_node->num_dims = dim_num;
  m_node->free_stmts
    = (struct free_info *) xcalloc (dim_num, sizeof (struct free_info));

  /* Init min_indirect_level_escape to -1 to indicate that no escape
     analysis has been done yet.  */
  m_node->min_indirect_level_escape = -1;
  m_node->is_transposed_p = false;

  return m_node;
}
/* Free the matrix E; used as the delete callback of the matrices
   hash table.  */

static void
mat_free (void *e)
{
  struct matrix_info *mat = (struct matrix_info *) e;

  if (!mat)
    return;

  if (mat->free_stmts)
    free (mat->free_stmts);
  if (mat->dim_hot_level)
    free (mat->dim_hot_level);
  if (mat->malloc_for_level)
    free (mat->malloc_for_level);
}
/* Find all potential matrices.
   TODO: currently we handle only multidimensional
   dynamically allocated arrays.  */

static void
find_matrices_decl (void)
{
  struct matrix_info *tmp;
  PTR *slot;
  struct varpool_node *vnode;

  gcc_assert (matrices_to_reorg);

  /* For every global variable in the program:
     Check to see if it's of a candidate type and record it.  */
  for (vnode = varpool_nodes_queue; vnode; vnode = vnode->next_needed)
    {
      tree var_decl = vnode->decl;

      if (!var_decl || TREE_CODE (var_decl) != VAR_DECL)
	continue;

      if (matrices_to_reorg)
	if ((tmp = analyze_matrix_decl (var_decl)))
	  {
	    if (!TREE_ADDRESSABLE (var_decl))
	      {
		slot = htab_find_slot (matrices_to_reorg, tmp, INSERT);
		*slot = tmp;
	      }
	  }
    }
  return;
}
/* Mark that the matrix MI escapes at level L.  */

static void
mark_min_matrix_escape_level (struct matrix_info *mi, int l, gimple s)
{
  if (mi->min_indirect_level_escape == -1
      || (mi->min_indirect_level_escape > l))
    {
      mi->min_indirect_level_escape = l;
      mi->min_indirect_level_escape_stmt = s;
    }
}
/* Find if the SSA variable is accessed inside the
   tree and record the tree containing it.
   The only relevant uses are the case of SSA_NAME, or SSA inside
   INDIRECT_REF, PLUS_EXPR, POINTER_PLUS_EXPR, MULT_EXPR.  */

static void
ssa_accessed_in_tree (tree t, struct ssa_acc_in_tree *a)
{
  a->t_code = TREE_CODE (t);
  switch (a->t_code)
    {
    case SSA_NAME:
      if (t == a->ssa_var)
	a->var_found = true;
      break;
    case INDIRECT_REF:
      if (SSA_VAR_P (TREE_OPERAND (t, 0))
	  && TREE_OPERAND (t, 0) == a->ssa_var)
	a->var_found = true;
      break;
    default:
      break;
    }
}
/* Find if the SSA variable is accessed on the right hand side of
   gimple call STMT.  */

static void
ssa_accessed_in_call_rhs (gimple stmt, struct ssa_acc_in_tree *a)
{
  tree decl;
  tree arg;
  size_t i;

  a->t_code = CALL_EXPR;
  for (i = 0; i < gimple_call_num_args (stmt); i++)
    {
      arg = gimple_call_arg (stmt, i);
      if (arg == a->ssa_var)
	{
	  a->var_found = true;
	  decl = gimple_call_fndecl (stmt);
	  a->t_tree = decl;
	  break;
	}
    }
}
/* Find if the SSA variable is accessed on the right hand side of
   gimple assign STMT.  */

static void
ssa_accessed_in_assign_rhs (gimple stmt, struct ssa_acc_in_tree *a)
{
  a->t_code = gimple_assign_rhs_code (stmt);
  switch (a->t_code)
    {
      tree op1, op2;

    case SSA_NAME:
    case INDIRECT_REF:
    CASE_CONVERT:
    case VIEW_CONVERT_EXPR:
      ssa_accessed_in_tree (gimple_assign_rhs1 (stmt), a);
      break;
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MULT_EXPR:
      op1 = gimple_assign_rhs1 (stmt);
      op2 = gimple_assign_rhs2 (stmt);

      if (op1 == a->ssa_var)
	{
	  a->var_found = true;
	  a->second_op = op2;
	}
      else if (op2 == a->ssa_var)
	{
	  a->var_found = true;
	  a->second_op = op1;
	}
      break;
    default:
      break;
    }
}
/* Record the access/allocation site information for matrix MI so we can
   handle it later in the transformation.  */

static void
record_access_alloc_site_info (struct matrix_info *mi, gimple stmt,
			       tree offset, tree index, int level,
			       bool is_alloc)
{
  struct access_site_info *acc_info;

  if (!mi->access_l)
    mi->access_l = VEC_alloc (access_site_info_p, heap, 100);

  acc_info
    = (struct access_site_info *)
    xcalloc (1, sizeof (struct access_site_info));
  acc_info->stmt = stmt;
  acc_info->offset = offset;
  acc_info->index = index;
  acc_info->function_decl = current_function_decl;
  acc_info->level = level;
  acc_info->is_alloc = is_alloc;

  VEC_safe_push (access_site_info_p, heap, mi->access_l, acc_info);
}
/* Record the malloc as the allocation site of the given LEVEL.  But
   first we make sure that all the size parameters passed to malloc in
   all the allocation sites could be pre-calculated before the call to
   the malloc of level 0 (the main malloc call).  */

static void
add_allocation_site (struct matrix_info *mi, gimple stmt, int level)
{
  struct malloc_call_data mcd;

  /* Make sure that the allocation sites are in the same function.  */
  if (!mi->allocation_function_decl)
    mi->allocation_function_decl = current_function_decl;
  else if (mi->allocation_function_decl != current_function_decl)
    {
      int min_malloc_level;

      gcc_assert (mi->malloc_for_level);

      /* Find the minimum malloc level that has already been seen;
	 we know its allocation function must be
	 MI->allocation_function_decl since it's different from
	 CURRENT_FUNCTION_DECL; then the escaping level should be
	 MIN (LEVEL, MIN_MALLOC_LEVEL) - 1, and the allocation function
	 must be set accordingly.  */
      for (min_malloc_level = 0;
	   min_malloc_level < mi->max_malloced_level
	   && mi->malloc_for_level[min_malloc_level]; min_malloc_level++);
      if (level < min_malloc_level)
	{
	  mi->allocation_function_decl = current_function_decl;
	  mark_min_matrix_escape_level (mi, min_malloc_level, stmt);
	}
      else
	{
	  mark_min_matrix_escape_level (mi, level, stmt);
	  /* It cannot be that (level == min_malloc_level);
	     we would have returned earlier.  */
	  return;
	}
    }

  /* Find the correct malloc information.  */
  collect_data_for_malloc_call (stmt, &mcd);

  /* We accept only calls to malloc function; we do not accept
     calls like calloc and realloc.  */
  if (!mi->malloc_for_level)
    {
      mi->malloc_for_level = XCNEWVEC (gimple, level + 1);
      mi->max_malloced_level = level + 1;
    }
  else if (mi->max_malloced_level <= level)
    {
      mi->malloc_for_level
	= XRESIZEVEC (gimple, mi->malloc_for_level, level + 1);

      /* Zero the newly allocated items.  */
      memset (&(mi->malloc_for_level[mi->max_malloced_level + 1]),
	      0, (level - mi->max_malloced_level) * sizeof (tree));

      mi->max_malloced_level = level + 1;
    }
  mi->malloc_for_level[level] = stmt;
}
/* Given an assignment statement STMT whose left-hand side we know to
   be the matrix MI variable, we traverse the immediate
   uses backwards until we get to a malloc site.  We make sure that
   there is one and only one malloc site that sets this variable.  When
   we are performing the flattening we generate a new variable that
   will hold the size for each dimension; each malloc that allocates a
   dimension has the size parameter; we use that parameter to
   initialize the dimension size variable so we can use it later in
   the address calculations.  LEVEL is the dimension we're inspecting.
   Return if STMT is related to an allocation site.  */

static void
analyze_matrix_allocation_site (struct matrix_info *mi, gimple stmt,
				int level, sbitmap visited)
{
  if (gimple_assign_copy_p (stmt) || gimple_assign_cast_p (stmt))
    {
      tree rhs = gimple_assign_rhs1 (stmt);

      if (TREE_CODE (rhs) == SSA_NAME)
	{
	  gimple def = SSA_NAME_DEF_STMT (rhs);

	  analyze_matrix_allocation_site (mi, def, level, visited);
	  return;
	}
      /* If we are back to the original matrix variable then we
         are sure that this is analyzed as an access site.  */
      else if (rhs == mi->decl)
	return;
    }
  /* A result of a call to malloc.  */
  else if (is_gimple_call (stmt))
    {
      int call_flags = gimple_call_flags (stmt);

      if (!(call_flags & ECF_MALLOC))
	{
	  mark_min_matrix_escape_level (mi, level, stmt);
	  return;
	}
      else
	{
	  tree malloc_fn_decl;

	  malloc_fn_decl = gimple_call_fndecl (stmt);
	  if (malloc_fn_decl == NULL_TREE)
	    {
	      mark_min_matrix_escape_level (mi, level, stmt);
	      return;
	    }
	  if (DECL_FUNCTION_CODE (malloc_fn_decl) != BUILT_IN_MALLOC)
	    {
	      if (dump_file)
		fprintf (dump_file,
			 "Matrix %s is an argument to function %s\n",
			 get_name (mi->decl), get_name (malloc_fn_decl));
	      mark_min_matrix_escape_level (mi, level, stmt);
	      return;
	    }
	}
      /* This is a call to malloc of level 'level'.
	 mi->max_malloced_level-1 == level means that we've
	 seen a malloc statement of level 'level' before.
	 If the statement is not the same one that we've
	 seen before, then there's another malloc statement
	 for the same level, which means that we need to mark
	 it as escaping.  */
      if (mi->malloc_for_level
	  && mi->max_malloced_level - 1 == level
	  && mi->malloc_for_level[level] != stmt)
	{
	  mark_min_matrix_escape_level (mi, level, stmt);
	  return;
	}
      else
	add_allocation_site (mi, stmt, level);
      return;
    }
  /* Looks like we don't know what is happening in this
     statement so be on the safe side and mark it as escaping.  */
  mark_min_matrix_escape_level (mi, level, stmt);
}
/* The transposing decision making.
   In order to calculate the profitability of transposing, we collect two
   types of information regarding the accesses:
   1. profiling information used to express the hotness of an access, that
      is how often the matrix is accessed by this access site (count of the
      access site).
   2. which dimension in the access site is iterated by the innermost
      loop containing this access.

   The matrix will have a calculated value of weighted hotness for each
   dimension.
   Intuitively the hotness level of a dimension is a function of how
   many times it was the most frequently accessed dimension in the
   highly executed access sites of this matrix.

   It is computed by the following equation, where n is the number of
   dimensions and m is the number of the matrix access sites:

     dim_hot_level[i] = sum over j = 1..m of
			acc[j]->dim[i]->iter_by_inner_loop * count(j),
			for each i = 1..n

   acc[j]->dim[i]->iter_by_inner_loop is 1 if acc[j] iterates over
   dim[i] in the innermost loop, and is 0 otherwise.

   The organization of the new matrix should be according to the
   hotness of each dimension.  The hotness of the dimension implies
   the locality of the elements.  */
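/* A worked illustration of the heuristic (not part of the pass itself):
   suppose a matrix has one access site with execution count 1000 whose
   innermost loop iterates dimension 0, and another with count 10
   iterating dimension 1.  Then dim_hot_level[0] accumulates 1000 and
   dim_hot_level[1] accumulates 10, and the sort performed over
   DIM_HOT_LEVEL in transform_allocation_sites reorders DIM_MAP so the
   hotter dimension 0 gets the cache-friendlier position.  */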
static int
analyze_transpose (void **slot, void *data ATTRIBUTE_UNUSED)
{
  struct matrix_info *mi = (struct matrix_info *) *slot;
  int min_escape_l = mi->min_indirect_level_escape;
  struct loop *loop;
  affine_iv iv;
  struct access_site_info *acc_info;
  int i;

  if (min_escape_l < 2 || !mi->access_l)
    {
      if (mi->access_l)
	{
	  for (i = 0;
	       VEC_iterate (access_site_info_p, mi->access_l, i, acc_info);
	       i++)
	    free (acc_info);
	  VEC_free (access_site_info_p, heap, mi->access_l);
	}
      return 1;
    }
  if (!mi->dim_hot_level)
    mi->dim_hot_level =
      (gcov_type *) xcalloc (min_escape_l, sizeof (gcov_type));

  for (i = 0; VEC_iterate (access_site_info_p, mi->access_l, i, acc_info);
       i++)
    {
      if (gimple_assign_rhs_code (acc_info->stmt) == POINTER_PLUS_EXPR
	  && acc_info->level < min_escape_l)
	{
	  loop = loop_containing_stmt (acc_info->stmt);
	  if (!loop || loop->inner)
	    {
	      free (acc_info);
	      continue;
	    }
	  if (simple_iv (loop, loop, acc_info->offset, &iv, true))
	    {
	      if (iv.step != NULL)
		{
		  HOST_WIDE_INT istep;

		  istep = int_cst_value (iv.step);
		  if (istep != 0)
		    {
		      acc_info->iterated_by_inner_most_loop_p = 1;
		      mi->dim_hot_level[acc_info->level] +=
			gimple_bb (acc_info->stmt)->count;
		    }
		}
	    }
	}
      free (acc_info);
    }
  VEC_free (access_site_info_p, heap, mi->access_l);

  mi->access_l = NULL;
  return 1;
}
/* Find the index which defines the OFFSET from the base.
   We walk from use to def until we find how the offset was defined.  */

static tree
get_index_from_offset (tree offset, gimple def_stmt)
{
  tree op1, op2, index;

  if (gimple_code (def_stmt) == GIMPLE_PHI)
    return NULL;
  if ((gimple_assign_copy_p (def_stmt) || gimple_assign_cast_p (def_stmt))
      && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME)
    return get_index_from_offset (offset,
				  SSA_NAME_DEF_STMT (gimple_assign_rhs1
						     (def_stmt)));
  else if (is_gimple_assign (def_stmt)
	   && gimple_assign_rhs_code (def_stmt) == MULT_EXPR)
    {
      op1 = gimple_assign_rhs1 (def_stmt);
      op2 = gimple_assign_rhs2 (def_stmt);
      if (TREE_CODE (op1) != INTEGER_CST && TREE_CODE (op2) != INTEGER_CST)
	return NULL;
      index = (TREE_CODE (op1) == INTEGER_CST) ? op2 : op1;
      return index;
    }
  else
    return NULL;
}
/* Update MI->dimension_type_size[CURRENT_INDIRECT_LEVEL] with the size
   of the type related to the SSA_VAR, or the type related to the
   lhs of STMT, in the case that it is an INDIRECT_REF.  */

static void
update_type_size (struct matrix_info *mi, gimple stmt, tree ssa_var,
		  int current_indirect_level)
{
  tree lhs;
  HOST_WIDE_INT type_size;

  /* Update type according to the type of the INDIRECT_REF expr.  */
  if (is_gimple_assign (stmt)
      && TREE_CODE (gimple_assign_lhs (stmt)) == INDIRECT_REF)
    {
      lhs = gimple_assign_lhs (stmt);
      gcc_assert (POINTER_TYPE_P
		  (TREE_TYPE (SSA_NAME_VAR (TREE_OPERAND (lhs, 0)))));
      type_size =
	int_size_in_bytes (TREE_TYPE
			   (TREE_TYPE
			    (SSA_NAME_VAR (TREE_OPERAND (lhs, 0)))));
    }
  else
    type_size = int_size_in_bytes (TREE_TYPE (ssa_var));

  /* Record the size of elements accessed (as a whole)
     in the current indirection level (dimension).  If the size of
     elements is not known at compile time, mark it as escaping.  */
  if (type_size <= 0)
    mark_min_matrix_escape_level (mi, current_indirect_level, stmt);
  else
    {
      int l = current_indirect_level;

      if (!mi->dimension_type_size)
	{
	  mi->dimension_type_size
	    = (HOST_WIDE_INT *) xcalloc (l + 1, sizeof (HOST_WIDE_INT));
	  mi->dimension_type_size_len = l + 1;
	}
      else if (mi->dimension_type_size_len < l + 1)
	{
	  mi->dimension_type_size
	    = (HOST_WIDE_INT *) xrealloc (mi->dimension_type_size,
					  (l + 1) * sizeof (HOST_WIDE_INT));
	  memset (&mi->dimension_type_size[mi->dimension_type_size_len],
		  0, (l + 1 - mi->dimension_type_size_len)
		  * sizeof (HOST_WIDE_INT));
	  mi->dimension_type_size_len = l + 1;
	}
      /* Make sure all the accesses in the same level have the same size
	 of the type.  */
      if (!mi->dimension_type_size[l])
	mi->dimension_type_size[l] = type_size;
      else if (mi->dimension_type_size[l] != type_size)
	mark_min_matrix_escape_level (mi, l, stmt);
    }
}
/* USE_STMT represents a GIMPLE_CALL, where one of the arguments is the
   ssa var that we want to check because it came from some use of matrix
   MI.  CURRENT_INDIRECT_LEVEL is the indirection level we reached so
   far.  */

static int
analyze_accesses_for_call_stmt (struct matrix_info *mi, tree ssa_var,
				gimple use_stmt, int current_indirect_level)
{
  tree fndecl = gimple_call_fndecl (use_stmt);

  if (gimple_call_lhs (use_stmt))
    {
      tree lhs = gimple_call_lhs (use_stmt);
      struct ssa_acc_in_tree lhs_acc, rhs_acc;

      memset (&lhs_acc, 0, sizeof (lhs_acc));
      memset (&rhs_acc, 0, sizeof (rhs_acc));

      lhs_acc.ssa_var = ssa_var;
      lhs_acc.t_code = ERROR_MARK;
      ssa_accessed_in_tree (lhs, &lhs_acc);
      rhs_acc.ssa_var = ssa_var;
      rhs_acc.t_code = ERROR_MARK;
      ssa_accessed_in_call_rhs (use_stmt, &rhs_acc);

      /* The SSA must be either in the left side or in the right side,
	 to understand what is happening.
	 In case the SSA_NAME is found in both sides we should be escaping
	 at this level because in this case we cannot calculate the
	 address correctly.  */
      if ((lhs_acc.var_found && rhs_acc.var_found
	   && lhs_acc.t_code == INDIRECT_REF)
	  || (!rhs_acc.var_found && !lhs_acc.var_found))
	{
	  mark_min_matrix_escape_level (mi, current_indirect_level,
					use_stmt);
	  return current_indirect_level;
	}
      gcc_assert (!rhs_acc.var_found || !lhs_acc.var_found);

      /* If we are storing to the matrix at some level, then mark it as
	 escaping at that level.  */
      if (lhs_acc.var_found)
	{
	  int l = current_indirect_level + 1;

	  gcc_assert (lhs_acc.t_code == INDIRECT_REF);
	  mark_min_matrix_escape_level (mi, l, use_stmt);
	  return current_indirect_level;
	}
    }

  if (fndecl)
    {
      if (DECL_FUNCTION_CODE (fndecl) != BUILT_IN_FREE)
	{
	  if (dump_file)
	    fprintf (dump_file,
		     "Matrix %s: Function call %s, level %d escapes.\n",
		     get_name (mi->decl), get_name (fndecl),
		     current_indirect_level);
	  mark_min_matrix_escape_level (mi, current_indirect_level,
					use_stmt);
	}
      else if (mi->free_stmts[current_indirect_level].stmt != NULL
	       && mi->free_stmts[current_indirect_level].stmt != use_stmt)
	mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
      else
	{
	  /* Record the free statements so we can delete them
	     later.  */
	  int l = current_indirect_level;

	  mi->free_stmts[l].stmt = use_stmt;
	  mi->free_stmts[l].func = current_function_decl;
	}
    }
  return current_indirect_level;
}
/* USE_STMT represents a phi node of the ssa var that we want to
   check because it came from some use of matrix MI.
   We check all the escaping levels that get to the PHI node
   and make sure they are all the same escaping;
   if not (which is rare) we let the escaping level be the
   minimum level that gets into that PHI because starting from
   that level we cannot expect the behavior of the indirections.
   CURRENT_INDIRECT_LEVEL is the indirection level we reached so far.  */

static void
analyze_accesses_for_phi_node (struct matrix_info *mi, gimple use_stmt,
			       int current_indirect_level, sbitmap visited,
			       bool record_accesses)
{
  struct matrix_access_phi_node tmp_maphi, *maphi, **pmaphi;

  tmp_maphi.phi = use_stmt;
  if ((maphi = (struct matrix_access_phi_node *)
       htab_find (htab_mat_acc_phi_nodes, &tmp_maphi)))
    {
      if (maphi->indirection_level == current_indirect_level)
	return;
      else
	{
	  int level = MIN (maphi->indirection_level,
			   current_indirect_level);
	  size_t j;
	  gimple stmt = NULL;

	  maphi->indirection_level = level;
	  for (j = 0; j < gimple_phi_num_args (use_stmt); j++)
	    {
	      tree def = PHI_ARG_DEF (use_stmt, j);

	      if (gimple_code (SSA_NAME_DEF_STMT (def)) != GIMPLE_PHI)
		stmt = SSA_NAME_DEF_STMT (def);
	    }
	  mark_min_matrix_escape_level (mi, level, stmt);
	}
      return;
    }
  maphi = (struct matrix_access_phi_node *)
    xcalloc (1, sizeof (struct matrix_access_phi_node));
  maphi->phi = use_stmt;
  maphi->indirection_level = current_indirect_level;

  /* Insert into the hash table.  */
  pmaphi = (struct matrix_access_phi_node **)
    htab_find_slot (htab_mat_acc_phi_nodes, maphi, INSERT);
  gcc_assert (pmaphi);
  *pmaphi = maphi;

  if (!TEST_BIT (visited, SSA_NAME_VERSION (PHI_RESULT (use_stmt))))
    {
      SET_BIT (visited, SSA_NAME_VERSION (PHI_RESULT (use_stmt)));
      analyze_matrix_accesses (mi, PHI_RESULT (use_stmt),
			       current_indirect_level, false, visited,
			       record_accesses);
      RESET_BIT (visited, SSA_NAME_VERSION (PHI_RESULT (use_stmt)));
    }
}
/* USE_STMT represents an assign statement (the rhs or lhs include
   the ssa var that we want to check because it came from some use of
   matrix MI).  CURRENT_INDIRECT_LEVEL is the indirection level we
   reached so far.  */

static int
analyze_accesses_for_assign_stmt (struct matrix_info *mi, tree ssa_var,
				  gimple use_stmt,
				  int current_indirect_level,
				  bool last_op, sbitmap visited,
				  bool record_accesses)
{
  tree lhs = gimple_get_lhs (use_stmt);
  struct ssa_acc_in_tree lhs_acc, rhs_acc;

  memset (&lhs_acc, 0, sizeof (lhs_acc));
  memset (&rhs_acc, 0, sizeof (rhs_acc));

  lhs_acc.ssa_var = ssa_var;
  lhs_acc.t_code = ERROR_MARK;
  ssa_accessed_in_tree (lhs, &lhs_acc);
  rhs_acc.ssa_var = ssa_var;
  rhs_acc.t_code = ERROR_MARK;
  ssa_accessed_in_assign_rhs (use_stmt, &rhs_acc);

  /* The SSA must be either in the left side or in the right side,
     to understand what is happening.
     In case the SSA_NAME is found in both sides we should be escaping
     at this level because in this case we cannot calculate the
     address correctly.  */
  if ((lhs_acc.var_found && rhs_acc.var_found
       && lhs_acc.t_code == INDIRECT_REF)
      || (!rhs_acc.var_found && !lhs_acc.var_found))
    {
      mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
      return current_indirect_level;
    }
  gcc_assert (!rhs_acc.var_found || !lhs_acc.var_found);

  /* If we are storing to the matrix at some level, then mark it as
     escaping at that level.  */
  if (lhs_acc.var_found)
    {
      int l = current_indirect_level + 1;

      gcc_assert (lhs_acc.t_code == INDIRECT_REF);

      if (!(gimple_assign_copy_p (use_stmt)
	    || gimple_assign_cast_p (use_stmt))
	  || (TREE_CODE (gimple_assign_rhs1 (use_stmt)) != SSA_NAME))
	mark_min_matrix_escape_level (mi, l, use_stmt);
      else
	{
	  gimple def_stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (use_stmt));
	  analyze_matrix_allocation_site (mi, def_stmt, l, visited);
	  if (record_accesses)
	    record_access_alloc_site_info (mi, use_stmt, NULL_TREE,
					   NULL_TREE, l, true);
	  update_type_size (mi, use_stmt, NULL, l);
	}
      return current_indirect_level;
    }
  /* Now, check the right-hand-side, to see how the SSA variable
     is used.  */
  if (rhs_acc.var_found)
    {
      if (rhs_acc.t_code != INDIRECT_REF
	  && rhs_acc.t_code != POINTER_PLUS_EXPR
	  && rhs_acc.t_code != SSA_NAME)
	{
	  mark_min_matrix_escape_level (mi, current_indirect_level,
					use_stmt);
	  return current_indirect_level;
	}
      /* If the access in the RHS has an indirection, increase the
         indirection level.  */
      if (rhs_acc.t_code == INDIRECT_REF)
	{
	  if (record_accesses)
	    record_access_alloc_site_info (mi, use_stmt, NULL_TREE,
					   NULL_TREE,
					   current_indirect_level, true);
	  current_indirect_level += 1;
	}
      else if (rhs_acc.t_code == POINTER_PLUS_EXPR)
	{
	  gcc_assert (rhs_acc.second_op);
	  if (last_op)
	    /* Currently we support only one PLUS expression on the
	       SSA_NAME that holds the base address of the current
	       indirection level; to support the more general case there
	       is a need to hold a stack of expressions and regenerate
	       the calculation later.  */
	    mark_min_matrix_escape_level (mi, current_indirect_level,
					  use_stmt);
	  else
	    {
	      tree index;
	      tree op1, op2;

	      op1 = gimple_assign_rhs1 (use_stmt);
	      op2 = gimple_assign_rhs2 (use_stmt);

	      op2 = (op1 == ssa_var) ? op2 : op1;
	      if (TREE_CODE (op2) == INTEGER_CST)
		index =
		  build_int_cst (TREE_TYPE (op1),
				 TREE_INT_CST_LOW (op2) /
				 int_size_in_bytes (TREE_TYPE (op1)));
	      else
		{
		  index =
		    get_index_from_offset (op2, SSA_NAME_DEF_STMT (op2));
		  if (index == NULL_TREE)
		    {
		      mark_min_matrix_escape_level (mi,
						    current_indirect_level,
						    use_stmt);
		      return current_indirect_level;
		    }
		}
	      if (record_accesses)
		record_access_alloc_site_info (mi, use_stmt, op2,
					       index,
					       current_indirect_level,
					       false);
	    }
	}
      /* If we are storing this level of indirection, mark it as
	 escaping.  */
      if (lhs_acc.t_code == INDIRECT_REF || TREE_CODE (lhs) != SSA_NAME)
	{
	  int l = current_indirect_level;

	  /* One exception is when we are storing to the matrix
	     variable itself; this is the case of malloc; we must make
	     sure that it's the one and only one call to malloc, so
	     we call analyze_matrix_allocation_site to check
	     this out.  */
	  if (TREE_CODE (lhs) != VAR_DECL || lhs != mi->decl)
	    mark_min_matrix_escape_level (mi, current_indirect_level,
					  use_stmt);
	  else
	    {
	      /* Also update the escaping level.  */
	      analyze_matrix_allocation_site (mi, use_stmt, l, visited);
	      if (record_accesses)
		record_access_alloc_site_info (mi, use_stmt, NULL_TREE,
					       NULL_TREE, l, true);
	    }
	}
      else
	{
	  /* We are placing it in an SSA, follow that SSA.  */
	  analyze_matrix_accesses (mi, lhs,
				   current_indirect_level,
				   rhs_acc.t_code == POINTER_PLUS_EXPR,
				   visited, record_accesses);
	}
    }
  return current_indirect_level;
}
/* Given a SSA_VAR (coming from a use statement of the matrix MI),
   follow its uses and level of indirection and find out the minimum
   indirection level it escapes in (the highest dimension) and the maximum
   level it is accessed in (this will be the actual dimension of the
   matrix).  The information is accumulated in MI.
   We look at the immediate uses; if one escapes we finish; if not,
   we make a recursive call for each one of the immediate uses of the
   resulting SSA name.  */

static void
analyze_matrix_accesses (struct matrix_info *mi, tree ssa_var,
			 int current_indirect_level, bool last_op,
			 sbitmap visited, bool record_accesses)
{
  imm_use_iterator imm_iter;
  use_operand_p use_p;

  update_type_size (mi, SSA_NAME_DEF_STMT (ssa_var), ssa_var,
		    current_indirect_level);

  /* We don't go beyond the escaping level when we are performing the
     flattening.  NOTE: we keep the last indirection level that doesn't
     escape.  */
  if (mi->min_indirect_level_escape > -1
      && mi->min_indirect_level_escape <= current_indirect_level)
    return;

  /* Now go over the uses of the SSA_NAME and check how it is used in
     each one of them.  We are mainly looking for the pattern INDIRECT_REF,
     then a POINTER_PLUS_EXPR, then INDIRECT_REF etc., while in between
     there could be any number of copies and casts.  */
  gcc_assert (TREE_CODE (ssa_var) == SSA_NAME);

  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, ssa_var)
  {
    gimple use_stmt = USE_STMT (use_p);
    if (gimple_code (use_stmt) == GIMPLE_PHI)
      /* We check all the escaping levels that get to the PHI node
	 and make sure they are all the same escaping;
	 if not (which is rare) we let the escaping level be the
	 minimum level that gets into that PHI because starting from
	 that level we cannot expect the behavior of the indirections.  */
      analyze_accesses_for_phi_node (mi, use_stmt, current_indirect_level,
				     visited, record_accesses);
    else if (is_gimple_call (use_stmt))
      analyze_accesses_for_call_stmt (mi, ssa_var, use_stmt,
				      current_indirect_level);
    else if (is_gimple_assign (use_stmt))
      current_indirect_level =
	analyze_accesses_for_assign_stmt (mi, ssa_var, use_stmt,
					  current_indirect_level, last_op,
					  visited, record_accesses);
  }
}
/* A walk_tree function to go over the VAR_DECL, PARM_DECL nodes of
   the malloc size expression and check that those aren't changed
   over the function.  */

static tree
check_var_notmodified_p (tree *tp, int *walk_subtrees, void *data)
{
  basic_block bb;
  tree t = *tp;
  check_var_data *callback_data = (check_var_data *) data;
  tree fn = callback_data->fn;
  gimple_stmt_iterator gsi;
  gimple stmt;

  if (TREE_CODE (t) != VAR_DECL && TREE_CODE (t) != PARM_DECL)
    return NULL;
  *walk_subtrees = 1;

  FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (fn))
  {
    for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
      {
	stmt = gsi_stmt (gsi);
	if (!is_gimple_assign (stmt) && !is_gimple_call (stmt))
	  continue;
	if (gimple_get_lhs (stmt) == t)
	  {
	    callback_data->stmt = stmt;
	    return t;
	  }
      }
  }
  return NULL;
}
/* Go backwards in the use-def chains and find out the expression
   represented by the possible SSA name in STMT, until it is composed
   of only VAR_DECL, PARM_DECL and INT_CST.  In case of phi nodes
   we make sure that all the arguments represent the same subexpression,
   otherwise we fail.  */

static tree
can_calculate_stmt_before_stmt (gimple stmt, sbitmap visited)
{
  tree op1, op2, res;
  enum tree_code code;

  switch (gimple_code (stmt))
    {
    case GIMPLE_ASSIGN:
      code = gimple_assign_rhs_code (stmt);
      op1 = gimple_assign_rhs1 (stmt);

      switch (code)
	{
	case POINTER_PLUS_EXPR:
	case PLUS_EXPR:
	case MINUS_EXPR:
	case MULT_EXPR:
	  op2 = gimple_assign_rhs2 (stmt);
	  op1 = can_calculate_expr_before_stmt (op1, visited);
	  if (!op1)
	    return NULL_TREE;
	  op2 = can_calculate_expr_before_stmt (op2, visited);
	  if (op2)
	    return fold_build2 (code, gimple_expr_type (stmt), op1, op2);
	  return NULL_TREE;

	CASE_CONVERT:
	  res = can_calculate_expr_before_stmt (op1, visited);
	  if (res != NULL_TREE)
	    return build1 (code, gimple_expr_type (stmt), res);
	  else
	    return NULL_TREE;

	default:
	  if (gimple_assign_single_p (stmt))
	    return can_calculate_expr_before_stmt (op1, visited);
	  else
	    return NULL_TREE;
	}

    case GIMPLE_PHI:
      {
	size_t j;

	res = NULL_TREE;
	/* Make sure all the arguments represent the same value.  */
	for (j = 0; j < gimple_phi_num_args (stmt); j++)
	  {
	    tree new_res;
	    tree def = PHI_ARG_DEF (stmt, j);

	    new_res = can_calculate_expr_before_stmt (def, visited);
	    if (res == NULL_TREE)
	      res = new_res;
	    else if (!new_res || !expressions_equal_p (res, new_res))
	      return NULL_TREE;
	  }
	return res;
      }

    default:
      return NULL_TREE;
    }
}
/* Go backwards in the use-def chains and find out the expression
   represented by the possible SSA name in EXPR, until it is composed
   of only VAR_DECL, PARM_DECL and INT_CST.  In case of phi nodes
   we make sure that all the arguments represent the same subexpression,
   otherwise we fail.  */

static tree
can_calculate_expr_before_stmt (tree expr, sbitmap visited)
{
  gimple def_stmt;
  tree res;

  switch (TREE_CODE (expr))
    {
    case SSA_NAME:
      /* Case of a loop: we don't know how to represent this expression.  */
      if (TEST_BIT (visited, SSA_NAME_VERSION (expr)))
	return NULL_TREE;

      SET_BIT (visited, SSA_NAME_VERSION (expr));
      def_stmt = SSA_NAME_DEF_STMT (expr);
      res = can_calculate_stmt_before_stmt (def_stmt, visited);
      RESET_BIT (visited, SSA_NAME_VERSION (expr));
      return res;
    case VAR_DECL:
    case PARM_DECL:
    case INTEGER_CST:
      return expr;

    default:
      return NULL_TREE;
    }
}
/* There should be only one allocation function for the dimensions
   that don't escape.  Here we check the allocation sites in this
   function.  We must make sure that all the dimensions are allocated
   using malloc and that the malloc size parameter expression could be
   pre-calculated before the call to the malloc of dimension 0
   (the top-level malloc).

   Given a candidate matrix for flattening -- MI -- check if it's
   appropriate for flattening -- we analyze the allocation
   sites that we recorded in the previous analysis.  The result of the
   analysis is a level of indirection (matrix dimension) in which the
   flattening is safe.  We check the following conditions:
   1. There is only one allocation site for each dimension.
   2. The allocation sites of all the dimensions are in the same
      function.
   (The above two are taken care of during the analysis when
   we check the allocation site.)
   3. All the dimensions that we flatten are allocated at once; thus
      the total size must be known before the allocation of the
      dimension 0 (top level) -- we must make sure we represent the
      size of the allocation as an expression of global parameters or
      constants and that those don't change over the function.  */
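/* For example (an illustration, not code from this file): in the
   allocation pattern

     a = (int **) malloc (dim1 * sizeof (int *));
     for (i = 0; i < dim1; i++)
       a[i] = (int *) malloc (dim2 * sizeof (int));

   condition 3 holds when dim1 and dim2 are globals (or parameters)
   never stored to in the allocation function: the flattened size
   dim1 * dim2 * sizeof (int) can then be computed before the level-0
   malloc.  */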
static int
check_allocation_function (void **slot, void *data ATTRIBUTE_UNUSED)
{
  int level;
  struct matrix_info *mi = (struct matrix_info *) *slot;
  sbitmap visited;

  if (!mi->malloc_for_level)
    return 1;

  visited = sbitmap_alloc (num_ssa_names);

  /* Do nothing if the current function is not the allocation
     function of MI.  */
  if (mi->allocation_function_decl != current_function_decl
      /* We aren't in the main allocation function yet.  */
      || !mi->malloc_for_level[0])
    return 1;

  for (level = 1; level < mi->max_malloced_level; level++)
    if (!mi->malloc_for_level[level])
      break;

  mark_min_matrix_escape_level (mi, level, NULL);

  /* Check if the expression of the size passed to malloc could be
     pre-calculated before the malloc of level 0.  */
  for (level = 1; level < mi->min_indirect_level_escape; level++)
    {
      gimple call_stmt;
      tree size;
      struct malloc_call_data mcd = {NULL, NULL_TREE, NULL_TREE};

      call_stmt = mi->malloc_for_level[level];

      /* Find the correct malloc information.  */
      collect_data_for_malloc_call (call_stmt, &mcd);

      /* No need to check anticipation for constants.  */
      if (TREE_CODE (mcd.size_var) == INTEGER_CST)
	{
	  if (!mi->dimension_size)
	    {
	      mi->dimension_size =
		(tree *) xcalloc (mi->min_indirect_level_escape,
				  sizeof (tree));
	      mi->dimension_size_orig =
		(tree *) xcalloc (mi->min_indirect_level_escape,
				  sizeof (tree));
	    }
	  mi->dimension_size[level] = mcd.size_var;
	  mi->dimension_size_orig[level] = mcd.size_var;
	  continue;
	}
      /* ??? Here we should also add a way to calculate the size
	 expression, not only know that it is anticipated.  */
      sbitmap_zero (visited);
      size = can_calculate_expr_before_stmt (mcd.size_var, visited);
      if (size == NULL_TREE)
	{
	  mark_min_matrix_escape_level (mi, level, call_stmt);
	  if (dump_file)
	    fprintf (dump_file,
		     "Matrix %s: Cannot calculate the size of allocation, escaping at level %d\n",
		     get_name (mi->decl), level);
	  break;
	}
      if (!mi->dimension_size)
	{
	  mi->dimension_size =
	    (tree *) xcalloc (mi->min_indirect_level_escape, sizeof (tree));
	  mi->dimension_size_orig =
	    (tree *) xcalloc (mi->min_indirect_level_escape, sizeof (tree));
	}
      mi->dimension_size[level] = size;
      mi->dimension_size_orig[level] = size;
    }

  /* We don't need those anymore.  */
  for (level = mi->min_indirect_level_escape;
       level < mi->max_malloced_level; level++)
    mi->malloc_for_level[level] = NULL;
  return 1;
}
/* Track all access and allocation sites.  */

static void
find_sites_in_func (bool record)
{
  sbitmap visited_stmts_1;

  gimple_stmt_iterator gsi;
  gimple stmt;
  basic_block bb;
  struct matrix_info tmpmi, *mi;

  visited_stmts_1 = sbitmap_alloc (num_ssa_names);

  FOR_EACH_BB (bb)
  {
    for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
      {
	tree lhs;

	stmt = gsi_stmt (gsi);
	lhs = gimple_get_lhs (stmt);
	if (lhs != NULL_TREE
	    && TREE_CODE (lhs) == VAR_DECL)
	  {
	    tmpmi.decl = lhs;
	    if ((mi = (struct matrix_info *) htab_find (matrices_to_reorg,
							&tmpmi)))
	      {
		sbitmap_zero (visited_stmts_1);
		analyze_matrix_allocation_site (mi, stmt, 0,
						visited_stmts_1);
	      }
	  }
	if (is_gimple_assign (stmt)
	    && gimple_assign_single_p (stmt)
	    && TREE_CODE (lhs) == SSA_NAME
	    && TREE_CODE (gimple_assign_rhs1 (stmt)) == VAR_DECL)
	  {
	    tmpmi.decl = gimple_assign_rhs1 (stmt);
	    if ((mi = (struct matrix_info *) htab_find (matrices_to_reorg,
							&tmpmi)))
	      {
		sbitmap_zero (visited_stmts_1);
		analyze_matrix_accesses (mi, lhs, 0,
					 false, visited_stmts_1, record);
	      }
	  }
      }
  }
  sbitmap_free (visited_stmts_1);
}
/* Traverse the use-def chains to see if there are matrices that
   are passed through pointers and we cannot know how they are accessed.
   For each SSA-name defined by a global variable of our interest,
   we traverse the use-def chains of the SSA and follow the indirections,
   and record in what level of indirection the use of the variable
   escapes.  A use of a pointer escapes when it is passed to a function,
   stored into memory or assigned (except in malloc and free calls).  */

static void
record_all_accesses_in_func (void)
{
  unsigned i;
  sbitmap visited_stmts_1;

  visited_stmts_1 = sbitmap_alloc (num_ssa_names);

  for (i = 0; i < num_ssa_names; i++)
    {
      struct matrix_info tmpmi, *mi;
      tree ssa_var = ssa_name (i);
      tree rhs, lhs;

      if (!ssa_var
	  || !is_gimple_assign (SSA_NAME_DEF_STMT (ssa_var))
	  || !gimple_assign_single_p (SSA_NAME_DEF_STMT (ssa_var)))
	continue;
      rhs = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (ssa_var));
      lhs = gimple_assign_lhs (SSA_NAME_DEF_STMT (ssa_var));
      if (TREE_CODE (rhs) != VAR_DECL && TREE_CODE (lhs) != VAR_DECL)
	continue;

      /* If the RHS is a matrix that we want to analyze, follow the def-use
	 chain for this SSA_VAR and check for escapes or apply the
	 flattening.  */
      tmpmi.decl = rhs;
      if ((mi = (struct matrix_info *) htab_find (matrices_to_reorg,
						  &tmpmi)))
	{
	  /* This variable will track the visited PHI nodes, so we can limit
	     its size to the maximum number of SSA names.  */
	  sbitmap_zero (visited_stmts_1);
	  analyze_matrix_accesses (mi, ssa_var,
				   0, false, visited_stmts_1, true);
	}
    }
  sbitmap_free (visited_stmts_1);
}
/* Used when we want to convert the expression: RESULT = something *
   ORIG to RESULT = something * NEW_VAL.  If ORIG and NEW_VAL are powers
   of 2, shift operations can be done; otherwise division and
   multiplication are used.  */

static tree
compute_offset (HOST_WIDE_INT orig, HOST_WIDE_INT new_val, tree result)
{
  int x, y;
  tree result1, ratio, log, orig_tree, new_tree;

  x = exact_log2 (orig);
  y = exact_log2 (new_val);

  if (x != -1 && y != -1)
    {
      if (x == y)
	return result;
      else if (x > y)
	{
	  log = build_int_cst (TREE_TYPE (result), x - y);
	  result1 =
	    fold_build2 (LSHIFT_EXPR, TREE_TYPE (result), result, log);
	  return result1;
	}
      log = build_int_cst (TREE_TYPE (result), y - x);
      result1 = fold_build2 (RSHIFT_EXPR, TREE_TYPE (result), result, log);

      return result1;
    }
  orig_tree = build_int_cst (TREE_TYPE (result), orig);
  new_tree = build_int_cst (TREE_TYPE (result), new_val);
  ratio = fold_build2 (TRUNC_DIV_EXPR, TREE_TYPE (result), result,
		       orig_tree);
  result1 = fold_build2 (MULT_EXPR, TREE_TYPE (result), ratio, new_tree);

  return result1;
}
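/* Illustration of the two paths above (not part of the pass): with
   ORIG == 16 and NEW_VAL == 4 both are powers of two, exact_log2
   yields 4 and 2, and the conversion is done with a single shift of
   RESULT by 2; with ORIG == 12 and NEW_VAL == 8 it falls back to
   building (RESULT / 12) * 8.  */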
/* We know that we are allowed to perform matrix flattening (according to the
   escape analysis), so we traverse the use-def chains of the SSA vars
   defined by the global variables pointing to the matrices of our interest.
   In each use of the SSA we calculate the offset from the base address
   according to the following equation:

     a[I1][I2]...[Ik], where D1..Dk is the length of each dimension and the
     escaping level is m <= k, and a' is the new allocated matrix,
     will be translated to:

       b = a' + I1*D2*...*Dm + I2*D3*...*Dm + ... + Im

       if m < k:
	 b[Im+1]...[Ik]  (access)
       else
	 X = b is the new detected base address.  */
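/* A worked instance of the equation above (illustration only): for a
   three-dimensional matrix a[D1][D2][D3] flattened at escape level
   m == 3, the access a[I1][I2][I3] is rewritten so that

     b = a' + I1*D2*D3 + I2*D3 + I3

   and the whole sum becomes the single offset operand of one
   POINTER_PLUS_EXPR instead of three chained indirections.  */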
static int
transform_access_sites (void **slot, void *data ATTRIBUTE_UNUSED)
{
  gimple_stmt_iterator gsi;
  struct matrix_info *mi = (struct matrix_info *) *slot;
  int min_escape_l = mi->min_indirect_level_escape;
  struct access_site_info *acc_info;
  enum tree_code code;
  int i;

  if (min_escape_l < 2 || !mi->access_l)
    return 1;
  for (i = 0; VEC_iterate (access_site_info_p, mi->access_l, i, acc_info);
       i++)
    {
      /* This is possible because we collect the access sites before
         we determine the final minimum indirection level.  */
      if (acc_info->level >= min_escape_l)
	{
	  free (acc_info);
	  continue;
	}
      if (acc_info->is_alloc)
	{
	  if (acc_info->level >= 0 && gimple_bb (acc_info->stmt))
	    {
	      ssa_op_iter iter;
	      tree def;
	      gimple stmt = acc_info->stmt;
	      tree lhs;

	      FOR_EACH_SSA_TREE_OPERAND (def, stmt, iter, SSA_OP_DEF)
		mark_sym_for_renaming (SSA_NAME_VAR (def));
	      gsi = gsi_for_stmt (stmt);
	      gcc_assert (is_gimple_assign (acc_info->stmt));
	      lhs = gimple_assign_lhs (acc_info->stmt);
	      if (TREE_CODE (lhs) == SSA_NAME
		  && acc_info->level < min_escape_l - 1)
		{
		  imm_use_iterator imm_iter;
		  use_operand_p use_p;
		  gimple use_stmt;

		  FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, lhs)
		    FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
		  {
		    tree rhs, tmp;
		    gimple new_stmt;

		    gcc_assert (gimple_assign_rhs_code (acc_info->stmt)
				== INDIRECT_REF);
		    /* Emit a convert statement to convert to the type
		       of use.  */
		    tmp = create_tmp_var (TREE_TYPE (lhs), "new");
		    add_referenced_var (tmp);
		    rhs = gimple_assign_rhs1 (acc_info->stmt);
		    rhs = fold_convert (TREE_TYPE (tmp),
					TREE_OPERAND (rhs, 0));
		    new_stmt = gimple_build_assign (tmp, rhs);
		    tmp = make_ssa_name (tmp, new_stmt);
		    gimple_assign_set_lhs (new_stmt, tmp);
		    gsi = gsi_for_stmt (acc_info->stmt);
		    gsi_insert_after (&gsi, new_stmt, GSI_SAME_STMT);
		    SET_USE (use_p, tmp);
		  }
		}
	      if (acc_info->level < min_escape_l - 1)
		gsi_remove (&gsi, true);
	    }
	  free (acc_info);
	  continue;
	}
      code = gimple_assign_rhs_code (acc_info->stmt);
      if (code == INDIRECT_REF
	  && acc_info->level < min_escape_l - 1)
	{
	  /* Replace the INDIRECT_REF with a NOP (cast); usually we are
	     casting from "pointer to type" to "type".  */
	  tree t =
	    build1 (NOP_EXPR,
		    TREE_TYPE (gimple_assign_rhs1 (acc_info->stmt)),
		    TREE_OPERAND (gimple_assign_rhs1 (acc_info->stmt), 0));
	  gimple_assign_set_rhs_code (acc_info->stmt, NOP_EXPR);
	  gimple_assign_set_rhs1 (acc_info->stmt, t);
	}
      else if (code == POINTER_PLUS_EXPR
	       && acc_info->level < (min_escape_l))
	{
	  imm_use_iterator imm_iter;
	  use_operand_p use_p;
	  tree offset;
	  int k = acc_info->level;
	  tree num_elements, total_elements;
	  tree tmp1;
	  tree d_size = mi->dimension_size[k];

	  /* We already made sure in the analysis that the first operand
	     is the base and the second is the offset.  */
	  offset = acc_info->offset;
	  if (mi->dim_map[k] == min_escape_l - 1)
	    {
	      /* Offset is the offset of the last dimension that we
	         need to check for escaping.  */
	      if (!check_transpose_p || mi->is_transposed_p == false)
		tmp1 = offset;
	      else
		{
		  tree new_offset;

		  new_offset =
		    compute_offset (mi->dimension_type_size[min_escape_l],
				    mi->dimension_type_size[k + 1], offset);

		  total_elements = new_offset;
		  if (new_offset != offset)
		    {
		      gsi = gsi_for_stmt (acc_info->stmt);
		      tmp1 = force_gimple_operand_gsi (&gsi, total_elements,
						       true, NULL,
						       true, GSI_SAME_STMT);
		    }
		  else
		    tmp1 = offset;
		}
	    }
	  else
	    {
	      d_size = mi->dimension_size[mi->dim_map[k] + 1];
	      num_elements =
		fold_build2 (MULT_EXPR, sizetype,
			     fold_convert (sizetype, acc_info->index),
			     fold_convert (sizetype, d_size));
	      add_referenced_var (d_size);
	      gsi = gsi_for_stmt (acc_info->stmt);
	      tmp1 = force_gimple_operand_gsi (&gsi, num_elements, true,
					       NULL, true, GSI_SAME_STMT);
	    }
	  /* Replace the offset if needed.  */
	  if (tmp1 != offset)
	    {
	      if (TREE_CODE (offset) == SSA_NAME)
		{
		  gimple use_stmt;

		  FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, offset)
		    FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
		      if (use_stmt == acc_info->stmt)
			SET_USE (use_p, tmp1);
		}
	      else
		{
		  gcc_assert (TREE_CODE (offset) == INTEGER_CST);
		  gimple_assign_set_rhs2 (acc_info->stmt, tmp1);
		  update_stmt (acc_info->stmt);
		}
	    }
	}
      /* ??? Meanwhile this happens because we record the same access
	 site more than once; we should be using a hash table to
	 avoid this and insert the STMT of the access site only
	 once.
	 else
	   gcc_unreachable (); */
      free (acc_info);
    }
  VEC_free (access_site_info_p, heap, mi->access_l);

  update_ssa (TODO_update_ssa);
#ifdef ENABLE_CHECKING
  verify_ssa (true);
#endif
  return 1;
}
/* Sort the array A of counts.  Arrange DIM_MAP to reflect the new
   order.  */

static void
sort_dim_hot_level (gcov_type *a, int *dim_map, int n)
{
  int i, j, tmp1;
  gcov_type tmp;

  for (i = 0; i < n - 1; i++)
    {
      for (j = 0; j < n - 1 - i; j++)
	{
	  if (a[j + 1] < a[j])
	    {
	      tmp = a[j];	/* Swap a[j] and a[j + 1].  */
	      a[j] = a[j + 1];
	      a[j + 1] = tmp;
	      tmp1 = dim_map[j];
	      dim_map[j] = dim_map[j + 1];
	      dim_map[j + 1] = tmp1;
	    }
	}
    }
}
/* Replace multiple mallocs (one for each dimension) with one malloc
   with the size of DIM1*DIM2*...*DIMN*size_of_element.
   Make sure that we hold the size in the malloc site inside a
   new global variable; this way we ensure that the size doesn't
   change and it is accessible from all the other functions that
   use the matrix.  Also, the original calls to free are deleted,
   and replaced by a new call to free the flattened matrix.  */
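/* For the running two-dimensional example (illustration only), the
   original pair of calls

     a = (int **) malloc (dim1 * sizeof (int *));
     a[i] = (int *) malloc (dim2 * sizeof (int));   (for each i)

   becomes one malloc of dim1 * dim2 * sizeof (int); the dimension
   sizes are stored in new global variables so every function that
   accesses the matrix sees the same sizes, and the per-row calls to
   free are replaced by a single free of the flattened block.  */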
2013 transform_allocation_sites (void **slot
, void *data ATTRIBUTE_UNUSED
)
2016 struct matrix_info
*mi
;
2017 tree type
, oldfn
, prev_dim_size
;
2018 gimple call_stmt_0
, use_stmt
;
2019 struct cgraph_node
*c_node
;
2020 struct cgraph_edge
*e
;
2021 gimple_stmt_iterator gsi
;
2022 struct malloc_call_data mcd
= {NULL
, NULL_TREE
, NULL_TREE
};
2023 HOST_WIDE_INT element_size
;
2025 imm_use_iterator imm_iter
;
2026 use_operand_p use_p
;
2027 tree old_size_0
, tmp
;
2031 mi
= (struct matrix_info
*) *slot
;
2033 min_escape_l
= mi
->min_indirect_level_escape
;
2035 if (!mi
->malloc_for_level
)
2036 mi
->min_indirect_level_escape
= 0;
2038 if (mi
->min_indirect_level_escape
< 2)
2041 mi
->dim_map
= (int *) xcalloc (mi
->min_indirect_level_escape
, sizeof (int));
2042 for (i
= 0; i
< mi
->min_indirect_level_escape
; i
++)
2044 if (check_transpose_p
)
2050 fprintf (dump_file
, "Matrix %s:\n", get_name (mi
->decl
));
2051 for (i
= 0; i
< min_escape_l
; i
++)
2053 fprintf (dump_file
, "dim %d before sort ", i
);
2054 if (mi
->dim_hot_level
)
2056 "count is " HOST_WIDEST_INT_PRINT_DEC
" \n",
2057 mi
->dim_hot_level
[i
]);
2060 sort_dim_hot_level (mi
->dim_hot_level
, mi
->dim_map
,
2061 mi
->min_indirect_level_escape
);
2063 for (i
= 0; i
< min_escape_l
; i
++)
2065 fprintf (dump_file
, "dim %d after sort\n", i
);
2066 if (mi
->dim_hot_level
)
2067 fprintf (dump_file
, "count is " HOST_WIDE_INT_PRINT_DEC
2068 " \n", (HOST_WIDE_INT
) mi
->dim_hot_level
[i
]);
2070 for (i
= 0; i
< mi
->min_indirect_level_escape
; i
++)
2073 fprintf (dump_file
, "dim_map[%d] after sort %d\n", i
,
2075 if (mi
->dim_map
[i
] != i
)
2079 "Transposed dimensions: dim %d is now dim %d\n",
2081 mi
->is_transposed_p
= true;
2087 for (i
= 0; i
< mi
->min_indirect_level_escape
; i
++)
  /* Call statement of allocation site of level 0.  */
  call_stmt_0 = mi->malloc_for_level[0];

  /* Find the correct malloc information.  */
  collect_data_for_malloc_call (call_stmt_0, &mcd);

  mi->dimension_size[0] = mcd.size_var;
  mi->dimension_size_orig[0] = mcd.size_var;
  /* Make sure that the variables in the size expression for
     all the dimensions (above level 0) aren't modified in
     the allocation function.  */
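  /* For illustration (a hypothetical allocation function): in

       a = (int **) malloc (dim1 * sizeof (int *));
       dim2 = n;			<- modifies a size variable
       for (i = 0; i < dim1; i++)
	 a[i] = (int *) malloc (dim2 * sizeof (int));

     the level-1 size expression depends on dim2, which changes between
     the two mallocs, so the escape level is capped below that dimension
     and it is not flattened.  */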
  for (i = 1; i < mi->min_indirect_level_escape; i++)
    {
      tree t;
      check_var_data data;

      /* mi->dimension_size must contain the expression of the size calculated
         in check_allocation_function.  */
      gcc_assert (mi->dimension_size[i]);

      data.fn = mi->allocation_function_decl;
      data.stmt = NULL;
      t = walk_tree_without_duplicates (&(mi->dimension_size[i]),
					check_var_notmodified_p,
					&data);
      if (t != NULL)
	{
	  mark_min_matrix_escape_level (mi, i, data.stmt);
	  break;
	}
    }

  if (mi->min_indirect_level_escape < 2)
    return 1;
  /* Make sure that the size expression is available before the call
     to malloc of level 0.  */
  gsi = gsi_for_stmt (call_stmt_0);

  /* Find out the size of each dimension by looking at the malloc
     sites and create a global variable to hold it.
     We add the assignment to the global before the malloc of level 0.  */

  /* To be able to produce gimple temporaries.  */
  oldfn = current_function_decl;
  current_function_decl = mi->allocation_function_decl;
  push_cfun (DECL_STRUCT_FUNCTION (mi->allocation_function_decl));
  /* Set the dimension sizes as follows:
     DIM_SIZE[i] = DIM_SIZE[n] * ... * DIM_SIZE[i]
     where n is the maximum non-escaping level.  */
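  /* For illustration (hypothetical numbers): for a 2-level int matrix
     with dim1 = 100, dim2 = 200 and sizeof (int) == 4:

       DIM_SIZE[1] = 200 * 4   = 800	  <- bytes of one flattened row
       DIM_SIZE[0] = 100 * 800 = 80000	  <- bytes of the whole matrix

     DIM_SIZE[0] then becomes the argument of the single remaining
     malloc.  */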
  element_size = mi->dimension_type_size[mi->min_indirect_level_escape];
  prev_dim_size = NULL_TREE;

  for (i = mi->min_indirect_level_escape - 1; i >= 0; i--)
    {
      tree dim_size, dim_var, d_type_size;
      gimple stmt;

      /* Now put the size expression in a global variable and initialize it to
         the size expression before the malloc of level 0.  */
      dim_var =
	add_new_static_var (TREE_TYPE
			    (mi->dimension_size_orig[mi->dim_map[i]]));
      type = TREE_TYPE (mi->dimension_size_orig[mi->dim_map[i]]);

      /* DIM_SIZE = MALLOC_SIZE_PARAM / TYPE_SIZE.  */
      /* Find which dim ID becomes dim I.  */
      for (id = 0; id < mi->min_indirect_level_escape; id++)
	if (mi->dim_map[id] == i)
	  break;
      d_type_size =
	build_int_cst (type, mi->dimension_type_size[id + 1]);
      if (!prev_dim_size)
	prev_dim_size = build_int_cst (type, element_size);
      if (!check_transpose_p && i == mi->min_indirect_level_escape - 1)
	dim_size = mi->dimension_size_orig[id];
      else
	{
	  dim_size =
	    fold_build2 (TRUNC_DIV_EXPR, type, mi->dimension_size_orig[id],
			 d_type_size);
	  dim_size = fold_build2 (MULT_EXPR, type, dim_size, prev_dim_size);
	}
      dim_size = force_gimple_operand_gsi (&gsi, dim_size, true, NULL,
					   true, GSI_SAME_STMT);
      /* GLOBAL_HOLDING_THE_SIZE = DIM_SIZE.  */
      stmt = gimple_build_assign (dim_var, dim_size);
      mark_symbols_for_renaming (stmt);
      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);

      prev_dim_size = mi->dimension_size[i] = dim_var;
    }
  update_ssa (TODO_update_ssa);
  /* Replace the malloc size argument in the malloc of level 0 to be
     the size of all the dimensions.  */
  c_node = cgraph_node (mi->allocation_function_decl);
  old_size_0 = gimple_call_arg (call_stmt_0, 0);
  tmp = force_gimple_operand_gsi (&gsi, mi->dimension_size[0], true,
				  NULL, true, GSI_SAME_STMT);
  if (TREE_CODE (old_size_0) == SSA_NAME)
    {
      FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, old_size_0)
	FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
	  if (use_stmt == call_stmt_0)
	    SET_USE (use_p, tmp);
    }
  /* When deleting the calls to malloc we also need to remove the edge
     from the call graph to keep it consistent.  Notice that cgraph_edge
     may create a new node in the call graph if there is no node for the
     given declaration; this shouldn't be the case but currently there is
     no way to check this outside of "cgraph.c".  */
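  /* For illustration (hypothetical temporary names): at each inner level
     the statement chain created by the original allocation,

       D.1 = malloc (sz);	<- the call itself
       D.2 = (int *) D.1;	<- the type cast
       a[i] = D.2;		<- storing the allocated area

     is deleted below, together with the corresponding malloc edge in the
     call graph.  */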
  for (i = 1; i < mi->min_indirect_level_escape; i++)
    {
      gimple_stmt_iterator gsi;
      gimple use_stmt1 = NULL;

      gimple call_stmt = mi->malloc_for_level[i];
      gcc_assert (is_gimple_call (call_stmt));
      e = cgraph_edge (c_node, call_stmt);
      gcc_assert (e);
      cgraph_remove_edge (e);
      gsi = gsi_for_stmt (call_stmt);
      /* Remove the call stmt.  */
      gsi_remove (&gsi, true);
      /* Remove the type cast stmt.  */
      FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter,
			     gimple_call_lhs (call_stmt))
      {
	use_stmt1 = use_stmt;
	gsi = gsi_for_stmt (use_stmt);
	gsi_remove (&gsi, true);
      }
      /* Remove the assignment of the allocated area.  */
      FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter,
			     gimple_get_lhs (use_stmt1))
      {
	gsi = gsi_for_stmt (use_stmt);
	gsi_remove (&gsi, true);
      }
    }
  update_ssa (TODO_update_ssa);
#ifdef ENABLE_CHECKING
  verify_ssa (true);
#endif
  /* Delete the calls to free.  */
  for (i = 1; i < mi->min_indirect_level_escape; i++)
    {
      gimple_stmt_iterator gsi;

      /* ??? wonder why this case is possible but we failed on it once.  */
      if (!mi->free_stmts[i].stmt)
	continue;

      c_node = cgraph_node (mi->free_stmts[i].func);
      gcc_assert (is_gimple_call (mi->free_stmts[i].stmt));
      e = cgraph_edge (c_node, mi->free_stmts[i].stmt);
      gcc_assert (e);
      cgraph_remove_edge (e);
      current_function_decl = mi->free_stmts[i].func;
      set_cfun (DECL_STRUCT_FUNCTION (mi->free_stmts[i].func));
      gsi = gsi_for_stmt (mi->free_stmts[i].stmt);
      gsi_remove (&gsi, true);
    }
  /* Return to the previous situation.  */
  current_function_decl = oldfn;
  pop_cfun ();

  return 1;
}
/* Print out the results of the escape analysis.  */

static int
dump_matrix_reorg_analysis (void **slot, void *data ATTRIBUTE_UNUSED)
{
  struct matrix_info *mi = (struct matrix_info *) *slot;

  if (!dump_file)
    return 1;
  fprintf (dump_file, "Matrix \"%s\"; Escaping Level: %d, Num Dims: %d,",
	   get_name (mi->decl), mi->min_indirect_level_escape,
	   mi->num_dims);
  fprintf (dump_file, " Malloc Dims: %d, ", mi->max_malloced_level);
  fprintf (dump_file, "\n");
  if (mi->min_indirect_level_escape >= 2)
    fprintf (dump_file, "Flattened %d dimensions \n",
	     mi->min_indirect_level_escape);
  return 1;
}
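
/* For illustration only (derived from the format strings above, not from
   an actual run): a successfully flattened 2-D matrix "a" would show up
   in the dump file along the lines of

     Matrix "a"; Escaping Level: 2, Num Dims: 2, Malloc Dims: 2,
     Flattened 2 dimensions  */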
/* Perform matrix flattening.  */

static unsigned int
matrix_reorg (void)
{
  struct cgraph_node *node;

  if (profile_info)
    check_transpose_p = true;
  else
    check_transpose_p = false;
  /* If there are hand written vectors, we skip this optimization.  */
  for (node = cgraph_nodes; node; node = node->next)
    if (!may_flatten_matrices (node))
      return 0;
  matrices_to_reorg = htab_create (37, mtt_info_hash, mtt_info_eq, mat_free);
  /* Find and record all potential matrices in the program.  */
  find_matrices_decl ();
  /* Analyze the accesses of the matrices (escaping analysis).  */
  for (node = cgraph_nodes; node; node = node->next)
    if (node->analyzed)
      {
	tree temp_fn;

	temp_fn = current_function_decl;
	current_function_decl = node->decl;
	push_cfun (DECL_STRUCT_FUNCTION (node->decl));
	bitmap_obstack_initialize (NULL);
	gimple_register_cfg_hooks ();

	if (!gimple_in_ssa_p (cfun))
	  {
	    free_dominance_info (CDI_DOMINATORS);
	    free_dominance_info (CDI_POST_DOMINATORS);
	    pop_cfun ();
	    current_function_decl = temp_fn;
	    bitmap_obstack_release (NULL);

	    return 0;
	  }
#ifdef ENABLE_CHECKING
	verify_flow_info ();
#endif

	if (!matrices_to_reorg)
	  {
	    free_dominance_info (CDI_DOMINATORS);
	    free_dominance_info (CDI_POST_DOMINATORS);
	    pop_cfun ();
	    current_function_decl = temp_fn;
	    bitmap_obstack_release (NULL);

	    return 0;
	  }
	/* Create htab for phi nodes.  */
	htab_mat_acc_phi_nodes = htab_create (37, mat_acc_phi_hash,
					      mat_acc_phi_eq, free);
	if (!check_transpose_p)
	  find_sites_in_func (false);
	else
	  {
	    find_sites_in_func (true);
	    loop_optimizer_init (LOOPS_NORMAL);
	    if (current_loops)
	      scev_initialize ();
	    htab_traverse (matrices_to_reorg, analyze_transpose, NULL);
	    if (current_loops)
	      {
		scev_finalize ();
		loop_optimizer_finalize ();
		current_loops = NULL;
	      }
	  }
	/* If the current function is the allocation function for any of
	   the matrices we check its allocation and the escaping level.  */
	htab_traverse (matrices_to_reorg, check_allocation_function, NULL);
	free_dominance_info (CDI_DOMINATORS);
	free_dominance_info (CDI_POST_DOMINATORS);
	pop_cfun ();
	current_function_decl = temp_fn;
	bitmap_obstack_release (NULL);
      }
  htab_traverse (matrices_to_reorg, transform_allocation_sites, NULL);
  /* Now transform the accesses.  */
  for (node = cgraph_nodes; node; node = node->next)
    if (node->analyzed)
      {
	/* Remember that allocation sites have been handled.  */
	tree temp_fn;

	temp_fn = current_function_decl;
	current_function_decl = node->decl;
	push_cfun (DECL_STRUCT_FUNCTION (node->decl));
	bitmap_obstack_initialize (NULL);
	gimple_register_cfg_hooks ();
	record_all_accesses_in_func ();
	htab_traverse (matrices_to_reorg, transform_access_sites, NULL);
	free_dominance_info (CDI_DOMINATORS);
	free_dominance_info (CDI_POST_DOMINATORS);
	pop_cfun ();
	current_function_decl = temp_fn;
	bitmap_obstack_release (NULL);
      }
  htab_traverse (matrices_to_reorg, dump_matrix_reorg_analysis, NULL);

  current_function_decl = NULL;
  set_cfun (NULL);
  matrices_to_reorg = NULL;
  return 0;
}
/* The condition for matrix flattening to be performed.  */

static bool
gate_matrix_reorg (void)
{
  return flag_ipa_matrix_reorg && flag_whole_program;
}
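
/* Note: flag_ipa_matrix_reorg and flag_whole_program are expected to
   correspond to the -fipa-matrix-reorg and -fwhole-program command-line
   options, so the pass runs only when both are given; -fwhole-program is
   presumably required because the escape analysis must see every use of
   a matrix across the whole program.  */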
struct simple_ipa_opt_pass pass_ipa_matrix_reorg =
{
 {
  SIMPLE_IPA_PASS,
  "matrix-reorg",		/* name */
  gate_matrix_reorg,		/* gate */
  matrix_reorg,			/* execute */
  NULL,				/* sub */
  NULL,				/* next */
  0,				/* static_pass_number */
  TV_NONE,			/* tv_id */
  0,				/* properties_required */
  0,				/* properties_provided */
  0,				/* properties_destroyed */
  0,				/* todo_flags_start */
  TODO_dump_cgraph | TODO_dump_func	/* todo_flags_finish */
 }
};