1 /* Matrix layout transformations.
2 Copyright (C) 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
3 Contributed by Razya Ladelsky <razya@il.ibm.com>
4 Originally written by Revital Eres and Mustafa Hagog.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
23 Matrix flattening optimization tries to replace a N-dimensional
24 matrix with its equivalent M-dimensional matrix, where M < N.
25 This first implementation focuses on global matrices defined dynamically.
27 When N==1, we actually flatten the whole matrix.
28 For instance consider a two-dimensional array a [dim1] [dim2].
29 The code for allocating space for it usually looks like:
31 a = (int **) malloc(dim1 * sizeof(int *));
32 for (i=0; i<dim1; i++)
33 a[i] = (int *) malloc (dim2 * sizeof(int));
35 If the array "a" is found suitable for this optimization,
36 its allocation is replaced by:
38 a = (int *) malloc (dim1 * dim2 *sizeof(int));
40 and all the references to a[i][j] are replaced by a[i * dim2 + j].
42 The two main phases of the optimization are the analysis
44 The driver of the optimization is matrix_reorg ().
51 We'll number the dimensions outside-in, meaning the most external
52 is 0, then 1, and so on.
53 The analysis part of the optimization determines K, the escape
54 level of a N-dimensional matrix (K <= N), that allows flattening of
55 the external dimensions 0,1,..., K-1. Escape level 0 means that the
56 whole matrix escapes and no flattening is possible.
58 The analysis part is implemented in analyze_matrix_allocation_site()
59 and analyze_matrix_accesses().
63 In this phase we define the new flattened matrices that replace the
64 original matrices in the code.
65 Implemented in transform_allocation_sites(),
66 transform_access_sites().
70 The idea of Matrix Transposing is organizing the matrix in a different
71 layout such that the dimensions are reordered.
72 This could produce better cache behavior in some cases.
74 For example, lets look at the matrix accesses in the following loop:
80 This loop can produce good cache behavior because the elements of
81 the inner dimension are accessed sequentially.
83 However, if the accesses of the matrix were of the following form:
89 In this loop we iterate the columns and not the rows.
90 Therefore, replacing the rows and columns
91 would have had an organization with better (cache) locality.
92 Replacing the dimensions of the matrix is called matrix transposing.
94 This example, of course, could be enhanced to multiple dimensions matrices
97 Since a program could include all kind of accesses, there is a decision
98 mechanism, implemented in analyze_transpose(), which implements a
99 heuristic that tries to determine whether to transpose the matrix or not,
100 according to the form of the more dominant accesses.
101 This decision is transferred to the flattening mechanism, and whether
102 the matrix was transposed or not, the matrix is flattened (if possible).
104 This decision making is based on profiling information and loop information.
105 If profiling information is available, decision making mechanism will be
106 operated, otherwise the matrix will only be flattened (if possible).
108 Both optimizations are described in the paper "Matrix flattening and
109 transposing in GCC" which was presented in GCC summit 2006.
110 http://www.gccsummit.org/2006/2006-GCC-Summit-Proceedings.pdf. */
114 #include "coretypes.h"
118 #include "tree-inline.h"
119 #include "tree-flow.h"
120 #include "tree-flow-inline.h"
121 #include "langhooks.h"
129 #include "diagnostic.h"
134 #include "function.h"
135 #include "basic-block.h"
137 #include "tree-iterator.h"
138 #include "tree-pass.h"
140 #include "tree-data-ref.h"
141 #include "tree-chrec.h"
142 #include "tree-scalar-evolution.h"
144 /* We need to collect a lot of data from the original malloc,
145 particularly as the gimplifier has converted:
147 orig_var = (struct_type *) malloc (x * sizeof (struct_type *));
151 T3 = <constant> ; ** <constant> is amount to malloc; precomputed **
153 T5 = (struct_type *) T4;
156 The following struct fields allow us to collect all the necessary data from
157 the gimplified program. The comments in the struct below are all based
158 on the gimple example above. */
160 struct malloc_call_data
162 gimple call_stmt
; /* Tree for "T4 = malloc (T3);" */
163 tree size_var
; /* Var decl for T3. */
164 tree malloc_size
; /* Tree for "<constant>", the rhs assigned to T3. */
167 static tree
can_calculate_expr_before_stmt (tree
, sbitmap
);
168 static tree
can_calculate_stmt_before_stmt (gimple
, sbitmap
);
170 /* The front end of the compiler, when parsing statements of the form:
172 var = (type_cast) malloc (sizeof (type));
174 always converts this single statement into the following statements
179 T.3 = (type_cast) T.2;
182 Since we need to create new malloc statements and modify the original
183 statements somewhat, we need to find all four of the above statements.
184 Currently record_call_1 (called for building cgraph edges) finds and
185 records the statements containing the actual call to malloc, but we
186 need to find the rest of the variables/statements on our own. That
187 is what the following function does. */
189 collect_data_for_malloc_call (gimple stmt
, struct malloc_call_data
*m_data
)
191 tree size_var
= NULL
;
195 gcc_assert (is_gimple_call (stmt
));
197 malloc_fn_decl
= gimple_call_fndecl (stmt
);
198 if (malloc_fn_decl
== NULL
199 || DECL_FUNCTION_CODE (malloc_fn_decl
) != BUILT_IN_MALLOC
)
202 arg1
= gimple_call_arg (stmt
, 0);
205 m_data
->call_stmt
= stmt
;
206 m_data
->size_var
= size_var
;
207 if (TREE_CODE (size_var
) != VAR_DECL
)
208 m_data
->malloc_size
= size_var
;
210 m_data
->malloc_size
= NULL_TREE
;
213 /* Information about matrix access site.
214 For example: if an access site of matrix arr is arr[i][j]
215 the ACCESS_SITE_INFO structure will have the address
216 of arr as its stmt. The INDEX_INFO will hold information about the
217 initial address and index of each dimension. */
218 struct access_site_info
220 /* The statement (INDIRECT_REF or POINTER_PLUS_EXPR). */
223 /* In case of POINTER_PLUS_EXPR, what is the offset. */
226 /* The index which created the offset. */
229 /* The indirection level of this statement. */
232 /* TRUE for allocation site FALSE for access site. */
235 /* The function containing the access site. */
238 /* This access is iterated in the inner most loop */
239 bool iterated_by_inner_most_loop_p
;
242 typedef struct access_site_info
*access_site_info_p
;
243 DEF_VEC_P (access_site_info_p
);
244 DEF_VEC_ALLOC_P (access_site_info_p
, heap
);
246 /* Calls to free when flattening a matrix. */
254 /* Information about matrix to flatten. */
257 /* Decl tree of this matrix. */
259 /* Number of dimensions; number
260 of "*" in the type declaration. */
263 /* Minimum indirection level that escapes, 0 means that
264 the whole matrix escapes, k means that dimensions
265 0 to ACTUAL_DIM - k escapes. */
266 int min_indirect_level_escape
;
268 gimple min_indirect_level_escape_stmt
;
270 /* Hold the allocation site for each level (dimension).
271 We can use NUM_DIMS as the upper bound and allocate the array
272 once with this number of elements and no need to use realloc and
273 MAX_MALLOCED_LEVEL. */
274 gimple
*malloc_for_level
;
276 int max_malloced_level
;
278 /* Is the matrix transposed. */
279 bool is_transposed_p
;
281 /* The location of the allocation sites (they must be in one
283 tree allocation_function_decl
;
285 /* The calls to free for each level of indirection. */
286 struct free_info
*free_stmts
;
288 /* An array which holds for each dimension its size. where
289 dimension 0 is the outer most (one that contains all the others).
291 tree
*dimension_size
;
293 /* An array which holds for each dimension it's original size
294 (before transposing and flattening take place). */
295 tree
*dimension_size_orig
;
297 /* An array which holds for each dimension the size of the type of
298 of elements accessed in that level (in bytes). */
299 HOST_WIDE_INT
*dimension_type_size
;
301 int dimension_type_size_len
;
303 /* An array collecting the count of accesses for each dimension. */
304 gcov_type
*dim_hot_level
;
306 /* An array of the accesses to be flattened.
307 elements are of type "struct access_site_info *". */
308 VEC (access_site_info_p
, heap
) * access_l
;
310 /* A map of how the dimensions will be organized at the end of
315 /* In each phi node we want to record the indirection level we have when we
316 get to the phi node. Usually we will have phi nodes with more than two
317 arguments, then we must assure that all of them get to the phi node with
318 the same indirection level, otherwise it's not safe to do the flattening.
319 So we record the information regarding the indirection level each time we
320 get to the phi node in this hash table. */
322 struct matrix_access_phi_node
325 int indirection_level
;
328 /* We use this structure to find if the SSA variable is accessed inside the
329 tree and record the tree containing it. */
331 struct ssa_acc_in_tree
333 /* The variable whose accesses in the tree we are looking for. */
335 /* The tree and code inside it the ssa_var is accessed, currently
336 it could be an INDIRECT_REF or CALL_EXPR. */
337 enum tree_code t_code
;
339 /* The place in the containing tree. */
345 static void analyze_matrix_accesses (struct matrix_info
*, tree
, int, bool,
347 static int transform_allocation_sites (void **, void *);
348 static int transform_access_sites (void **, void *);
349 static int analyze_transpose (void **, void *);
350 static int dump_matrix_reorg_analysis (void **, void *);
352 static bool check_transpose_p
;
354 /* Hash function used for the phi nodes. */
357 mat_acc_phi_hash (const void *p
)
359 const struct matrix_access_phi_node
*const ma_phi
=
360 (const struct matrix_access_phi_node
*) p
;
362 return htab_hash_pointer (ma_phi
->phi
);
365 /* Equality means phi node pointers are the same. */
368 mat_acc_phi_eq (const void *p1
, const void *p2
)
370 const struct matrix_access_phi_node
*const phi1
=
371 (const struct matrix_access_phi_node
*) p1
;
372 const struct matrix_access_phi_node
*const phi2
=
373 (const struct matrix_access_phi_node
*) p2
;
375 if (phi1
->phi
== phi2
->phi
)
381 /* Hold the PHI nodes we visit during the traversal for escaping
383 static htab_t htab_mat_acc_phi_nodes
= NULL
;
385 /* This hash-table holds the information about the matrices we are
387 static htab_t matrices_to_reorg
= NULL
;
389 /* Return a hash for MTT, which is really a "matrix_info *". */
391 mtt_info_hash (const void *mtt
)
393 return htab_hash_pointer (((const struct matrix_info
*) mtt
)->decl
);
396 /* Return true if MTT1 and MTT2 (which are really both of type
397 "matrix_info *") refer to the same decl. */
399 mtt_info_eq (const void *mtt1
, const void *mtt2
)
401 const struct matrix_info
*const i1
= (const struct matrix_info
*) mtt1
;
402 const struct matrix_info
*const i2
= (const struct matrix_info
*) mtt2
;
404 if (i1
->decl
== i2
->decl
)
410 /* Return false if STMT may contain a vector expression.
411 In this situation, all matrices should not be flattened. */
413 may_flatten_matrices_1 (gimple stmt
)
417 switch (gimple_code (stmt
))
420 if (!gimple_assign_cast_p (stmt
))
423 t
= gimple_assign_rhs1 (stmt
);
424 while (CONVERT_EXPR_P (t
))
426 if (TREE_TYPE (t
) && POINTER_TYPE_P (TREE_TYPE (t
)))
430 pointee
= TREE_TYPE (t
);
431 while (POINTER_TYPE_P (pointee
))
432 pointee
= TREE_TYPE (pointee
);
433 if (TREE_CODE (pointee
) == VECTOR_TYPE
)
437 "Found vector type, don't flatten matrix\n");
441 t
= TREE_OPERAND (t
, 0);
445 /* Asm code could contain vector operations. */
454 /* Return false if there are hand-written vectors in the program.
455 We disable the flattening in such a case. */
457 may_flatten_matrices (struct cgraph_node
*node
)
460 struct function
*func
;
462 gimple_stmt_iterator gsi
;
467 func
= DECL_STRUCT_FUNCTION (decl
);
468 FOR_EACH_BB_FN (bb
, func
)
469 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
470 if (!may_flatten_matrices_1 (gsi_stmt (gsi
)))
476 /* Given a VAR_DECL, check its type to determine whether it is
477 a definition of a dynamic allocated matrix and therefore is
478 a suitable candidate for the matrix flattening optimization.
479 Return NULL if VAR_DECL is not such decl. Otherwise, allocate
480 a MATRIX_INFO structure, fill it with the relevant information
481 and return a pointer to it.
482 TODO: handle also statically defined arrays. */
483 static struct matrix_info
*
484 analyze_matrix_decl (tree var_decl
)
486 struct matrix_info
*m_node
, tmpmi
, *mi
;
490 gcc_assert (matrices_to_reorg
);
492 if (TREE_CODE (var_decl
) == PARM_DECL
)
493 var_type
= DECL_ARG_TYPE (var_decl
);
494 else if (TREE_CODE (var_decl
) == VAR_DECL
)
495 var_type
= TREE_TYPE (var_decl
);
499 if (!POINTER_TYPE_P (var_type
))
502 while (POINTER_TYPE_P (var_type
))
504 var_type
= TREE_TYPE (var_type
);
511 if (!COMPLETE_TYPE_P (var_type
)
512 || TREE_CODE (TYPE_SIZE_UNIT (var_type
)) != INTEGER_CST
)
515 /* Check to see if this pointer is already in there. */
516 tmpmi
.decl
= var_decl
;
517 mi
= (struct matrix_info
*) htab_find (matrices_to_reorg
, &tmpmi
);
522 /* Record the matrix. */
524 m_node
= (struct matrix_info
*) xcalloc (1, sizeof (struct matrix_info
));
525 m_node
->decl
= var_decl
;
526 m_node
->num_dims
= dim_num
;
528 = (struct free_info
*) xcalloc (dim_num
, sizeof (struct free_info
));
530 /* Init min_indirect_level_escape to -1 to indicate that no escape
531 analysis has been done yet. */
532 m_node
->min_indirect_level_escape
= -1;
533 m_node
->is_transposed_p
= false;
542 struct matrix_info
*mat
= (struct matrix_info
*) e
;
548 free (mat
->free_stmts
);
549 if (mat
->dim_hot_level
)
550 free (mat
->dim_hot_level
);
551 if (mat
->malloc_for_level
)
552 free (mat
->malloc_for_level
);
555 /* Find all potential matrices.
556 TODO: currently we handle only multidimensional
557 dynamically allocated arrays. */
559 find_matrices_decl (void)
561 struct matrix_info
*tmp
;
563 struct varpool_node
*vnode
;
565 gcc_assert (matrices_to_reorg
);
567 /* For every global variable in the program:
568 Check to see if it's of a candidate type and record it. */
569 for (vnode
= varpool_nodes_queue
; vnode
; vnode
= vnode
->next_needed
)
571 tree var_decl
= vnode
->decl
;
573 if (!var_decl
|| TREE_CODE (var_decl
) != VAR_DECL
)
576 if (matrices_to_reorg
)
577 if ((tmp
= analyze_matrix_decl (var_decl
)))
579 if (!TREE_ADDRESSABLE (var_decl
))
581 slot
= htab_find_slot (matrices_to_reorg
, tmp
, INSERT
);
589 /* Mark that the matrix MI escapes at level L. */
591 mark_min_matrix_escape_level (struct matrix_info
*mi
, int l
, gimple s
)
593 if (mi
->min_indirect_level_escape
== -1
594 || (mi
->min_indirect_level_escape
> l
))
596 mi
->min_indirect_level_escape
= l
;
597 mi
->min_indirect_level_escape_stmt
= s
;
601 /* Find if the SSA variable is accessed inside the
602 tree and record the tree containing it.
603 The only relevant uses are the case of SSA_NAME, or SSA inside
604 INDIRECT_REF, PLUS_EXPR, POINTER_PLUS_EXPR, MULT_EXPR. */
606 ssa_accessed_in_tree (tree t
, struct ssa_acc_in_tree
*a
)
608 a
->t_code
= TREE_CODE (t
);
616 if (SSA_VAR_P (TREE_OPERAND (t
, 0))
617 && TREE_OPERAND (t
, 0) == a
->ssa_var
)
625 /* Find if the SSA variable is accessed on the right hand side of
629 ssa_accessed_in_call_rhs (gimple stmt
, struct ssa_acc_in_tree
*a
)
635 a
->t_code
= CALL_EXPR
;
636 for (i
= 0; i
< gimple_call_num_args (stmt
); i
++)
638 arg
= gimple_call_arg (stmt
, i
);
639 if (arg
== a
->ssa_var
)
642 decl
= gimple_call_fndecl (stmt
);
649 /* Find if the SSA variable is accessed on the right hand side of
650 gimple assign STMT. */
653 ssa_accessed_in_assign_rhs (gimple stmt
, struct ssa_acc_in_tree
*a
)
656 a
->t_code
= gimple_assign_rhs_code (stmt
);
664 case VIEW_CONVERT_EXPR
:
665 ssa_accessed_in_tree (gimple_assign_rhs1 (stmt
), a
);
667 case POINTER_PLUS_EXPR
:
670 op1
= gimple_assign_rhs1 (stmt
);
671 op2
= gimple_assign_rhs2 (stmt
);
673 if (op1
== a
->ssa_var
)
678 else if (op2
== a
->ssa_var
)
689 /* Record the access/allocation site information for matrix MI so we can
690 handle it later in transformation. */
692 record_access_alloc_site_info (struct matrix_info
*mi
, gimple stmt
, tree offset
,
693 tree index
, int level
, bool is_alloc
)
695 struct access_site_info
*acc_info
;
698 mi
->access_l
= VEC_alloc (access_site_info_p
, heap
, 100);
701 = (struct access_site_info
*)
702 xcalloc (1, sizeof (struct access_site_info
));
703 acc_info
->stmt
= stmt
;
704 acc_info
->offset
= offset
;
705 acc_info
->index
= index
;
706 acc_info
->function_decl
= current_function_decl
;
707 acc_info
->level
= level
;
708 acc_info
->is_alloc
= is_alloc
;
710 VEC_safe_push (access_site_info_p
, heap
, mi
->access_l
, acc_info
);
714 /* Record the malloc as the allocation site of the given LEVEL. But
715 first we Make sure that all the size parameters passed to malloc in
716 all the allocation sites could be pre-calculated before the call to
717 the malloc of level 0 (the main malloc call). */
719 add_allocation_site (struct matrix_info
*mi
, gimple stmt
, int level
)
721 struct malloc_call_data mcd
;
723 /* Make sure that the allocation sites are in the same function. */
724 if (!mi
->allocation_function_decl
)
725 mi
->allocation_function_decl
= current_function_decl
;
726 else if (mi
->allocation_function_decl
!= current_function_decl
)
728 int min_malloc_level
;
730 gcc_assert (mi
->malloc_for_level
);
732 /* Find the minimum malloc level that already has been seen;
733 we known its allocation function must be
734 MI->allocation_function_decl since it's different than
735 CURRENT_FUNCTION_DECL then the escaping level should be
736 MIN (LEVEL, MIN_MALLOC_LEVEL) - 1 , and the allocation function
737 must be set accordingly. */
738 for (min_malloc_level
= 0;
739 min_malloc_level
< mi
->max_malloced_level
740 && mi
->malloc_for_level
[min_malloc_level
]; min_malloc_level
++);
741 if (level
< min_malloc_level
)
743 mi
->allocation_function_decl
= current_function_decl
;
744 mark_min_matrix_escape_level (mi
, min_malloc_level
, stmt
);
748 mark_min_matrix_escape_level (mi
, level
, stmt
);
749 /* cannot be that (level == min_malloc_level)
750 we would have returned earlier. */
755 /* Find the correct malloc information. */
756 collect_data_for_malloc_call (stmt
, &mcd
);
758 /* We accept only calls to malloc function; we do not accept
759 calls like calloc and realloc. */
760 if (!mi
->malloc_for_level
)
762 mi
->malloc_for_level
= XCNEWVEC (gimple
, level
+ 1);
763 mi
->max_malloced_level
= level
+ 1;
765 else if (mi
->max_malloced_level
<= level
)
768 = XRESIZEVEC (gimple
, mi
->malloc_for_level
, level
+ 1);
770 /* Zero the newly allocated items. */
771 memset (&(mi
->malloc_for_level
[mi
->max_malloced_level
+ 1]),
772 0, (level
- mi
->max_malloced_level
) * sizeof (tree
));
774 mi
->max_malloced_level
= level
+ 1;
776 mi
->malloc_for_level
[level
] = stmt
;
779 /* Given an assignment statement STMT that we know that its
780 left-hand-side is the matrix MI variable, we traverse the immediate
781 uses backwards until we get to a malloc site. We make sure that
782 there is one and only one malloc site that sets this variable. When
783 we are performing the flattening we generate a new variable that
784 will hold the size for each dimension; each malloc that allocates a
785 dimension has the size parameter; we use that parameter to
786 initialize the dimension size variable so we can use it later in
787 the address calculations. LEVEL is the dimension we're inspecting.
788 Return if STMT is related to an allocation site. */
791 analyze_matrix_allocation_site (struct matrix_info
*mi
, gimple stmt
,
792 int level
, sbitmap visited
)
794 if (gimple_assign_copy_p (stmt
) || gimple_assign_cast_p (stmt
))
796 tree rhs
= gimple_assign_rhs1 (stmt
);
798 if (TREE_CODE (rhs
) == SSA_NAME
)
800 gimple def
= SSA_NAME_DEF_STMT (rhs
);
802 analyze_matrix_allocation_site (mi
, def
, level
, visited
);
805 /* If we are back to the original matrix variable then we
806 are sure that this is analyzed as an access site. */
807 else if (rhs
== mi
->decl
)
810 /* A result of call to malloc. */
811 else if (is_gimple_call (stmt
))
813 int call_flags
= gimple_call_flags (stmt
);
815 if (!(call_flags
& ECF_MALLOC
))
817 mark_min_matrix_escape_level (mi
, level
, stmt
);
823 const char *malloc_fname
;
825 malloc_fn_decl
= gimple_call_fndecl (stmt
);
826 if (malloc_fn_decl
== NULL_TREE
)
828 mark_min_matrix_escape_level (mi
, level
, stmt
);
831 malloc_fname
= IDENTIFIER_POINTER (DECL_NAME (malloc_fn_decl
));
832 if (DECL_FUNCTION_CODE (malloc_fn_decl
) != BUILT_IN_MALLOC
)
836 "Matrix %s is an argument to function %s\n",
837 get_name (mi
->decl
), get_name (malloc_fn_decl
));
838 mark_min_matrix_escape_level (mi
, level
, stmt
);
842 /* This is a call to malloc of level 'level'.
843 mi->max_malloced_level-1 == level means that we've
844 seen a malloc statement of level 'level' before.
845 If the statement is not the same one that we've
846 seen before, then there's another malloc statement
847 for the same level, which means that we need to mark
849 if (mi
->malloc_for_level
850 && mi
->max_malloced_level
-1 == level
851 && mi
->malloc_for_level
[level
] != stmt
)
853 mark_min_matrix_escape_level (mi
, level
, stmt
);
857 add_allocation_site (mi
, stmt
, level
);
860 /* Looks like we don't know what is happening in this
861 statement so be in the safe side and mark it as escaping. */
862 mark_min_matrix_escape_level (mi
, level
, stmt
);
865 /* The transposing decision making.
866 In order to to calculate the profitability of transposing, we collect two
867 types of information regarding the accesses:
868 1. profiling information used to express the hotness of an access, that
869 is how often the matrix is accessed by this access site (count of the
871 2. which dimension in the access site is iterated by the inner
872 most loop containing this access.
874 The matrix will have a calculated value of weighted hotness for each
876 Intuitively the hotness level of a dimension is a function of how
877 many times it was the most frequently accessed dimension in the
878 highly executed access sites of this matrix.
880 As computed by following equation:
883 \ \ dim_hot_level[i] +=
886 acc[j]->dim[i]->iter_by_inner_loop * count(j)
888 Where n is the number of dims and m is the number of the matrix
889 access sites. acc[j]->dim[i]->iter_by_inner_loop is 1 if acc[j]
890 iterates over dim[i] in innermost loop, and is 0 otherwise.
892 The organization of the new matrix should be according to the
893 hotness of each dimension. The hotness of the dimension implies
894 the locality of the elements.*/
896 analyze_transpose (void **slot
, void *data ATTRIBUTE_UNUSED
)
898 struct matrix_info
*mi
= (struct matrix_info
*) *slot
;
899 int min_escape_l
= mi
->min_indirect_level_escape
;
902 struct access_site_info
*acc_info
;
905 if (min_escape_l
< 2 || !mi
->access_l
)
910 VEC_iterate (access_site_info_p
, mi
->access_l
, i
, acc_info
);
913 VEC_free (access_site_info_p
, heap
, mi
->access_l
);
918 if (!mi
->dim_hot_level
)
920 (gcov_type
*) xcalloc (min_escape_l
, sizeof (gcov_type
));
923 for (i
= 0; VEC_iterate (access_site_info_p
, mi
->access_l
, i
, acc_info
);
926 if (gimple_assign_rhs_code (acc_info
->stmt
) == POINTER_PLUS_EXPR
927 && acc_info
->level
< min_escape_l
)
929 loop
= loop_containing_stmt (acc_info
->stmt
);
930 if (!loop
|| loop
->inner
)
935 if (simple_iv (loop
, loop
, acc_info
->offset
, &iv
, true))
941 istep
= int_cst_value (iv
.step
);
944 acc_info
->iterated_by_inner_most_loop_p
= 1;
945 mi
->dim_hot_level
[acc_info
->level
] +=
946 gimple_bb (acc_info
->stmt
)->count
;
954 VEC_free (access_site_info_p
, heap
, mi
->access_l
);
959 /* Find the index which defines the OFFSET from base.
960 We walk from use to def until we find how the offset was defined. */
962 get_index_from_offset (tree offset
, gimple def_stmt
)
964 tree op1
, op2
, index
;
966 if (gimple_code (def_stmt
) == GIMPLE_PHI
)
968 if ((gimple_assign_copy_p (def_stmt
) || gimple_assign_cast_p (def_stmt
))
969 && TREE_CODE (gimple_assign_rhs1 (def_stmt
)) == SSA_NAME
)
970 return get_index_from_offset (offset
,
971 SSA_NAME_DEF_STMT (gimple_assign_rhs1 (def_stmt
)));
972 else if (is_gimple_assign (def_stmt
)
973 && gimple_assign_rhs_code (def_stmt
) == MULT_EXPR
)
975 op1
= gimple_assign_rhs1 (def_stmt
);
976 op2
= gimple_assign_rhs2 (def_stmt
);
977 if (TREE_CODE (op1
) != INTEGER_CST
&& TREE_CODE (op2
) != INTEGER_CST
)
979 index
= (TREE_CODE (op1
) == INTEGER_CST
) ? op2
: op1
;
986 /* update MI->dimension_type_size[CURRENT_INDIRECT_LEVEL] with the size
987 of the type related to the SSA_VAR, or the type related to the
988 lhs of STMT, in the case that it is an INDIRECT_REF. */
990 update_type_size (struct matrix_info
*mi
, gimple stmt
, tree ssa_var
,
991 int current_indirect_level
)
994 HOST_WIDE_INT type_size
;
996 /* Update type according to the type of the INDIRECT_REF expr. */
997 if (is_gimple_assign (stmt
)
998 && TREE_CODE (gimple_assign_lhs (stmt
)) == INDIRECT_REF
)
1000 lhs
= gimple_assign_lhs (stmt
);
1001 gcc_assert (POINTER_TYPE_P
1002 (TREE_TYPE (SSA_NAME_VAR (TREE_OPERAND (lhs
, 0)))));
1004 int_size_in_bytes (TREE_TYPE
1006 (SSA_NAME_VAR (TREE_OPERAND (lhs
, 0)))));
1009 type_size
= int_size_in_bytes (TREE_TYPE (ssa_var
));
1011 /* Record the size of elements accessed (as a whole)
1012 in the current indirection level (dimension). If the size of
1013 elements is not known at compile time, mark it as escaping. */
1015 mark_min_matrix_escape_level (mi
, current_indirect_level
, stmt
);
1018 int l
= current_indirect_level
;
1020 if (!mi
->dimension_type_size
)
1022 mi
->dimension_type_size
1023 = (HOST_WIDE_INT
*) xcalloc (l
+ 1, sizeof (HOST_WIDE_INT
));
1024 mi
->dimension_type_size_len
= l
+ 1;
1026 else if (mi
->dimension_type_size_len
< l
+ 1)
1028 mi
->dimension_type_size
1029 = (HOST_WIDE_INT
*) xrealloc (mi
->dimension_type_size
,
1030 (l
+ 1) * sizeof (HOST_WIDE_INT
));
1031 memset (&mi
->dimension_type_size
[mi
->dimension_type_size_len
],
1032 0, (l
+ 1 - mi
->dimension_type_size_len
)
1033 * sizeof (HOST_WIDE_INT
));
1034 mi
->dimension_type_size_len
= l
+ 1;
1036 /* Make sure all the accesses in the same level have the same size
1038 if (!mi
->dimension_type_size
[l
])
1039 mi
->dimension_type_size
[l
] = type_size
;
1040 else if (mi
->dimension_type_size
[l
] != type_size
)
1041 mark_min_matrix_escape_level (mi
, l
, stmt
);
1045 /* USE_STMT represents a GIMPLE_CALL, where one of the arguments is the
1046 ssa var that we want to check because it came from some use of matrix
1047 MI. CURRENT_INDIRECT_LEVEL is the indirection level we reached so
1051 analyze_accesses_for_call_stmt (struct matrix_info
*mi
, tree ssa_var
,
1052 gimple use_stmt
, int current_indirect_level
)
1054 tree fndecl
= gimple_call_fndecl (use_stmt
);
1056 if (gimple_call_lhs (use_stmt
))
1058 tree lhs
= gimple_call_lhs (use_stmt
);
1059 struct ssa_acc_in_tree lhs_acc
, rhs_acc
;
1061 memset (&lhs_acc
, 0, sizeof (lhs_acc
));
1062 memset (&rhs_acc
, 0, sizeof (rhs_acc
));
1064 lhs_acc
.ssa_var
= ssa_var
;
1065 lhs_acc
.t_code
= ERROR_MARK
;
1066 ssa_accessed_in_tree (lhs
, &lhs_acc
);
1067 rhs_acc
.ssa_var
= ssa_var
;
1068 rhs_acc
.t_code
= ERROR_MARK
;
1069 ssa_accessed_in_call_rhs (use_stmt
, &rhs_acc
);
1071 /* The SSA must be either in the left side or in the right side,
1072 to understand what is happening.
1073 In case the SSA_NAME is found in both sides we should be escaping
1074 at this level because in this case we cannot calculate the
1075 address correctly. */
1076 if ((lhs_acc
.var_found
&& rhs_acc
.var_found
1077 && lhs_acc
.t_code
== INDIRECT_REF
)
1078 || (!rhs_acc
.var_found
&& !lhs_acc
.var_found
))
1080 mark_min_matrix_escape_level (mi
, current_indirect_level
, use_stmt
);
1081 return current_indirect_level
;
1083 gcc_assert (!rhs_acc
.var_found
|| !lhs_acc
.var_found
);
1085 /* If we are storing to the matrix at some level, then mark it as
1086 escaping at that level. */
1087 if (lhs_acc
.var_found
)
1089 int l
= current_indirect_level
+ 1;
1091 gcc_assert (lhs_acc
.t_code
== INDIRECT_REF
);
1092 mark_min_matrix_escape_level (mi
, l
, use_stmt
);
1093 return current_indirect_level
;
1099 if (DECL_FUNCTION_CODE (fndecl
) != BUILT_IN_FREE
)
1103 "Matrix %s: Function call %s, level %d escapes.\n",
1104 get_name (mi
->decl
), get_name (fndecl
),
1105 current_indirect_level
);
1106 mark_min_matrix_escape_level (mi
, current_indirect_level
, use_stmt
);
1108 else if (mi
->free_stmts
[current_indirect_level
].stmt
!= NULL
1109 && mi
->free_stmts
[current_indirect_level
].stmt
!= use_stmt
)
1110 mark_min_matrix_escape_level (mi
, current_indirect_level
, use_stmt
);
1113 /*Record the free statements so we can delete them
1115 int l
= current_indirect_level
;
1117 mi
->free_stmts
[l
].stmt
= use_stmt
;
1118 mi
->free_stmts
[l
].func
= current_function_decl
;
1121 return current_indirect_level
;
1124 /* USE_STMT represents a phi node of the ssa var that we want to
1125 check because it came from some use of matrix
1127 We check all the escaping levels that get to the PHI node
1128 and make sure they are all the same escaping;
1129 if not (which is rare) we let the escaping level be the
1130 minimum level that gets into that PHI because starting from
1131 that level we cannot expect the behavior of the indirections.
1132 CURRENT_INDIRECT_LEVEL is the indirection level we reached so far. */
1135 analyze_accesses_for_phi_node (struct matrix_info
*mi
, gimple use_stmt
,
1136 int current_indirect_level
, sbitmap visited
,
1137 bool record_accesses
)
1140 struct matrix_access_phi_node tmp_maphi
, *maphi
, **pmaphi
;
1142 tmp_maphi
.phi
= use_stmt
;
1143 if ((maphi
= (struct matrix_access_phi_node
*)
1144 htab_find (htab_mat_acc_phi_nodes
, &tmp_maphi
)))
1146 if (maphi
->indirection_level
== current_indirect_level
)
1150 int level
= MIN (maphi
->indirection_level
,
1151 current_indirect_level
);
1155 maphi
->indirection_level
= level
;
1156 for (j
= 0; j
< gimple_phi_num_args (use_stmt
); j
++)
1158 tree def
= PHI_ARG_DEF (use_stmt
, j
);
1160 if (gimple_code (SSA_NAME_DEF_STMT (def
)) != GIMPLE_PHI
)
1161 stmt
= SSA_NAME_DEF_STMT (def
);
1163 mark_min_matrix_escape_level (mi
, level
, stmt
);
1167 maphi
= (struct matrix_access_phi_node
*)
1168 xcalloc (1, sizeof (struct matrix_access_phi_node
));
1169 maphi
->phi
= use_stmt
;
1170 maphi
->indirection_level
= current_indirect_level
;
1172 /* Insert to hash table. */
1173 pmaphi
= (struct matrix_access_phi_node
**)
1174 htab_find_slot (htab_mat_acc_phi_nodes
, maphi
, INSERT
);
1175 gcc_assert (pmaphi
);
1178 if (!TEST_BIT (visited
, SSA_NAME_VERSION (PHI_RESULT (use_stmt
))))
1180 SET_BIT (visited
, SSA_NAME_VERSION (PHI_RESULT (use_stmt
)));
1181 analyze_matrix_accesses (mi
, PHI_RESULT (use_stmt
),
1182 current_indirect_level
, false, visited
,
1184 RESET_BIT (visited
, SSA_NAME_VERSION (PHI_RESULT (use_stmt
)));
1188 /* USE_STMT represents an assign statement (the rhs or lhs include
1189 the ssa var that we want to check because it came from some use of matrix
1190 MI. CURRENT_INDIRECT_LEVEL is the indirection level we reached so far. */
1193 analyze_accesses_for_assign_stmt (struct matrix_info
*mi
, tree ssa_var
,
1194 gimple use_stmt
, int current_indirect_level
,
1195 bool last_op
, sbitmap visited
,
1196 bool record_accesses
)
1198 tree lhs
= gimple_get_lhs (use_stmt
);
1199 struct ssa_acc_in_tree lhs_acc
, rhs_acc
;
1201 memset (&lhs_acc
, 0, sizeof (lhs_acc
));
1202 memset (&rhs_acc
, 0, sizeof (rhs_acc
));
1204 lhs_acc
.ssa_var
= ssa_var
;
1205 lhs_acc
.t_code
= ERROR_MARK
;
1206 ssa_accessed_in_tree (lhs
, &lhs_acc
);
1207 rhs_acc
.ssa_var
= ssa_var
;
1208 rhs_acc
.t_code
= ERROR_MARK
;
1209 ssa_accessed_in_assign_rhs (use_stmt
, &rhs_acc
);
1211 /* The SSA must be either in the left side or in the right side,
1212 to understand what is happening.
1213 In case the SSA_NAME is found in both sides we should be escaping
1214 at this level because in this case we cannot calculate the
1215 address correctly. */
1216 if ((lhs_acc
.var_found
&& rhs_acc
.var_found
1217 && lhs_acc
.t_code
== INDIRECT_REF
)
1218 || (!rhs_acc
.var_found
&& !lhs_acc
.var_found
))
1220 mark_min_matrix_escape_level (mi
, current_indirect_level
, use_stmt
);
1221 return current_indirect_level
;
1223 gcc_assert (!rhs_acc
.var_found
|| !lhs_acc
.var_found
);
1225 /* If we are storing to the matrix at some level, then mark it as
1226 escaping at that level. */
1227 if (lhs_acc
.var_found
)
1229 int l
= current_indirect_level
+ 1;
1231 gcc_assert (lhs_acc
.t_code
== INDIRECT_REF
);
1233 if (!(gimple_assign_copy_p (use_stmt
)
1234 || gimple_assign_cast_p (use_stmt
))
1235 || (TREE_CODE (gimple_assign_rhs1 (use_stmt
)) != SSA_NAME
))
1236 mark_min_matrix_escape_level (mi
, l
, use_stmt
);
1239 gimple def_stmt
= SSA_NAME_DEF_STMT (gimple_assign_rhs1 (use_stmt
));
1240 analyze_matrix_allocation_site (mi
, def_stmt
, l
, visited
);
1241 if (record_accesses
)
1242 record_access_alloc_site_info (mi
, use_stmt
, NULL_TREE
,
1243 NULL_TREE
, l
, true);
1244 update_type_size (mi
, use_stmt
, NULL
, l
);
1246 return current_indirect_level
;
1248 /* Now, check the right-hand-side, to see how the SSA variable
1250 if (rhs_acc
.var_found
)
1252 if (rhs_acc
.t_code
!= INDIRECT_REF
1253 && rhs_acc
.t_code
!= POINTER_PLUS_EXPR
&& rhs_acc
.t_code
!= SSA_NAME
)
1255 mark_min_matrix_escape_level (mi
, current_indirect_level
, use_stmt
);
1256 return current_indirect_level
;
1258 /* If the access in the RHS has an indirection increase the
1259 indirection level. */
1260 if (rhs_acc
.t_code
== INDIRECT_REF
)
1262 if (record_accesses
)
1263 record_access_alloc_site_info (mi
, use_stmt
, NULL_TREE
,
1265 current_indirect_level
, true);
1266 current_indirect_level
+= 1;
1268 else if (rhs_acc
.t_code
== POINTER_PLUS_EXPR
)
1270 gcc_assert (rhs_acc
.second_op
);
1272 /* Currently we support only one PLUS expression on the
1273 SSA_NAME that holds the base address of the current
1274 indirection level; to support more general case there
1275 is a need to hold a stack of expressions and regenerate
1276 the calculation later. */
1277 mark_min_matrix_escape_level (mi
, current_indirect_level
,
1284 op1
= gimple_assign_rhs1 (use_stmt
);
1285 op2
= gimple_assign_rhs2 (use_stmt
);
1287 op2
= (op1
== ssa_var
) ? op2
: op1
;
1288 if (TREE_CODE (op2
) == INTEGER_CST
)
1290 build_int_cst (TREE_TYPE (op1
),
1291 TREE_INT_CST_LOW (op2
) /
1292 int_size_in_bytes (TREE_TYPE (op1
)));
1296 get_index_from_offset (op2
, SSA_NAME_DEF_STMT (op2
));
1297 if (index
== NULL_TREE
)
1299 mark_min_matrix_escape_level (mi
,
1300 current_indirect_level
,
1302 return current_indirect_level
;
1305 if (record_accesses
)
1306 record_access_alloc_site_info (mi
, use_stmt
, op2
,
1308 current_indirect_level
, false);
1311 /* If we are storing this level of indirection mark it as
1313 if (lhs_acc
.t_code
== INDIRECT_REF
|| TREE_CODE (lhs
) != SSA_NAME
)
1315 int l
= current_indirect_level
;
1317 /* One exception is when we are storing to the matrix
1318 variable itself; this is the case of malloc, we must make
1319 sure that it's the one and only one call to malloc so
1320 we call analyze_matrix_allocation_site to check
1322 if (TREE_CODE (lhs
) != VAR_DECL
|| lhs
!= mi
->decl
)
1323 mark_min_matrix_escape_level (mi
, current_indirect_level
,
1327 /* Also update the escaping level. */
1328 analyze_matrix_allocation_site (mi
, use_stmt
, l
, visited
);
1329 if (record_accesses
)
1330 record_access_alloc_site_info (mi
, use_stmt
, NULL_TREE
,
1331 NULL_TREE
, l
, true);
1336 /* We are placing it in an SSA, follow that SSA. */
1337 analyze_matrix_accesses (mi
, lhs
,
1338 current_indirect_level
,
1339 rhs_acc
.t_code
== POINTER_PLUS_EXPR
,
1340 visited
, record_accesses
);
1343 return current_indirect_level
;
1346 /* Given a SSA_VAR (coming from a use statement of the matrix MI),
1347 follow its uses and level of indirection and find out the minimum
1348 indirection level it escapes in (the highest dimension) and the maximum
1349 level it is accessed in (this will be the actual dimension of the
1350 matrix). The information is accumulated in MI.
1351 We look at the immediate uses, if one escapes we finish; if not,
1352 we make a recursive call for each one of the immediate uses of the
1353 resulting SSA name. */
1355 analyze_matrix_accesses (struct matrix_info
*mi
, tree ssa_var
,
1356 int current_indirect_level
, bool last_op
,
1357 sbitmap visited
, bool record_accesses
)
1359 imm_use_iterator imm_iter
;
1360 use_operand_p use_p
;
1362 update_type_size (mi
, SSA_NAME_DEF_STMT (ssa_var
), ssa_var
,
1363 current_indirect_level
);
1365 /* We don't go beyond the escaping level when we are performing the
1366 flattening. NOTE: we keep the last indirection level that doesn't
1368 if (mi
->min_indirect_level_escape
> -1
1369 && mi
->min_indirect_level_escape
<= current_indirect_level
)
1372 /* Now go over the uses of the SSA_NAME and check how it is used in
1373 each one of them. We are mainly looking for the pattern INDIRECT_REF,
1374 then a POINTER_PLUS_EXPR, then INDIRECT_REF etc. while in between there could
1375 be any number of copies and casts. */
1376 gcc_assert (TREE_CODE (ssa_var
) == SSA_NAME
);
1378 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, ssa_var
)
1380 gimple use_stmt
= USE_STMT (use_p
);
1381 if (gimple_code (use_stmt
) == GIMPLE_PHI
)
1382 /* We check all the escaping levels that get to the PHI node
1383 and make sure they are all the same escaping;
1384 if not (which is rare) we let the escaping level be the
1385 minimum level that gets into that PHI because starting from
1386 that level we cannot expect the behavior of the indirections. */
1388 analyze_accesses_for_phi_node (mi
, use_stmt
, current_indirect_level
,
1389 visited
, record_accesses
);
1391 else if (is_gimple_call (use_stmt
))
1392 analyze_accesses_for_call_stmt (mi
, ssa_var
, use_stmt
,
1393 current_indirect_level
);
1394 else if (is_gimple_assign (use_stmt
))
1395 current_indirect_level
=
1396 analyze_accesses_for_assign_stmt (mi
, ssa_var
, use_stmt
,
1397 current_indirect_level
, last_op
,
1398 visited
, record_accesses
);
1408 /* A walk_tree function to go over the VAR_DECL, PARM_DECL nodes of
1409 the malloc size expression and check that those aren't changed
1410 over the function. */
1412 check_var_notmodified_p (tree
* tp
, int *walk_subtrees
, void *data
)
1416 check_var_data
*callback_data
= (check_var_data
*) data
;
1417 tree fn
= callback_data
->fn
;
1418 gimple_stmt_iterator gsi
;
1421 if (TREE_CODE (t
) != VAR_DECL
&& TREE_CODE (t
) != PARM_DECL
)
1424 FOR_EACH_BB_FN (bb
, DECL_STRUCT_FUNCTION (fn
))
1426 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1428 stmt
= gsi_stmt (gsi
);
1429 if (!is_gimple_assign (stmt
) && !is_gimple_call (stmt
))
1431 if (gimple_get_lhs (stmt
) == t
)
1433 callback_data
->stmt
= stmt
;
1442 /* Go backwards in the use-def chains and find out the expression
1443 represented by the possible SSA name in STMT, until it is composed
1444 of only VAR_DECL, PARM_DECL and INT_CST. In case of phi nodes
1445 we make sure that all the arguments represent the same subexpression,
1446 otherwise we fail. */
1449 can_calculate_stmt_before_stmt (gimple stmt
, sbitmap visited
)
1452 enum tree_code code
;
1454 switch (gimple_code (stmt
))
1457 code
= gimple_assign_rhs_code (stmt
);
1458 op1
= gimple_assign_rhs1 (stmt
);
1462 case POINTER_PLUS_EXPR
:
1467 op2
= gimple_assign_rhs2 (stmt
);
1468 op1
= can_calculate_expr_before_stmt (op1
, visited
);
1471 op2
= can_calculate_expr_before_stmt (op2
, visited
);
1473 return fold_build2 (code
, gimple_expr_type (stmt
), op1
, op2
);
1477 res
= can_calculate_expr_before_stmt (op1
, visited
);
1478 if (res
!= NULL_TREE
)
1479 return build1 (code
, gimple_expr_type (stmt
), res
);
1484 if (gimple_assign_single_p (stmt
))
1485 return can_calculate_expr_before_stmt (op1
, visited
);
1495 /* Make sure all the arguments represent the same value. */
1496 for (j
= 0; j
< gimple_phi_num_args (stmt
); j
++)
1499 tree def
= PHI_ARG_DEF (stmt
, j
);
1501 new_res
= can_calculate_expr_before_stmt (def
, visited
);
1502 if (res
== NULL_TREE
)
1504 else if (!new_res
|| !expressions_equal_p (res
, new_res
))
1515 /* Go backwards in the use-def chains and find out the expression
1516 represented by the possible SSA name in EXPR, until it is composed
1517 of only VAR_DECL, PARM_DECL and INT_CST. In case of phi nodes
1518 we make sure that all the arguments represent the same subexpression,
1519 otherwise we fail. */
1521 can_calculate_expr_before_stmt (tree expr
, sbitmap visited
)
1526 switch (TREE_CODE (expr
))
1529 /* Case of loop, we don't know to represent this expression. */
1530 if (TEST_BIT (visited
, SSA_NAME_VERSION (expr
)))
1533 SET_BIT (visited
, SSA_NAME_VERSION (expr
));
1534 def_stmt
= SSA_NAME_DEF_STMT (expr
);
1535 res
= can_calculate_stmt_before_stmt (def_stmt
, visited
);
1536 RESET_BIT (visited
, SSA_NAME_VERSION (expr
));
1548 /* There should be only one allocation function for the dimensions
1549 that don't escape. Here we check the allocation sites in this
1550 function. We must make sure that all the dimensions are allocated
1551 using malloc and that the malloc size parameter expression could be
1552 pre-calculated before the call to the malloc of dimension 0.
1554 Given a candidate matrix for flattening -- MI -- check if it's
1555 appropriate for flattening -- we analyze the allocation
1556 sites that we recorded in the previous analysis. The result of the
1557 analysis is a level of indirection (matrix dimension) in which the
1558 flattening is safe. We check the following conditions:
1559 1. There is only one allocation site for each dimension.
1560 2. The allocation sites of all the dimensions are in the same
1562 (The above two are being taken care of during the analysis when
1563 we check the allocation site).
1564 3. All the dimensions that we flatten are allocated at once; thus
1565 the total size must be known before the allocation of the
1566 dimension 0 (top level) -- we must make sure we represent the
1567 size of the allocation as an expression of global parameters or
1568 constants and that those doesn't change over the function. */
1571 check_allocation_function (void **slot
, void *data ATTRIBUTE_UNUSED
)
1574 gimple_stmt_iterator gsi
;
1575 basic_block bb_level_0
;
1576 struct matrix_info
*mi
= (struct matrix_info
*) *slot
;
1579 if (!mi
->malloc_for_level
)
1582 visited
= sbitmap_alloc (num_ssa_names
);
1584 /* Do nothing if the current function is not the allocation
1586 if (mi
->allocation_function_decl
!= current_function_decl
1587 /* We aren't in the main allocation function yet. */
1588 || !mi
->malloc_for_level
[0])
1591 for (level
= 1; level
< mi
->max_malloced_level
; level
++)
1592 if (!mi
->malloc_for_level
[level
])
1595 mark_min_matrix_escape_level (mi
, level
, NULL
);
1597 gsi
= gsi_for_stmt (mi
->malloc_for_level
[0]);
1598 bb_level_0
= gsi
.bb
;
1600 /* Check if the expression of the size passed to malloc could be
1601 pre-calculated before the malloc of level 0. */
1602 for (level
= 1; level
< mi
->min_indirect_level_escape
; level
++)
1606 struct malloc_call_data mcd
= {NULL
, NULL_TREE
, NULL_TREE
};
1608 call_stmt
= mi
->malloc_for_level
[level
];
1610 /* Find the correct malloc information. */
1611 collect_data_for_malloc_call (call_stmt
, &mcd
);
1613 /* No need to check anticipation for constants. */
1614 if (TREE_CODE (mcd
.size_var
) == INTEGER_CST
)
1616 if (!mi
->dimension_size
)
1618 mi
->dimension_size
=
1619 (tree
*) xcalloc (mi
->min_indirect_level_escape
,
1621 mi
->dimension_size_orig
=
1622 (tree
*) xcalloc (mi
->min_indirect_level_escape
,
1625 mi
->dimension_size
[level
] = mcd
.size_var
;
1626 mi
->dimension_size_orig
[level
] = mcd
.size_var
;
1629 /* ??? Here we should also add the way to calculate the size
1630 expression not only know that it is anticipated. */
1631 sbitmap_zero (visited
);
1632 size
= can_calculate_expr_before_stmt (mcd
.size_var
, visited
);
1633 if (size
== NULL_TREE
)
1635 mark_min_matrix_escape_level (mi
, level
, call_stmt
);
1638 "Matrix %s: Cannot calculate the size of allocation, escaping at level %d\n",
1639 get_name (mi
->decl
), level
);
1642 if (!mi
->dimension_size
)
1644 mi
->dimension_size
=
1645 (tree
*) xcalloc (mi
->min_indirect_level_escape
, sizeof (tree
));
1646 mi
->dimension_size_orig
=
1647 (tree
*) xcalloc (mi
->min_indirect_level_escape
, sizeof (tree
));
1649 mi
->dimension_size
[level
] = size
;
1650 mi
->dimension_size_orig
[level
] = size
;
1653 /* We don't need those anymore. */
1654 for (level
= mi
->min_indirect_level_escape
;
1655 level
< mi
->max_malloced_level
; level
++)
1656 mi
->malloc_for_level
[level
] = NULL
;
1660 /* Track all access and allocation sites. */
1662 find_sites_in_func (bool record
)
1664 sbitmap visited_stmts_1
;
1666 gimple_stmt_iterator gsi
;
1669 struct matrix_info tmpmi
, *mi
;
1671 visited_stmts_1
= sbitmap_alloc (num_ssa_names
);
1675 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1679 stmt
= gsi_stmt (gsi
);
1680 lhs
= gimple_get_lhs (stmt
);
1681 if (lhs
!= NULL_TREE
1682 && TREE_CODE (lhs
) == VAR_DECL
)
1685 if ((mi
= (struct matrix_info
*) htab_find (matrices_to_reorg
,
1688 sbitmap_zero (visited_stmts_1
);
1689 analyze_matrix_allocation_site (mi
, stmt
, 0, visited_stmts_1
);
1692 if (is_gimple_assign (stmt
)
1693 && gimple_assign_single_p (stmt
)
1694 && TREE_CODE (lhs
) == SSA_NAME
1695 && TREE_CODE (gimple_assign_rhs1 (stmt
)) == VAR_DECL
)
1697 tmpmi
.decl
= gimple_assign_rhs1 (stmt
);
1698 if ((mi
= (struct matrix_info
*) htab_find (matrices_to_reorg
,
1701 sbitmap_zero (visited_stmts_1
);
1702 analyze_matrix_accesses (mi
, lhs
, 0,
1703 false, visited_stmts_1
, record
);
1708 sbitmap_free (visited_stmts_1
);
1711 /* Traverse the use-def chains to see if there are matrices that
1712 are passed through pointers and we cannot know how they are accessed.
1713 For each SSA-name defined by a global variable of our interest,
1714 we traverse the use-def chains of the SSA and follow the indirections,
1715 and record in what level of indirection the use of the variable
1716 escapes. A use of a pointer escapes when it is passed to a function,
1717 stored into memory or assigned (except in malloc and free calls). */
1720 record_all_accesses_in_func (void)
1723 sbitmap visited_stmts_1
;
1725 visited_stmts_1
= sbitmap_alloc (num_ssa_names
);
1727 for (i
= 0; i
< num_ssa_names
; i
++)
1729 struct matrix_info tmpmi
, *mi
;
1730 tree ssa_var
= ssa_name (i
);
1734 || !is_gimple_assign (SSA_NAME_DEF_STMT (ssa_var
))
1735 || !gimple_assign_single_p (SSA_NAME_DEF_STMT (ssa_var
)))
1737 rhs
= gimple_assign_rhs1 (SSA_NAME_DEF_STMT (ssa_var
));
1738 lhs
= gimple_assign_lhs (SSA_NAME_DEF_STMT (ssa_var
));
1739 if (TREE_CODE (rhs
) != VAR_DECL
&& TREE_CODE (lhs
) != VAR_DECL
)
1742 /* If the RHS is a matrix that we want to analyze, follow the def-use
1743 chain for this SSA_VAR and check for escapes or apply the
1746 if ((mi
= (struct matrix_info
*) htab_find (matrices_to_reorg
, &tmpmi
)))
1748 /* This variable will track the visited PHI nodes, so we can limit
1749 its size to the maximum number of SSA names. */
1750 sbitmap_zero (visited_stmts_1
);
1751 analyze_matrix_accesses (mi
, ssa_var
,
1752 0, false, visited_stmts_1
, true);
1756 sbitmap_free (visited_stmts_1
);
1759 /* Used when we want to convert the expression: RESULT = something *
1760 ORIG to RESULT = something * NEW_VAL. If ORIG and NEW_VAL are power
1761 of 2, shift operations can be done, else division and
1765 compute_offset (HOST_WIDE_INT orig
, HOST_WIDE_INT new_val
, tree result
)
1769 tree result1
, ratio
, log
, orig_tree
, new_tree
;
1771 x
= exact_log2 (orig
);
1772 y
= exact_log2 (new_val
);
1774 if (x
!= -1 && y
!= -1)
1780 log
= build_int_cst (TREE_TYPE (result
), x
- y
);
1782 fold_build2 (LSHIFT_EXPR
, TREE_TYPE (result
), result
, log
);
1785 log
= build_int_cst (TREE_TYPE (result
), y
- x
);
1786 result1
= fold_build2 (RSHIFT_EXPR
, TREE_TYPE (result
), result
, log
);
1790 orig_tree
= build_int_cst (TREE_TYPE (result
), orig
);
1791 new_tree
= build_int_cst (TREE_TYPE (result
), new_val
);
1792 ratio
= fold_build2 (TRUNC_DIV_EXPR
, TREE_TYPE (result
), result
, orig_tree
);
1793 result1
= fold_build2 (MULT_EXPR
, TREE_TYPE (result
), ratio
, new_tree
);
1799 /* We know that we are allowed to perform matrix flattening (according to the
1800 escape analysis), so we traverse the use-def chains of the SSA vars
1801 defined by the global variables pointing to the matrices of our interest.
1802 in each use of the SSA we calculate the offset from the base address
1803 according to the following equation:
1805 a[I1][I2]...[Ik] , where D1..Dk is the length of each dimension and the
1806 escaping level is m <= k, and a' is the new allocated matrix,
1807 will be translated to :
1812 b = a' + I1*D2...*Dm + I2*D3...Dm + ... + Im
1816 transform_access_sites (void **slot
, void *data ATTRIBUTE_UNUSED
)
1818 gimple_stmt_iterator gsi
;
1819 struct matrix_info
*mi
= (struct matrix_info
*) *slot
;
1820 int min_escape_l
= mi
->min_indirect_level_escape
;
1821 struct access_site_info
*acc_info
;
1822 enum tree_code code
;
1825 if (min_escape_l
< 2 || !mi
->access_l
)
1827 for (i
= 0; VEC_iterate (access_site_info_p
, mi
->access_l
, i
, acc_info
);
1830 /* This is possible because we collect the access sites before
1831 we determine the final minimum indirection level. */
1832 if (acc_info
->level
>= min_escape_l
)
1837 if (acc_info
->is_alloc
)
1839 if (acc_info
->level
>= 0 && gimple_bb (acc_info
->stmt
))
1843 gimple stmt
= acc_info
->stmt
;
1846 FOR_EACH_SSA_TREE_OPERAND (def
, stmt
, iter
, SSA_OP_DEF
)
1847 mark_sym_for_renaming (SSA_NAME_VAR (def
));
1848 gsi
= gsi_for_stmt (stmt
);
1849 gcc_assert (is_gimple_assign (acc_info
->stmt
));
1850 lhs
= gimple_assign_lhs (acc_info
->stmt
);
1851 if (TREE_CODE (lhs
) == SSA_NAME
1852 && acc_info
->level
< min_escape_l
- 1)
1854 imm_use_iterator imm_iter
;
1855 use_operand_p use_p
;
1858 FOR_EACH_IMM_USE_STMT (use_stmt
, imm_iter
, lhs
)
1859 FOR_EACH_IMM_USE_ON_STMT (use_p
, imm_iter
)
1864 gcc_assert (gimple_assign_rhs_code (acc_info
->stmt
)
1866 /* Emit convert statement to convert to type of use. */
1867 tmp
= create_tmp_var (TREE_TYPE (lhs
), "new");
1868 add_referenced_var (tmp
);
1869 rhs
= gimple_assign_rhs1 (acc_info
->stmt
);
1870 rhs
= fold_convert (TREE_TYPE (tmp
),
1871 TREE_OPERAND (rhs
, 0));
1872 new_stmt
= gimple_build_assign (tmp
, rhs
);
1873 tmp
= make_ssa_name (tmp
, new_stmt
);
1874 gimple_assign_set_lhs (new_stmt
, tmp
);
1875 gsi
= gsi_for_stmt (acc_info
->stmt
);
1876 gsi_insert_after (&gsi
, new_stmt
, GSI_SAME_STMT
);
1877 SET_USE (use_p
, tmp
);
1880 if (acc_info
->level
< min_escape_l
- 1)
1881 gsi_remove (&gsi
, true);
1886 code
= gimple_assign_rhs_code (acc_info
->stmt
);
1887 if (code
== INDIRECT_REF
1888 && acc_info
->level
< min_escape_l
- 1)
1890 /* Replace the INDIRECT_REF with NOP (cast) usually we are casting
1891 from "pointer to type" to "type". */
1893 build1 (NOP_EXPR
, TREE_TYPE (gimple_assign_rhs1 (acc_info
->stmt
)),
1894 TREE_OPERAND (gimple_assign_rhs1 (acc_info
->stmt
), 0));
1895 gimple_assign_set_rhs_code (acc_info
->stmt
, NOP_EXPR
);
1896 gimple_assign_set_rhs1 (acc_info
->stmt
, t
);
1898 else if (code
== POINTER_PLUS_EXPR
1899 && acc_info
->level
< (min_escape_l
))
1901 imm_use_iterator imm_iter
;
1902 use_operand_p use_p
;
1905 int k
= acc_info
->level
;
1906 tree num_elements
, total_elements
;
1908 tree d_size
= mi
->dimension_size
[k
];
1910 /* We already make sure in the analysis that the first operand
1911 is the base and the second is the offset. */
1912 offset
= acc_info
->offset
;
1913 if (mi
->dim_map
[k
] == min_escape_l
- 1)
1915 if (!check_transpose_p
|| mi
->is_transposed_p
== false)
1920 tree d_type_size
, d_type_size_k
;
1922 d_type_size
= size_int (mi
->dimension_type_size
[min_escape_l
]);
1923 d_type_size_k
= size_int (mi
->dimension_type_size
[k
+ 1]);
1926 compute_offset (mi
->dimension_type_size
[min_escape_l
],
1927 mi
->dimension_type_size
[k
+ 1], offset
);
1929 total_elements
= new_offset
;
1930 if (new_offset
!= offset
)
1932 gsi
= gsi_for_stmt (acc_info
->stmt
);
1933 tmp1
= force_gimple_operand_gsi (&gsi
, total_elements
,
1935 true, GSI_SAME_STMT
);
1943 d_size
= mi
->dimension_size
[mi
->dim_map
[k
] + 1];
1945 fold_build2 (MULT_EXPR
, sizetype
, fold_convert (sizetype
, acc_info
->index
),
1946 fold_convert (sizetype
, d_size
));
1947 add_referenced_var (d_size
);
1948 gsi
= gsi_for_stmt (acc_info
->stmt
);
1949 tmp1
= force_gimple_operand_gsi (&gsi
, num_elements
, true,
1950 NULL
, true, GSI_SAME_STMT
);
1952 /* Replace the offset if needed. */
1955 if (TREE_CODE (offset
) == SSA_NAME
)
1959 FOR_EACH_IMM_USE_STMT (use_stmt
, imm_iter
, offset
)
1960 FOR_EACH_IMM_USE_ON_STMT (use_p
, imm_iter
)
1961 if (use_stmt
== acc_info
->stmt
)
1962 SET_USE (use_p
, tmp1
);
1966 gcc_assert (TREE_CODE (offset
) == INTEGER_CST
);
1967 gimple_assign_set_rhs2 (acc_info
->stmt
, tmp1
);
1968 update_stmt (acc_info
->stmt
);
1972 /* ??? meanwhile this happens because we record the same access
1973 site more than once; we should be using a hash table to
1974 avoid this and insert the STMT of the access site only
1977 gcc_unreachable (); */
1980 VEC_free (access_site_info_p
, heap
, mi
->access_l
);
1982 update_ssa (TODO_update_ssa
);
1983 #ifdef ENABLE_CHECKING
1989 /* Sort A array of counts. Arrange DIM_MAP to reflect the new order. */
1992 sort_dim_hot_level (gcov_type
* a
, int *dim_map
, int n
)
1997 for (i
= 0; i
< n
- 1; i
++)
1999 for (j
= 0; j
< n
- 1 - i
; j
++)
2001 if (a
[j
+ 1] < a
[j
])
2003 tmp
= a
[j
]; /* swap a[j] and a[j+1] */
2007 dim_map
[j
] = dim_map
[j
+ 1];
2008 dim_map
[j
+ 1] = tmp1
;
2014 /* Replace multiple mallocs (one for each dimension) to one malloc
2015 with the size of DIM1*DIM2*...*DIMN*size_of_element
2016 Make sure that we hold the size in the malloc site inside a
2017 new global variable; this way we ensure that the size doesn't
2018 change and it is accessible from all the other functions that
2019 uses the matrix. Also, the original calls to free are deleted,
2020 and replaced by a new call to free the flattened matrix. */
2023 transform_allocation_sites (void **slot
, void *data ATTRIBUTE_UNUSED
)
2026 struct matrix_info
*mi
;
2027 tree type
, oldfn
, prev_dim_size
;
2028 gimple call_stmt_0
, use_stmt
;
2029 struct cgraph_node
*c_node
;
2030 struct cgraph_edge
*e
;
2031 gimple_stmt_iterator gsi
;
2032 struct malloc_call_data mcd
= {NULL
, NULL_TREE
, NULL_TREE
};
2033 HOST_WIDE_INT element_size
;
2035 imm_use_iterator imm_iter
;
2036 use_operand_p use_p
;
2037 tree old_size_0
, tmp
;
2041 mi
= (struct matrix_info
*) *slot
;
2043 min_escape_l
= mi
->min_indirect_level_escape
;
2045 if (!mi
->malloc_for_level
)
2046 mi
->min_indirect_level_escape
= 0;
2048 if (mi
->min_indirect_level_escape
< 2)
2051 mi
->dim_map
= (int *) xcalloc (mi
->min_indirect_level_escape
, sizeof (int));
2052 for (i
= 0; i
< mi
->min_indirect_level_escape
; i
++)
2054 if (check_transpose_p
)
2060 fprintf (dump_file
, "Matrix %s:\n", get_name (mi
->decl
));
2061 for (i
= 0; i
< min_escape_l
; i
++)
2063 fprintf (dump_file
, "dim %d before sort ", i
);
2064 if (mi
->dim_hot_level
)
2066 "count is " HOST_WIDEST_INT_PRINT_DEC
" \n",
2067 mi
->dim_hot_level
[i
]);
2070 sort_dim_hot_level (mi
->dim_hot_level
, mi
->dim_map
,
2071 mi
->min_indirect_level_escape
);
2073 for (i
= 0; i
< min_escape_l
; i
++)
2075 fprintf (dump_file
, "dim %d after sort\n", i
);
2076 if (mi
->dim_hot_level
)
2077 fprintf (dump_file
, "count is " HOST_WIDE_INT_PRINT_DEC
2078 " \n", (HOST_WIDE_INT
) mi
->dim_hot_level
[i
]);
2080 for (i
= 0; i
< mi
->min_indirect_level_escape
; i
++)
2083 fprintf (dump_file
, "dim_map[%d] after sort %d\n", i
,
2085 if (mi
->dim_map
[i
] != i
)
2089 "Transposed dimensions: dim %d is now dim %d\n",
2091 mi
->is_transposed_p
= true;
2097 for (i
= 0; i
< mi
->min_indirect_level_escape
; i
++)
2100 /* Call statement of allocation site of level 0. */
2101 call_stmt_0
= mi
->malloc_for_level
[0];
2103 /* Finds the correct malloc information. */
2104 collect_data_for_malloc_call (call_stmt_0
, &mcd
);
2106 mi
->dimension_size
[0] = mcd
.size_var
;
2107 mi
->dimension_size_orig
[0] = mcd
.size_var
;
2108 /* Make sure that the variables in the size expression for
2109 all the dimensions (above level 0) aren't modified in
2110 the allocation function. */
2111 for (i
= 1; i
< mi
->min_indirect_level_escape
; i
++)
2114 check_var_data data
;
2116 /* mi->dimension_size must contain the expression of the size calculated
2117 in check_allocation_function. */
2118 gcc_assert (mi
->dimension_size
[i
]);
2120 data
.fn
= mi
->allocation_function_decl
;
2122 t
= walk_tree_without_duplicates (&(mi
->dimension_size
[i
]),
2123 check_var_notmodified_p
,
2127 mark_min_matrix_escape_level (mi
, i
, data
.stmt
);
2132 if (mi
->min_indirect_level_escape
< 2)
2135 /* Since we should make sure that the size expression is available
2136 before the call to malloc of level 0. */
2137 gsi
= gsi_for_stmt (call_stmt_0
);
2139 /* Find out the size of each dimension by looking at the malloc
2140 sites and create a global variable to hold it.
2141 We add the assignment to the global before the malloc of level 0. */
2143 /* To be able to produce gimple temporaries. */
2144 oldfn
= current_function_decl
;
2145 current_function_decl
= mi
->allocation_function_decl
;
2146 push_cfun (DECL_STRUCT_FUNCTION (mi
->allocation_function_decl
));
2148 /* Set the dimension sizes as follows:
2149 DIM_SIZE[i] = DIM_SIZE[n] * ... * DIM_SIZE[i]
2150 where n is the maximum non escaping level. */
2151 element_size
= mi
->dimension_type_size
[mi
->min_indirect_level_escape
];
2152 prev_dim_size
= NULL_TREE
;
2154 for (i
= mi
->min_indirect_level_escape
- 1; i
>= 0; i
--)
2156 tree dim_size
, dim_var
;
2160 /* Now put the size expression in a global variable and initialize it to
2161 the size expression before the malloc of level 0. */
2163 add_new_static_var (TREE_TYPE
2164 (mi
->dimension_size_orig
[mi
->dim_map
[i
]]));
2165 type
= TREE_TYPE (mi
->dimension_size_orig
[mi
->dim_map
[i
]]);
2167 /* DIM_SIZE = MALLOC_SIZE_PARAM / TYPE_SIZE. */
2168 /* Find which dim ID becomes dim I. */
2169 for (id
= 0; id
< mi
->min_indirect_level_escape
; id
++)
2170 if (mi
->dim_map
[id
] == i
)
2173 build_int_cst (type
, mi
->dimension_type_size
[id
+ 1]);
2175 prev_dim_size
= build_int_cst (type
, element_size
);
2176 if (!check_transpose_p
&& i
== mi
->min_indirect_level_escape
- 1)
2178 dim_size
= mi
->dimension_size_orig
[id
];
2183 fold_build2 (TRUNC_DIV_EXPR
, type
, mi
->dimension_size_orig
[id
],
2186 dim_size
= fold_build2 (MULT_EXPR
, type
, dim_size
, prev_dim_size
);
2188 dim_size
= force_gimple_operand_gsi (&gsi
, dim_size
, true, NULL
,
2189 true, GSI_SAME_STMT
);
2190 /* GLOBAL_HOLDING_THE_SIZE = DIM_SIZE. */
2191 stmt
= gimple_build_assign (dim_var
, dim_size
);
2192 mark_symbols_for_renaming (stmt
);
2193 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
2195 prev_dim_size
= mi
->dimension_size
[i
] = dim_var
;
2197 update_ssa (TODO_update_ssa
);
2198 /* Replace the malloc size argument in the malloc of level 0 to be
2199 the size of all the dimensions. */
2200 c_node
= cgraph_node (mi
->allocation_function_decl
);
2201 old_size_0
= gimple_call_arg (call_stmt_0
, 0);
2202 tmp
= force_gimple_operand_gsi (&gsi
, mi
->dimension_size
[0], true,
2203 NULL
, true, GSI_SAME_STMT
);
2204 if (TREE_CODE (old_size_0
) == SSA_NAME
)
2206 FOR_EACH_IMM_USE_STMT (use_stmt
, imm_iter
, old_size_0
)
2207 FOR_EACH_IMM_USE_ON_STMT (use_p
, imm_iter
)
2208 if (use_stmt
== call_stmt_0
)
2209 SET_USE (use_p
, tmp
);
2211 /* When deleting the calls to malloc we need also to remove the edge from
2212 the call graph to keep it consistent. Notice that cgraph_edge may
2213 create a new node in the call graph if there is no node for the given
2214 declaration; this shouldn't be the case but currently there is no way to
2215 check this outside of "cgraph.c". */
2216 for (i
= 1; i
< mi
->min_indirect_level_escape
; i
++)
2218 gimple_stmt_iterator gsi
;
2219 gimple use_stmt1
= NULL
;
2221 gimple call_stmt
= mi
->malloc_for_level
[i
];
2222 gcc_assert (is_gimple_call (call_stmt
));
2223 e
= cgraph_edge (c_node
, call_stmt
);
2225 cgraph_remove_edge (e
);
2226 gsi
= gsi_for_stmt (call_stmt
);
2227 /* Remove the call stmt. */
2228 gsi_remove (&gsi
, true);
2229 /* remove the type cast stmt. */
2230 FOR_EACH_IMM_USE_STMT (use_stmt
, imm_iter
,
2231 gimple_call_lhs (call_stmt
))
2233 use_stmt1
= use_stmt
;
2234 gsi
= gsi_for_stmt (use_stmt
);
2235 gsi_remove (&gsi
, true);
2237 /* Remove the assignment of the allocated area. */
2238 FOR_EACH_IMM_USE_STMT (use_stmt
, imm_iter
,
2239 gimple_get_lhs (use_stmt1
))
2241 gsi
= gsi_for_stmt (use_stmt
);
2242 gsi_remove (&gsi
, true);
2245 update_ssa (TODO_update_ssa
);
2246 #ifdef ENABLE_CHECKING
2249 /* Delete the calls to free. */
2250 for (i
= 1; i
< mi
->min_indirect_level_escape
; i
++)
2252 gimple_stmt_iterator gsi
;
2254 /* ??? wonder why this case is possible but we failed on it once. */
2255 if (!mi
->free_stmts
[i
].stmt
)
2258 c_node
= cgraph_node (mi
->free_stmts
[i
].func
);
2259 gcc_assert (is_gimple_call (mi
->free_stmts
[i
].stmt
));
2260 e
= cgraph_edge (c_node
, mi
->free_stmts
[i
].stmt
);
2262 cgraph_remove_edge (e
);
2263 current_function_decl
= mi
->free_stmts
[i
].func
;
2264 set_cfun (DECL_STRUCT_FUNCTION (mi
->free_stmts
[i
].func
));
2265 gsi
= gsi_for_stmt (mi
->free_stmts
[i
].stmt
);
2266 gsi_remove (&gsi
, true);
2268 /* Return to the previous situation. */
2269 current_function_decl
= oldfn
;
2276 /* Print out the results of the escape analysis. */
2278 dump_matrix_reorg_analysis (void **slot
, void *data ATTRIBUTE_UNUSED
)
2280 struct matrix_info
*mi
= (struct matrix_info
*) *slot
;
2284 fprintf (dump_file
, "Matrix \"%s\"; Escaping Level: %d, Num Dims: %d,",
2285 get_name (mi
->decl
), mi
->min_indirect_level_escape
, mi
->num_dims
);
2286 fprintf (dump_file
, " Malloc Dims: %d, ", mi
->max_malloced_level
);
2287 fprintf (dump_file
, "\n");
2288 if (mi
->min_indirect_level_escape
>= 2)
2289 fprintf (dump_file
, "Flattened %d dimensions \n",
2290 mi
->min_indirect_level_escape
);
2294 /* Perform matrix flattening. */
2299 struct cgraph_node
*node
;
2302 check_transpose_p
= true;
2304 check_transpose_p
= false;
2305 /* If there are hand written vectors, we skip this optimization. */
2306 for (node
= cgraph_nodes
; node
; node
= node
->next
)
2307 if (!may_flatten_matrices (node
))
2309 matrices_to_reorg
= htab_create (37, mtt_info_hash
, mtt_info_eq
, mat_free
);
2310 /* Find and record all potential matrices in the program. */
2311 find_matrices_decl ();
2312 /* Analyze the accesses of the matrices (escaping analysis). */
2313 for (node
= cgraph_nodes
; node
; node
= node
->next
)
2318 temp_fn
= current_function_decl
;
2319 current_function_decl
= node
->decl
;
2320 push_cfun (DECL_STRUCT_FUNCTION (node
->decl
));
2321 bitmap_obstack_initialize (NULL
);
2322 gimple_register_cfg_hooks ();
2324 if (!gimple_in_ssa_p (cfun
))
2326 free_dominance_info (CDI_DOMINATORS
);
2327 free_dominance_info (CDI_POST_DOMINATORS
);
2329 current_function_decl
= temp_fn
;
2330 bitmap_obstack_release (NULL
);
2335 #ifdef ENABLE_CHECKING
2336 verify_flow_info ();
2339 if (!matrices_to_reorg
)
2341 free_dominance_info (CDI_DOMINATORS
);
2342 free_dominance_info (CDI_POST_DOMINATORS
);
2344 current_function_decl
= temp_fn
;
2345 bitmap_obstack_release (NULL
);
2350 /* Create htap for phi nodes. */
2351 htab_mat_acc_phi_nodes
= htab_create (37, mat_acc_phi_hash
,
2352 mat_acc_phi_eq
, free
);
2353 if (!check_transpose_p
)
2354 find_sites_in_func (false);
2357 find_sites_in_func (true);
2358 loop_optimizer_init (LOOPS_NORMAL
);
2361 htab_traverse (matrices_to_reorg
, analyze_transpose
, NULL
);
2365 loop_optimizer_finalize ();
2366 current_loops
= NULL
;
2369 /* If the current function is the allocation function for any of
2370 the matrices we check its allocation and the escaping level. */
2371 htab_traverse (matrices_to_reorg
, check_allocation_function
, NULL
);
2372 free_dominance_info (CDI_DOMINATORS
);
2373 free_dominance_info (CDI_POST_DOMINATORS
);
2375 current_function_decl
= temp_fn
;
2376 bitmap_obstack_release (NULL
);
2378 htab_traverse (matrices_to_reorg
, transform_allocation_sites
, NULL
);
2379 /* Now transform the accesses. */
2380 for (node
= cgraph_nodes
; node
; node
= node
->next
)
2383 /* Remember that allocation sites have been handled. */
2386 temp_fn
= current_function_decl
;
2387 current_function_decl
= node
->decl
;
2388 push_cfun (DECL_STRUCT_FUNCTION (node
->decl
));
2389 bitmap_obstack_initialize (NULL
);
2390 gimple_register_cfg_hooks ();
2391 record_all_accesses_in_func ();
2392 htab_traverse (matrices_to_reorg
, transform_access_sites
, NULL
);
2393 free_dominance_info (CDI_DOMINATORS
);
2394 free_dominance_info (CDI_POST_DOMINATORS
);
2396 current_function_decl
= temp_fn
;
2397 bitmap_obstack_release (NULL
);
2399 htab_traverse (matrices_to_reorg
, dump_matrix_reorg_analysis
, NULL
);
2401 current_function_decl
= NULL
;
2403 matrices_to_reorg
= NULL
;
2408 /* The condition for matrix flattening to be performed. */
2410 gate_matrix_reorg (void)
2412 return flag_ipa_matrix_reorg
&& flag_whole_program
;
2415 struct simple_ipa_opt_pass pass_ipa_matrix_reorg
=
2419 "matrix-reorg", /* name */
2420 gate_matrix_reorg
, /* gate */
2421 matrix_reorg
, /* execute */
2424 0, /* static_pass_number */
2425 TV_NONE
, /* tv_id */
2426 0, /* properties_required */
2427 0, /* properties_provided */
2428 0, /* properties_destroyed */
2429 0, /* todo_flags_start */
2430 TODO_dump_cgraph
| TODO_dump_func
/* todo_flags_finish */