/* Matrix layout transformations.
   Copyright (C) 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
   Contributed by Razya Ladelsky <razya@il.ibm.com>
   Originally written by Revital Eres and Mustafa Hagog.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/*
   Matrix flattening optimization tries to replace an N-dimensional
   matrix with its equivalent M-dimensional matrix, where M < N.
   This first implementation focuses on global matrices defined dynamically.

   When M==1, we actually flatten the whole matrix.
   For instance consider a two-dimensional array a [dim1] [dim2].
   The code for allocating space for it usually looks like:

     a = (int **) malloc(dim1 * sizeof(int *));
     for (i=0; i<dim1; i++)
        a[i] = (int *) malloc (dim2 * sizeof(int));

   If the array "a" is found suitable for this optimization,
   its allocation is replaced by:

     a = (int *) malloc (dim1 * dim2 * sizeof(int));

   and all the references to a[i][j] are replaced by a[i * dim2 + j].

   The two main phases of the optimization are the analysis
   and the transformation.
   The driver of the optimization is matrix_reorg ().

   Analysis phase:
   ===============

   We'll number the dimensions outside-in, meaning the most external
   is 0, then 1, and so on.
   The analysis part of the optimization determines K, the escape
   level of an N-dimensional matrix (K <= N), that allows flattening of
   the external dimensions 0, 1, ..., K-1.  Escape level 0 means that the
   whole matrix escapes and no flattening is possible.

   The analysis part is implemented in analyze_matrix_allocation_site()
   and analyze_matrix_accesses().

   Transformation phase:
   =====================
   In this phase we define the new flattened matrices that replace the
   original matrices in the code.
   Implemented in transform_allocation_sites(),
   transform_access_sites().

   Matrix Transposing
   ==================
   The idea of Matrix Transposing is organizing the matrix in a different
   layout such that the dimensions are reordered.
   This could produce better cache behavior in some cases.

   For example, let's look at the matrix accesses in the following loop:

     for (i=0; i<N; i++)
      for (j=0; j<M; j++)
       access to a[i][j]

   This loop can produce good cache behavior because the elements of
   the inner dimension are accessed sequentially.

   However, if the accesses of the matrix were of the following form:

     for (i=0; i<N; i++)
      for (j=0; j<M; j++)
       access to a[j][i]

   then in this loop we iterate the columns and not the rows.
   Therefore, swapping the rows and columns
   would have given an organization with better (cache) locality.
   Swapping the dimensions of the matrix is called matrix transposing.

   This example, of course, extends to matrices of higher dimension
   as well.

   Since a program could include all kinds of accesses, there is a decision
   mechanism, implemented in analyze_transpose(), which implements a
   heuristic that tries to determine whether to transpose the matrix or not,
   according to the form of the dominant accesses.
   This decision is transferred to the flattening mechanism; whether or not
   the matrix was transposed, the matrix is flattened (if possible).

   This decision making is based on profiling information and loop
   information.  If profiling information is available, the decision
   mechanism is applied; otherwise the matrix is only flattened (if
   possible).

   Both optimizations are described in the paper "Matrix flattening and
   transposing in GCC" which was presented in the GCC summit 2006.
   http://www.gccsummit.org/2006/2006-GCC-Summit-Proceedings.pdf.  */
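/* A concrete sketch of the combined effect (illustrative only, not the
   literal output of this pass): for the second loop above, transposing
   reorders the dimensions before flattening, so the allocation becomes
   a single

     a = (int *) malloc (N * M * sizeof(int));

   and the access a[j][i] is re-linearized so that the index advanced by
   the innermost loop varies fastest in memory.  The exact offset
   expression is built per access site in transform_access_sites().  */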
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "rtl.h"
#include "tree-inline.h"
#include "tree-flow.h"
#include "tree-flow-inline.h"
#include "langhooks.h"
#include "hashtab.h"
#include "toplev.h"
#include "flags.h"
#include "ggc.h"
#include "debug.h"
#include "target.h"
#include "cgraph.h"
#include "diagnostic.h"
#include "timevar.h"
#include "params.h"
#include "function.h"
#include "basic-block.h"
#include "cfgloop.h"
#include "tree-iterator.h"
#include "tree-pass.h"
#include "opts.h"
#include "tree-data-ref.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
/* We need to collect a lot of data from the original malloc,
   particularly as the gimplifier has converted:

   orig_var = (struct_type *) malloc (x * sizeof (struct_type *));

   into

   T3 = <constant> ;  ** <constant> is amount to malloc; precomputed **
   T4 = malloc (T3);
   T5 = (struct_type *) T4;
   orig_var = T5;

   The following struct fields allow us to collect all the necessary data from
   the gimplified program.  The comments in the struct below are all based
   on the gimple example above.  */

struct malloc_call_data
{
  gimple call_stmt;	/* Tree for "T4 = malloc (T3);" */
  tree size_var;	/* Var decl for T3.  */
  tree malloc_size;	/* Tree for "<constant>", the rhs assigned to T3.  */
};
static tree can_calculate_expr_before_stmt (tree, sbitmap);
static tree can_calculate_stmt_before_stmt (gimple, sbitmap);
/* The front end of the compiler, when parsing statements of the form:

   var = (type_cast) malloc (sizeof (type));

   always converts this single statement into the following statements
   (GIMPLE form):

   T.1 = sizeof (type);
   T.2 = malloc (T.1);
   T.3 = (type_cast) T.2;
   var = T.3;

   Since we need to create new malloc statements and modify the original
   statements somewhat, we need to find all four of the above statements.
   Currently record_call_1 (called for building cgraph edges) finds and
   records the statements containing the actual call to malloc, but we
   need to find the rest of the variables/statements on our own.  That
   is what the following function does.  */
static void
collect_data_for_malloc_call (gimple stmt, struct malloc_call_data *m_data)
{
  tree size_var = NULL;
  tree malloc_fn_decl;
  tree arg1;

  gcc_assert (is_gimple_call (stmt));

  malloc_fn_decl = gimple_call_fndecl (stmt);
  if (malloc_fn_decl == NULL
      || DECL_FUNCTION_CODE (malloc_fn_decl) != BUILT_IN_MALLOC)
    return;

  arg1 = gimple_call_arg (stmt, 0);
  size_var = arg1;

  m_data->call_stmt = stmt;
  m_data->size_var = size_var;
  if (TREE_CODE (size_var) != VAR_DECL)
    m_data->malloc_size = size_var;
  else
    m_data->malloc_size = NULL_TREE;
}
/* Information about matrix access site.
   For example: if an access site of matrix arr is arr[i][j]
   the ACCESS_SITE_INFO structure will have the address
   of arr as its stmt.  The INDEX_INFO will hold information about the
   initial address and index of each dimension.  */
struct access_site_info
{
  /* The statement (INDIRECT_REF or POINTER_PLUS_EXPR).  */
  gimple stmt;

  /* In case of POINTER_PLUS_EXPR, what is the offset.  */
  tree offset;

  /* The index which created the offset.  */
  tree index;

  /* The indirection level of this statement.  */
  int level;

  /* TRUE for allocation site FALSE for access site.  */
  bool is_alloc;

  /* The function containing the access site.  */
  tree function_decl;

  /* This access is iterated in the inner most loop.  */
  bool iterated_by_inner_most_loop_p;
};

typedef struct access_site_info *access_site_info_p;
DEF_VEC_P (access_site_info_p);
DEF_VEC_ALLOC_P (access_site_info_p, heap);
/* Information about matrix to flatten.  */
struct matrix_info
{
  /* Decl tree of this matrix.  */
  tree decl;

  /* Number of dimensions; number
     of "*" in the type declaration.  */
  int num_dims;

  /* Minimum indirection level that escapes, 0 means that
     the whole matrix escapes, k means that dimensions
     0 to ACTUAL_DIM - k escape.  */
  int min_indirect_level_escape;

  gimple min_indirect_level_escape_stmt;

  /* Hold the allocation site for each level (dimension).
     We can use NUM_DIMS as the upper bound and allocate the array
     once with this number of elements and no need to use realloc and
     MAX_MALLOCED_LEVEL.  */
  gimple *malloc_for_level;

  int max_malloced_level;

  /* Is the matrix transposed.  */
  bool is_transposed_p;

  /* The location of the allocation sites (they must be in one
     function).  */
  tree allocation_function_decl;

  /* The calls to free for each level of indirection.  */
  struct free_info
  {
    gimple stmt;
    tree func;
  } *free_stmts;

  /* An array which holds for each dimension its size, where
     dimension 0 is the outermost (the one that contains all the
     others).  */
  tree *dimension_size;

  /* An array which holds for each dimension its original size
     (before transposing and flattening take place).  */
  tree *dimension_size_orig;

  /* An array which holds for each dimension the size of the type of
     the elements accessed in that level (in bytes).  */
  HOST_WIDE_INT *dimension_type_size;

  int dimension_type_size_len;

  /* An array collecting the count of accesses for each dimension.  */
  gcov_type *dim_hot_level;

  /* An array of the accesses to be flattened.
     Elements are of type "struct access_site_info *".  */
  VEC (access_site_info_p, heap) *access_l;

  /* A map of how the dimensions will be organized at the end of
     the analyses.  */
  int *dim_map;
};
/* In each phi node we want to record the indirection level we have when we
   get to the phi node.  Usually we will have phi nodes with more than two
   arguments, then we must assure that all of them get to the phi node with
   the same indirection level; otherwise it's not safe to do the flattening.
   So we record the information regarding the indirection level each time we
   get to the phi node in this hash table.  */

struct matrix_access_phi_node
{
  gimple phi;
  int indirection_level;
};
/* We use this structure to find if the SSA variable is accessed inside the
   tree and record the tree containing it.  */

struct ssa_acc_in_tree
{
  /* The variable whose accesses in the tree we are looking for.  */
  tree ssa_var;

  /* The tree and code inside it the ssa_var is accessed, currently
     it could be an INDIRECT_REF or CALL_EXPR.  */
  enum tree_code t_code;
  tree t_tree;

  /* The place in the containing tree.  */
  tree *tp;

  tree second_op;
  bool var_found;
};
static void analyze_matrix_accesses (struct matrix_info *, tree, int, bool,
                                     sbitmap, bool);
static int transform_allocation_sites (void **, void *);
static int transform_access_sites (void **, void *);
static int analyze_transpose (void **, void *);
static int dump_matrix_reorg_analysis (void **, void *);

static bool check_transpose_p;
/* Hash function used for the phi nodes.  */

static hashval_t
mat_acc_phi_hash (const void *p)
{
  const struct matrix_access_phi_node *const ma_phi =
    (const struct matrix_access_phi_node *) p;

  return htab_hash_pointer (ma_phi->phi);
}
/* Equality means phi node pointers are the same.  */

static int
mat_acc_phi_eq (const void *p1, const void *p2)
{
  const struct matrix_access_phi_node *const phi1 =
    (const struct matrix_access_phi_node *) p1;
  const struct matrix_access_phi_node *const phi2 =
    (const struct matrix_access_phi_node *) p2;

  if (phi1->phi == phi2->phi)
    return 1;

  return 0;
}
/* Hold the PHI nodes we visit during the traversal for escaping
   analysis.  */
static htab_t htab_mat_acc_phi_nodes = NULL;

/* This hash-table holds the information about the matrices we are
   going to handle.  */
static htab_t matrices_to_reorg = NULL;
/* Return a hash for MTT, which is really a "matrix_info *".  */
static hashval_t
mtt_info_hash (const void *mtt)
{
  return htab_hash_pointer (((const struct matrix_info *) mtt)->decl);
}
/* Return true if MTT1 and MTT2 (which are really both of type
   "matrix_info *") refer to the same decl.  */
static int
mtt_info_eq (const void *mtt1, const void *mtt2)
{
  const struct matrix_info *const i1 = (const struct matrix_info *) mtt1;
  const struct matrix_info *const i2 = (const struct matrix_info *) mtt2;

  if (i1->decl == i2->decl)
    return true;

  return false;
}
/* Return false if STMT may contain a vector expression.
   In this situation, all matrices should not be flattened.  */
static bool
may_flatten_matrices_1 (gimple stmt)
{
  tree t;

  switch (gimple_code (stmt))
    {
    case GIMPLE_ASSIGN:
      if (!gimple_assign_cast_p (stmt))
        return true;

      t = gimple_assign_rhs1 (stmt);
      while (CONVERT_EXPR_P (t))
        {
          if (TREE_TYPE (t) && POINTER_TYPE_P (TREE_TYPE (t)))
            {
              tree pointee;

              pointee = TREE_TYPE (t);
              while (POINTER_TYPE_P (pointee))
                pointee = TREE_TYPE (pointee);
              if (TREE_CODE (pointee) == VECTOR_TYPE)
                {
                  if (dump_file)
                    fprintf (dump_file,
                             "Found vector type, don't flatten matrix\n");
                  return false;
                }
            }
          t = TREE_OPERAND (t, 0);
        }
      break;
    case GIMPLE_ASM:
      /* Asm code could contain vector operations.  */
      return false;
    default:
      break;
    }
  return true;
}
/* Return false if there are hand-written vectors in the program.
   We disable the flattening in such a case.  */
static bool
may_flatten_matrices (struct cgraph_node *node)
{
  tree decl;
  struct function *func;
  basic_block bb;
  gimple_stmt_iterator gsi;

  decl = node->decl;
  if (node->analyzed)
    {
      func = DECL_STRUCT_FUNCTION (decl);
      FOR_EACH_BB_FN (bb, func)
        for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
          if (!may_flatten_matrices_1 (gsi_stmt (gsi)))
            return false;
    }
  return true;
}
/* Given a VAR_DECL, check its type to determine whether it is
   a definition of a dynamic allocated matrix and therefore is
   a suitable candidate for the matrix flattening optimization.
   Return NULL if VAR_DECL is not such decl.  Otherwise, allocate
   a MATRIX_INFO structure, fill it with the relevant information
   and return a pointer to it.
   TODO: handle also statically defined arrays.  */
static struct matrix_info *
analyze_matrix_decl (tree var_decl)
{
  struct matrix_info *m_node, tmpmi, *mi;
  tree var_type;
  int dim_num = 0;

  gcc_assert (matrices_to_reorg);

  if (TREE_CODE (var_decl) == PARM_DECL)
    var_type = DECL_ARG_TYPE (var_decl);
  else if (TREE_CODE (var_decl) == VAR_DECL)
    var_type = TREE_TYPE (var_decl);
  else
    return NULL;

  if (!POINTER_TYPE_P (var_type))
    return NULL;

  while (POINTER_TYPE_P (var_type))
    {
      var_type = TREE_TYPE (var_type);
      dim_num++;
    }

  if (dim_num <= 1)
    return NULL;

  if (!COMPLETE_TYPE_P (var_type)
      || TREE_CODE (TYPE_SIZE_UNIT (var_type)) != INTEGER_CST)
    return NULL;

  /* Check to see if this pointer is already in there.  */
  tmpmi.decl = var_decl;
  mi = (struct matrix_info *) htab_find (matrices_to_reorg, &tmpmi);

  if (mi)
    return NULL;

  /* Record the matrix.  */

  m_node = (struct matrix_info *) xcalloc (1, sizeof (struct matrix_info));
  m_node->decl = var_decl;
  m_node->num_dims = dim_num;
  m_node->free_stmts
    = (struct free_info *) xcalloc (dim_num, sizeof (struct free_info));

  /* Init min_indirect_level_escape to -1 to indicate that no escape
     analysis has been done yet.  */
  m_node->min_indirect_level_escape = -1;
  m_node->is_transposed_p = false;

  return m_node;
}
/* Free matrix E.  */
static void
mat_free (void *e)
{
  struct matrix_info *mat = (struct matrix_info *) e;

  if (!mat)
    return;

  if (mat->free_stmts)
    free (mat->free_stmts);
  if (mat->dim_hot_level)
    free (mat->dim_hot_level);
  if (mat->malloc_for_level)
    free (mat->malloc_for_level);
}
/* Find all potential matrices.
   TODO: currently we handle only multidimensional
   dynamically allocated arrays.  */
static void
find_matrices_decl (void)
{
  struct matrix_info *tmp;
  PTR *slot;
  struct varpool_node *vnode;

  gcc_assert (matrices_to_reorg);

  /* For every global variable in the program:
     Check to see if it's of a candidate type and record it.  */
  for (vnode = varpool_nodes_queue; vnode; vnode = vnode->next_needed)
    {
      tree var_decl = vnode->decl;

      if (!var_decl || TREE_CODE (var_decl) != VAR_DECL)
        continue;

      if (matrices_to_reorg)
        if ((tmp = analyze_matrix_decl (var_decl)))
          {
            if (!TREE_ADDRESSABLE (var_decl))
              {
                slot = htab_find_slot (matrices_to_reorg, tmp, INSERT);
                *slot = tmp;
              }
          }
    }
  return;
}
/* Mark that the matrix MI escapes at level L.  */
static void
mark_min_matrix_escape_level (struct matrix_info *mi, int l, gimple s)
{
  if (mi->min_indirect_level_escape == -1
      || (mi->min_indirect_level_escape > l))
    {
      mi->min_indirect_level_escape = l;
      mi->min_indirect_level_escape_stmt = s;
    }
}
/* Find if the SSA variable is accessed inside the
   tree and record the tree containing it.
   The only relevant uses are the case of SSA_NAME, or SSA inside
   INDIRECT_REF, PLUS_EXPR, POINTER_PLUS_EXPR, MULT_EXPR.  */
static void
ssa_accessed_in_tree (tree t, struct ssa_acc_in_tree *a)
{
  a->t_code = TREE_CODE (t);
  switch (a->t_code)
    {
    case SSA_NAME:
      if (t == a->ssa_var)
        a->var_found = true;
      break;
    case INDIRECT_REF:
      if (SSA_VAR_P (TREE_OPERAND (t, 0))
          && TREE_OPERAND (t, 0) == a->ssa_var)
        a->var_found = true;
      break;
    default:
      break;
    }
}
/* Find if the SSA variable is accessed on the right hand side of
   gimple call STMT.  */

static void
ssa_accessed_in_call_rhs (gimple stmt, struct ssa_acc_in_tree *a)
{
  tree decl;
  tree arg;
  size_t i;

  a->t_code = CALL_EXPR;
  for (i = 0; i < gimple_call_num_args (stmt); i++)
    {
      arg = gimple_call_arg (stmt, i);
      if (arg == a->ssa_var)
        {
          a->var_found = true;
          decl = gimple_call_fndecl (stmt);
          a->t_tree = decl;
          break;
        }
    }
}
/* Find if the SSA variable is accessed on the right hand side of
   gimple assign STMT.  */

static void
ssa_accessed_in_assign_rhs (gimple stmt, struct ssa_acc_in_tree *a)
{
  a->t_code = gimple_assign_rhs_code (stmt);
  switch (a->t_code)
    {
      tree op1, op2;

    case SSA_NAME:
    case INDIRECT_REF:
    CASE_CONVERT:
    case VIEW_CONVERT_EXPR:
      ssa_accessed_in_tree (gimple_assign_rhs1 (stmt), a);
      break;
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MULT_EXPR:
      op1 = gimple_assign_rhs1 (stmt);
      op2 = gimple_assign_rhs2 (stmt);

      if (op1 == a->ssa_var)
        {
          a->var_found = true;
          a->second_op = op2;
        }
      else if (op2 == a->ssa_var)
        {
          a->var_found = true;
          a->second_op = op1;
        }
      break;
    default:
      break;
    }
}
/* Record the access/allocation site information for matrix MI so we can
   handle it later in transformation.  */
static void
record_access_alloc_site_info (struct matrix_info *mi, gimple stmt, tree offset,
                               tree index, int level, bool is_alloc)
{
  struct access_site_info *acc_info;

  if (!mi->access_l)
    mi->access_l = VEC_alloc (access_site_info_p, heap, 100);

  acc_info
    = (struct access_site_info *)
    xcalloc (1, sizeof (struct access_site_info));
  acc_info->stmt = stmt;
  acc_info->offset = offset;
  acc_info->index = index;
  acc_info->function_decl = current_function_decl;
  acc_info->level = level;
  acc_info->is_alloc = is_alloc;

  VEC_safe_push (access_site_info_p, heap, mi->access_l, acc_info);
}
/* Record the malloc as the allocation site of the given LEVEL.  But
   first we make sure that all the size parameters passed to malloc in
   all the allocation sites could be pre-calculated before the call to
   the malloc of level 0 (the main malloc call).  */
static void
add_allocation_site (struct matrix_info *mi, gimple stmt, int level)
{
  struct malloc_call_data mcd;

  /* Make sure that the allocation sites are in the same function.  */
  if (!mi->allocation_function_decl)
    mi->allocation_function_decl = current_function_decl;
  else if (mi->allocation_function_decl != current_function_decl)
    {
      int min_malloc_level;

      gcc_assert (mi->malloc_for_level);

      /* Find the minimum malloc level that already has been seen;
         we know its allocation function must be
         MI->allocation_function_decl since it's different than
         CURRENT_FUNCTION_DECL; then the escaping level should be
         MIN (LEVEL, MIN_MALLOC_LEVEL) - 1, and the allocation function
         must be set accordingly.  */
      for (min_malloc_level = 0;
           min_malloc_level < mi->max_malloced_level
           && mi->malloc_for_level[min_malloc_level]; min_malloc_level++);
      if (level < min_malloc_level)
        {
          mi->allocation_function_decl = current_function_decl;
          mark_min_matrix_escape_level (mi, min_malloc_level, stmt);
        }
      else
        {
          mark_min_matrix_escape_level (mi, level, stmt);
          /* cannot be that (level == min_malloc_level)
             we would have returned earlier.  */
          return;
        }
    }

  /* Find the correct malloc information.  */
  collect_data_for_malloc_call (stmt, &mcd);

  /* We accept only calls to malloc function; we do not accept
     calls like calloc and realloc.  */
  if (!mi->malloc_for_level)
    {
      mi->malloc_for_level = XCNEWVEC (gimple, level + 1);
      mi->max_malloced_level = level + 1;
    }
  else if (mi->max_malloced_level <= level)
    {
      mi->malloc_for_level
        = XRESIZEVEC (gimple, mi->malloc_for_level, level + 1);

      /* Zero the newly allocated items.  */
      memset (&(mi->malloc_for_level[mi->max_malloced_level]),
              0, (level + 1 - mi->max_malloced_level) * sizeof (gimple));

      mi->max_malloced_level = level + 1;
    }
  mi->malloc_for_level[level] = stmt;
}
/* Given an assignment statement STMT that we know that its
   left-hand-side is the matrix MI variable, we traverse the immediate
   uses backwards until we get to a malloc site.  We make sure that
   there is one and only one malloc site that sets this variable.  When
   we are performing the flattening we generate a new variable that
   will hold the size for each dimension; each malloc that allocates a
   dimension has the size parameter; we use that parameter to
   initialize the dimension size variable so we can use it later in
   the address calculations.  LEVEL is the dimension we're inspecting.
   Return if STMT is related to an allocation site.  */

static void
analyze_matrix_allocation_site (struct matrix_info *mi, gimple stmt,
                                int level, sbitmap visited)
{
  if (gimple_assign_copy_p (stmt) || gimple_assign_cast_p (stmt))
    {
      tree rhs = gimple_assign_rhs1 (stmt);

      if (TREE_CODE (rhs) == SSA_NAME)
        {
          gimple def = SSA_NAME_DEF_STMT (rhs);

          analyze_matrix_allocation_site (mi, def, level, visited);
          return;
        }
      /* If we are back to the original matrix variable then we
         are sure that this is analyzed as an access site.  */
      else if (rhs == mi->decl)
        return;
    }
  /* A result of call to malloc.  */
  else if (is_gimple_call (stmt))
    {
      int call_flags = gimple_call_flags (stmt);

      if (!(call_flags & ECF_MALLOC))
        {
          mark_min_matrix_escape_level (mi, level, stmt);
          return;
        }
      else
        {
          tree malloc_fn_decl;
          const char *malloc_fname;

          malloc_fn_decl = gimple_call_fndecl (stmt);
          if (malloc_fn_decl == NULL_TREE)
            {
              mark_min_matrix_escape_level (mi, level, stmt);
              return;
            }
          malloc_fname = IDENTIFIER_POINTER (DECL_NAME (malloc_fn_decl));
          if (DECL_FUNCTION_CODE (malloc_fn_decl) != BUILT_IN_MALLOC)
            {
              if (dump_file)
                fprintf (dump_file,
                         "Matrix %s is an argument to function %s\n",
                         get_name (mi->decl), get_name (malloc_fn_decl));
              mark_min_matrix_escape_level (mi, level, stmt);
              return;
            }
        }
      /* This is a call to malloc of level 'level'.
         mi->max_malloced_level-1 == level means that we've
         seen a malloc statement of level 'level' before.
         If the statement is not the same one that we've
         seen before, then there's another malloc statement
         for the same level, which means that we need to mark
         it escaping.  */
      if (mi->malloc_for_level
          && mi->max_malloced_level-1 == level
          && mi->malloc_for_level[level] != stmt)
        {
          mark_min_matrix_escape_level (mi, level, stmt);
          return;
        }
      else
        add_allocation_site (mi, stmt, level);
      return;
    }
  /* Looks like we don't know what is happening in this
     statement so be in the safe side and mark it as escaping.  */
  mark_min_matrix_escape_level (mi, level, stmt);
}
/* The transposing decision making.
   In order to calculate the profitability of transposing, we collect two
   types of information regarding the accesses:
   1. profiling information used to express the hotness of an access, that
      is how often the matrix is accessed by this access site (count of the
      access site).
   2. which dimension in the access site is iterated by the inner
      most loop containing this access.

   The matrix will have a calculated value of weighted hotness for each
   dimension.
   Intuitively the hotness level of a dimension is a function of how
   many times it was the most frequently accessed dimension in the
   highly executed access sites of this matrix.

   It is computed by the following equation, for each dimension
   i = 1..n, summing over all access sites j = 1..m:

     dim_hot_level[i] += acc[j]->dim[i]->iter_by_inner_loop * count(j)

   Where n is the number of dims and m is the number of the matrix
   access sites.  acc[j]->dim[i]->iter_by_inner_loop is 1 if acc[j]
   iterates over dim[i] in innermost loop, and is 0 otherwise.

   The organization of the new matrix should be according to the
   hotness of each dimension.  The hotness of the dimension implies
   the locality of the elements.  */
static int
analyze_transpose (void **slot, void *data ATTRIBUTE_UNUSED)
{
  struct matrix_info *mi = (struct matrix_info *) *slot;
  int min_escape_l = mi->min_indirect_level_escape;
  struct loop *loop;
  affine_iv iv;
  struct access_site_info *acc_info;
  int i;

  if (min_escape_l < 2 || !mi->access_l)
    {
      if (mi->access_l)
        {
          for (i = 0;
               VEC_iterate (access_site_info_p, mi->access_l, i, acc_info);
               i++)
            free (acc_info);
          VEC_free (access_site_info_p, heap, mi->access_l);
        }
      return 1;
    }
  if (!mi->dim_hot_level)
    mi->dim_hot_level =
      (gcov_type *) xcalloc (min_escape_l, sizeof (gcov_type));

  for (i = 0; VEC_iterate (access_site_info_p, mi->access_l, i, acc_info);
       i++)
    {
      if (gimple_assign_rhs_code (acc_info->stmt) == POINTER_PLUS_EXPR
          && acc_info->level < min_escape_l)
        {
          loop = loop_containing_stmt (acc_info->stmt);
          if (!loop || loop->inner)
            {
              free (acc_info);
              continue;
            }
          if (simple_iv (loop, loop, acc_info->offset, &iv, true))
            {
              if (iv.step != NULL_TREE)
                {
                  HOST_WIDE_INT istep;

                  istep = int_cst_value (iv.step);
                  if (istep != 0)
                    {
                      acc_info->iterated_by_inner_most_loop_p = 1;
                      mi->dim_hot_level[acc_info->level] +=
                        gimple_bb (acc_info->stmt)->count;
                    }
                }
            }
        }
      free (acc_info);
    }
  VEC_free (access_site_info_p, heap, mi->access_l);
  mi->access_l = NULL;

  return 1;
}
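/* Worked illustration (invented numbers): for a 2-D matrix whose
   POINTER_PLUS_EXPR access sites yield dim_hot_level = {900, 100}
   (dim 0 is iterated by innermost loops in blocks with total count
   900), sort_dim_hot_level() below reorders ascending, so dim 0 --
   the hottest -- ends up last, i.e. innermost in the new layout, and
   dim_map records the permutation; transform_allocation_sites() then
   sets is_transposed_p when dim_map is not the identity.  */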
/* Find the index which defines the OFFSET from base.
   We walk from use to def until we find how the offset was defined.  */
static tree
get_index_from_offset (tree offset, gimple def_stmt)
{
  tree op1, op2, index;

  if (gimple_code (def_stmt) == GIMPLE_PHI)
    return NULL;
  if ((gimple_assign_copy_p (def_stmt) || gimple_assign_cast_p (def_stmt))
      && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME)
    return get_index_from_offset (offset,
                                  SSA_NAME_DEF_STMT (gimple_assign_rhs1 (def_stmt)));
  else if (is_gimple_assign (def_stmt)
           && gimple_assign_rhs_code (def_stmt) == MULT_EXPR)
    {
      op1 = gimple_assign_rhs1 (def_stmt);
      op2 = gimple_assign_rhs2 (def_stmt);
      if (TREE_CODE (op1) != INTEGER_CST && TREE_CODE (op2) != INTEGER_CST)
        return NULL;
      index = (TREE_CODE (op1) == INTEGER_CST) ? op2 : op1;
      return index;
    }
  else
    return NULL;
}
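/* For example (hypothetical GIMPLE): if OFFSET was computed as

     D.2_5 = i_3 * 8;

   the MULT_EXPR case above returns i_3 as the index (the non-constant
   operand); a PHI definition makes us give up and return NULL.  */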
/* Update MI->dimension_type_size[CURRENT_INDIRECT_LEVEL] with the size
   of the type related to the SSA_VAR, or the type related to the
   lhs of STMT, in the case that it is an INDIRECT_REF.  */
static void
update_type_size (struct matrix_info *mi, gimple stmt, tree ssa_var,
                  int current_indirect_level)
{
  tree lhs;
  HOST_WIDE_INT type_size;

  /* Update type according to the type of the INDIRECT_REF expr.  */
  if (is_gimple_assign (stmt)
      && TREE_CODE (gimple_assign_lhs (stmt)) == INDIRECT_REF)
    {
      lhs = gimple_assign_lhs (stmt);
      gcc_assert (POINTER_TYPE_P
                  (TREE_TYPE (SSA_NAME_VAR (TREE_OPERAND (lhs, 0)))));
      type_size =
        int_size_in_bytes (TREE_TYPE
                           (TREE_TYPE
                            (SSA_NAME_VAR (TREE_OPERAND (lhs, 0)))));
    }
  else
    type_size = int_size_in_bytes (TREE_TYPE (ssa_var));

  /* Record the size of elements accessed (as a whole)
     in the current indirection level (dimension).  If the size of
     elements is not known at compile time, mark it as escaping.  */
  if (type_size <= 0)
    mark_min_matrix_escape_level (mi, current_indirect_level, stmt);
  else
    {
      int l = current_indirect_level;

      if (!mi->dimension_type_size)
        {
          mi->dimension_type_size
            = (HOST_WIDE_INT *) xcalloc (l + 1, sizeof (HOST_WIDE_INT));
          mi->dimension_type_size_len = l + 1;
        }
      else if (mi->dimension_type_size_len < l + 1)
        {
          mi->dimension_type_size
            = (HOST_WIDE_INT *) xrealloc (mi->dimension_type_size,
                                          (l + 1) * sizeof (HOST_WIDE_INT));
          memset (&mi->dimension_type_size[mi->dimension_type_size_len],
                  0, (l + 1 - mi->dimension_type_size_len)
                  * sizeof (HOST_WIDE_INT));
          mi->dimension_type_size_len = l + 1;
        }
      /* Make sure all the accesses in the same level have the same size
         of the type.  */
      if (!mi->dimension_type_size[l])
        mi->dimension_type_size[l] = type_size;
      else if (mi->dimension_type_size[l] != type_size)
        mark_min_matrix_escape_level (mi, l, stmt);
    }
}
/* USE_STMT represents a GIMPLE_CALL, where one of the arguments is the
   ssa var that we want to check because it came from some use of matrix
   MI.  CURRENT_INDIRECT_LEVEL is the indirection level we reached so
   far.  */

static int
analyze_accesses_for_call_stmt (struct matrix_info *mi, tree ssa_var,
                                gimple use_stmt, int current_indirect_level)
{
  tree fndecl = gimple_call_fndecl (use_stmt);

  if (gimple_call_lhs (use_stmt))
    {
      tree lhs = gimple_call_lhs (use_stmt);
      struct ssa_acc_in_tree lhs_acc, rhs_acc;

      memset (&lhs_acc, 0, sizeof (lhs_acc));
      memset (&rhs_acc, 0, sizeof (rhs_acc));

      lhs_acc.ssa_var = ssa_var;
      lhs_acc.t_code = ERROR_MARK;
      ssa_accessed_in_tree (lhs, &lhs_acc);
      rhs_acc.ssa_var = ssa_var;
      rhs_acc.t_code = ERROR_MARK;
      ssa_accessed_in_call_rhs (use_stmt, &rhs_acc);

      /* The SSA must be either in the left side or in the right side,
         to understand what is happening.
         In case the SSA_NAME is found in both sides we should be escaping
         at this level because in this case we cannot calculate the
         address correctly.  */
      if ((lhs_acc.var_found && rhs_acc.var_found
           && lhs_acc.t_code == INDIRECT_REF)
          || (!rhs_acc.var_found && !lhs_acc.var_found))
        {
          mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
          return current_indirect_level;
        }
      gcc_assert (!rhs_acc.var_found || !lhs_acc.var_found);

      /* If we are storing to the matrix at some level, then mark it as
         escaping at that level.  */
      if (lhs_acc.var_found)
        {
          int l = current_indirect_level + 1;

          gcc_assert (lhs_acc.t_code == INDIRECT_REF);
          mark_min_matrix_escape_level (mi, l, use_stmt);
          return current_indirect_level;
        }
    }

  if (fndecl)
    {
      if (DECL_FUNCTION_CODE (fndecl) != BUILT_IN_FREE)
        {
          if (dump_file)
            fprintf (dump_file,
                     "Matrix %s: Function call %s, level %d escapes.\n",
                     get_name (mi->decl), get_name (fndecl),
                     current_indirect_level);
          mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
        }
      else if (mi->free_stmts[current_indirect_level].stmt != NULL
               && mi->free_stmts[current_indirect_level].stmt != use_stmt)
        mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
      else
        {
          /* Record the free statements so we can delete them
             later.  */
          int l = current_indirect_level;

          mi->free_stmts[l].stmt = use_stmt;
          mi->free_stmts[l].func = current_function_decl;
        }
    }
  return current_indirect_level;
}
/* USE_STMT represents a phi node of the ssa var that we want to
   check because it came from some use of matrix MI.
   We check all the escaping levels that get to the PHI node
   and make sure they are all the same escaping;
   if not (which is rare) we let the escaping level be the
   minimum level that gets into that PHI because starting from
   that level we cannot expect the behavior of the indirections.
   CURRENT_INDIRECT_LEVEL is the indirection level we reached so far.  */

static void
analyze_accesses_for_phi_node (struct matrix_info *mi, gimple use_stmt,
                               int current_indirect_level, sbitmap visited,
                               bool record_accesses)
{
  struct matrix_access_phi_node tmp_maphi, *maphi, **pmaphi;

  tmp_maphi.phi = use_stmt;
  if ((maphi = (struct matrix_access_phi_node *)
       htab_find (htab_mat_acc_phi_nodes, &tmp_maphi)))
    {
      if (maphi->indirection_level == current_indirect_level)
        return;
      else
        {
          int level = MIN (maphi->indirection_level,
                           current_indirect_level);
          size_t j;
          gimple stmt = NULL;

          maphi->indirection_level = level;
          for (j = 0; j < gimple_phi_num_args (use_stmt); j++)
            {
              tree def = PHI_ARG_DEF (use_stmt, j);

              if (gimple_code (SSA_NAME_DEF_STMT (def)) != GIMPLE_PHI)
                stmt = SSA_NAME_DEF_STMT (def);
            }
          mark_min_matrix_escape_level (mi, level, stmt);
        }
      return;
    }
  maphi = (struct matrix_access_phi_node *)
    xcalloc (1, sizeof (struct matrix_access_phi_node));
  maphi->phi = use_stmt;
  maphi->indirection_level = current_indirect_level;

  /* Insert to hash table.  */
  pmaphi = (struct matrix_access_phi_node **)
    htab_find_slot (htab_mat_acc_phi_nodes, maphi, INSERT);
  gcc_assert (pmaphi);
  *pmaphi = maphi;

  if (!TEST_BIT (visited, SSA_NAME_VERSION (PHI_RESULT (use_stmt))))
    {
      SET_BIT (visited, SSA_NAME_VERSION (PHI_RESULT (use_stmt)));
      analyze_matrix_accesses (mi, PHI_RESULT (use_stmt),
                               current_indirect_level, false, visited,
                               record_accesses);
      RESET_BIT (visited, SSA_NAME_VERSION (PHI_RESULT (use_stmt)));
    }
}
/* USE_STMT represents an assign statement (the rhs or lhs include
   the ssa var that we want to check because it came from some use of matrix
   MI).  CURRENT_INDIRECT_LEVEL is the indirection level we reached so far.  */

static int
analyze_accesses_for_assign_stmt (struct matrix_info *mi, tree ssa_var,
                                  gimple use_stmt, int current_indirect_level,
                                  bool last_op, sbitmap visited,
                                  bool record_accesses)
{
  tree lhs = gimple_get_lhs (use_stmt);
  struct ssa_acc_in_tree lhs_acc, rhs_acc;

  memset (&lhs_acc, 0, sizeof (lhs_acc));
  memset (&rhs_acc, 0, sizeof (rhs_acc));

  lhs_acc.ssa_var = ssa_var;
  lhs_acc.t_code = ERROR_MARK;
  ssa_accessed_in_tree (lhs, &lhs_acc);
  rhs_acc.ssa_var = ssa_var;
  rhs_acc.t_code = ERROR_MARK;
  ssa_accessed_in_assign_rhs (use_stmt, &rhs_acc);

  /* The SSA must be either in the left side or in the right side,
     to understand what is happening.
     In case the SSA_NAME is found in both sides we should be escaping
     at this level because in this case we cannot calculate the
     address correctly.  */
  if ((lhs_acc.var_found && rhs_acc.var_found
       && lhs_acc.t_code == INDIRECT_REF)
      || (!rhs_acc.var_found && !lhs_acc.var_found))
    {
      mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
      return current_indirect_level;
    }
  gcc_assert (!rhs_acc.var_found || !lhs_acc.var_found);

  /* If we are storing to the matrix at some level, then mark it as
     escaping at that level.  */
  if (lhs_acc.var_found)
    {
      int l = current_indirect_level + 1;

      gcc_assert (lhs_acc.t_code == INDIRECT_REF);

      if (!(gimple_assign_copy_p (use_stmt)
            || gimple_assign_cast_p (use_stmt))
          || (TREE_CODE (gimple_assign_rhs1 (use_stmt)) != SSA_NAME))
        mark_min_matrix_escape_level (mi, l, use_stmt);
      else
        {
          gimple def_stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (use_stmt));
          analyze_matrix_allocation_site (mi, def_stmt, l, visited);
          if (record_accesses)
            record_access_alloc_site_info (mi, use_stmt, NULL_TREE,
                                           NULL_TREE, l, true);
          update_type_size (mi, use_stmt, NULL, l);
        }
      return current_indirect_level;
    }
  /* Now, check the right-hand-side, to see how the SSA variable
     is used.  */
  if (rhs_acc.var_found)
    {
      if (rhs_acc.t_code != INDIRECT_REF
          && rhs_acc.t_code != POINTER_PLUS_EXPR && rhs_acc.t_code != SSA_NAME)
        {
          mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
          return current_indirect_level;
        }
      /* If the access in the RHS has an indirection increase the
         indirection level.  */
      if (rhs_acc.t_code == INDIRECT_REF)
        {
          if (record_accesses)
            record_access_alloc_site_info (mi, use_stmt, NULL_TREE,
                                           NULL_TREE,
                                           current_indirect_level, true);
          current_indirect_level += 1;
        }
      else if (rhs_acc.t_code == POINTER_PLUS_EXPR)
        {
          gcc_assert (rhs_acc.second_op);
          if (last_op)
            /* Currently we support only one PLUS expression on the
               SSA_NAME that holds the base address of the current
               indirection level; to support more general case there
               is a need to hold a stack of expressions and regenerate
               the calculation later.  */
            mark_min_matrix_escape_level (mi, current_indirect_level,
                                          use_stmt);
          else
            {
              tree index;
              tree op1, op2;

              op1 = gimple_assign_rhs1 (use_stmt);
              op2 = gimple_assign_rhs2 (use_stmt);

              op2 = (op1 == ssa_var) ? op2 : op1;
              if (TREE_CODE (op2) == INTEGER_CST)
                index =
                  build_int_cst (TREE_TYPE (op1),
                                 TREE_INT_CST_LOW (op2) /
                                 int_size_in_bytes (TREE_TYPE (op1)));
              else
                {
                  index =
                    get_index_from_offset (op2, SSA_NAME_DEF_STMT (op2));
                  if (index == NULL_TREE)
                    {
                      mark_min_matrix_escape_level (mi,
                                                    current_indirect_level,
                                                    use_stmt);
                      return current_indirect_level;
                    }
                }
              if (record_accesses)
                record_access_alloc_site_info (mi, use_stmt, op2,
                                               index,
                                               current_indirect_level, false);
            }
        }
      /* If we are storing this level of indirection mark it as
         escaping.  */
      if (lhs_acc.t_code == INDIRECT_REF || TREE_CODE (lhs) != SSA_NAME)
        {
          int l = current_indirect_level;

          /* One exception is when we are storing to the matrix
             variable itself; this is the case of malloc, we must make
             sure that it's the one and only one call to malloc so
             we call analyze_matrix_allocation_site to check
             this out.  */
          if (TREE_CODE (lhs) != VAR_DECL || lhs != mi->decl)
            mark_min_matrix_escape_level (mi, current_indirect_level,
                                          use_stmt);
          else
            {
              /* Also update the escaping level.  */
              analyze_matrix_allocation_site (mi, use_stmt, l, visited);
              if (record_accesses)
                record_access_alloc_site_info (mi, use_stmt, NULL_TREE,
                                               NULL_TREE, l, true);
            }
        }
      else
        {
          /* We are placing it in an SSA, follow that SSA.  */
          analyze_matrix_accesses (mi, lhs,
                                   current_indirect_level,
                                   rhs_acc.t_code == POINTER_PLUS_EXPR,
                                   visited, record_accesses);
        }
    }
  return current_indirect_level;
}
/* Given a SSA_VAR (coming from a use statement of the matrix MI),
   follow its uses and level of indirection and find out the minimum
   indirection level it escapes in (the highest dimension) and the maximum
   level it is accessed in (this will be the actual dimension of the
   matrix).  The information is accumulated in MI.
   We look at the immediate uses, if one escapes we finish; if not,
   we make a recursive call for each one of the immediate uses of the
   resulting SSA name.  */
static void
analyze_matrix_accesses (struct matrix_info *mi, tree ssa_var,
                         int current_indirect_level, bool last_op,
                         sbitmap visited, bool record_accesses)
{
  imm_use_iterator imm_iter;
  use_operand_p use_p;

  update_type_size (mi, SSA_NAME_DEF_STMT (ssa_var), ssa_var,
                    current_indirect_level);

  /* We don't go beyond the escaping level when we are performing the
     flattening.  NOTE: we keep the last indirection level that doesn't
     escape.  */
  if (mi->min_indirect_level_escape > -1
      && mi->min_indirect_level_escape <= current_indirect_level)
    return;

  /* Now go over the uses of the SSA_NAME and check how it is used in
     each one of them.  We are mainly looking for the pattern INDIRECT_REF,
     then a POINTER_PLUS_EXPR, then INDIRECT_REF etc., while in between there
     could be any number of copies and casts.  */
  gcc_assert (TREE_CODE (ssa_var) == SSA_NAME);

  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, ssa_var)
  {
    gimple use_stmt = USE_STMT (use_p);
    if (gimple_code (use_stmt) == GIMPLE_PHI)
      /* We check all the escaping levels that get to the PHI node
         and make sure they are all the same escaping;
         if not (which is rare) we let the escaping level be the
         minimum level that gets into that PHI because starting from
         that level we cannot expect the behavior of the indirections.  */
      analyze_accesses_for_phi_node (mi, use_stmt, current_indirect_level,
                                     visited, record_accesses);
    else if (is_gimple_call (use_stmt))
      analyze_accesses_for_call_stmt (mi, ssa_var, use_stmt,
                                      current_indirect_level);
    else if (is_gimple_assign (use_stmt))
      current_indirect_level =
        analyze_accesses_for_assign_stmt (mi, ssa_var, use_stmt,
                                          current_indirect_level, last_op,
                                          visited, record_accesses);
  }
}
/* Data used by the walk below: the function whose body we scan, and
   the defining statement found, if any.  */
typedef struct
{
  tree fn;
  gimple stmt;
} check_var_data;

/* A walk_tree function to go over the VAR_DECL, PARM_DECL nodes of
   the malloc size expression and check that those aren't changed
   over the function.  */
static tree
check_var_notmodified_p (tree * tp, int *walk_subtrees, void *data)
{
  basic_block bb;
  tree t = *tp;
  check_var_data *callback_data = (check_var_data *) data;
  tree fn = callback_data->fn;
  gimple_stmt_iterator gsi;
  gimple stmt;

  if (TREE_CODE (t) != VAR_DECL && TREE_CODE (t) != PARM_DECL)
    return NULL_TREE;

  FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (fn))
  {
    for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
      {
        stmt = gsi_stmt (gsi);
        if (!is_gimple_assign (stmt) && !is_gimple_call (stmt))
          continue;
        if (gimple_get_lhs (stmt) == t)
          {
            callback_data->stmt = stmt;
            return t;
          }
      }
  }
  *walk_subtrees = 1;
  return NULL_TREE;
}
/* Go backwards in the use-def chains and find out the expression
   represented by the possible SSA name in STMT, until it is composed
   of only VAR_DECL, PARM_DECL and INT_CST.  In case of phi nodes
   we make sure that all the arguments represent the same subexpression,
   otherwise we fail.  */

static tree
can_calculate_stmt_before_stmt (gimple stmt, sbitmap visited)
{
  tree op1, op2, res;
  enum tree_code code;

  switch (gimple_code (stmt))
    {
    case GIMPLE_ASSIGN:
      code = gimple_assign_rhs_code (stmt);
      op1 = gimple_assign_rhs1 (stmt);

      switch (code)
        {
        case POINTER_PLUS_EXPR:
        case PLUS_EXPR:
        case MINUS_EXPR:
        case MULT_EXPR:
          op2 = gimple_assign_rhs2 (stmt);
          op1 = can_calculate_expr_before_stmt (op1, visited);
          if (!op1)
            return NULL_TREE;
          op2 = can_calculate_expr_before_stmt (op2, visited);
          if (op2)
            return fold_build2 (code, gimple_expr_type (stmt), op1, op2);
          return NULL_TREE;

        CASE_CONVERT:
          res = can_calculate_expr_before_stmt (op1, visited);
          if (res != NULL_TREE)
            return build1 (code, gimple_expr_type (stmt), res);
          else
            return NULL_TREE;

        default:
          if (gimple_assign_single_p (stmt))
            return can_calculate_expr_before_stmt (op1, visited);
          else
            return NULL_TREE;
        }

    case GIMPLE_PHI:
      {
        size_t j;

        res = NULL_TREE;
        /* Make sure all the arguments represent the same value.  */
        for (j = 0; j < gimple_phi_num_args (stmt); j++)
          {
            tree new_res;
            tree def = PHI_ARG_DEF (stmt, j);

            new_res = can_calculate_expr_before_stmt (def, visited);
            if (res == NULL_TREE)
              res = new_res;
            else if (!new_res || !expressions_equal_p (res, new_res))
              return NULL_TREE;
          }
        return res;
      }

    default:
      return NULL_TREE;
    }
}
/* Go backwards in the use-def chains and find out the expression
   represented by the possible SSA name in EXPR, until it is composed
   of only VAR_DECL, PARM_DECL and INT_CST.  In case of phi nodes
   we make sure that all the arguments represent the same subexpression,
   otherwise we fail.  */
static tree
can_calculate_expr_before_stmt (tree expr, sbitmap visited)
{
  gimple def_stmt;
  tree res;

  switch (TREE_CODE (expr))
    {
    case SSA_NAME:
      /* Case of loop, we don't know to represent this expression.  */
      if (TEST_BIT (visited, SSA_NAME_VERSION (expr)))
        return NULL_TREE;

      SET_BIT (visited, SSA_NAME_VERSION (expr));
      def_stmt = SSA_NAME_DEF_STMT (expr);
      res = can_calculate_stmt_before_stmt (def_stmt, visited);
      RESET_BIT (visited, SSA_NAME_VERSION (expr));
      return res;
    case VAR_DECL:
    case PARM_DECL:
    case INTEGER_CST:
      return expr;

    default:
      return NULL_TREE;
    }
}
/* There should be only one allocation function for the dimensions
   that don't escape.  Here we check the allocation sites in this
   function.  We must make sure that all the dimensions are allocated
   using malloc and that the malloc size parameter expression could be
   pre-calculated before the call to the malloc of dimension 0.

   Given a candidate matrix for flattening -- MI -- check if it's
   appropriate for flattening -- we analyze the allocation
   sites that we recorded in the previous analysis.  The result of the
   analysis is a level of indirection (matrix dimension) in which the
   flattening is safe.  We check the following conditions:
   1. There is only one allocation site for each dimension.
   2. The allocation sites of all the dimensions are in the same
      function.
   (The above two are being taken care of during the analysis when
   we check the allocation site.)
   3. All the dimensions that we flatten are allocated at once; thus
      the total size must be known before the allocation of the
      dimension 0 (top level) -- we must make sure we represent the
      size of the allocation as an expression of global parameters or
      constants and that those don't change over the function.  */

static int
check_allocation_function (void **slot, void *data ATTRIBUTE_UNUSED)
{
  int level;
  gimple_stmt_iterator gsi;
  basic_block bb_level_0;
  struct matrix_info *mi = (struct matrix_info *) *slot;
  sbitmap visited;

  if (!mi->malloc_for_level)
    return 1;

  visited = sbitmap_alloc (num_ssa_names);

  /* Do nothing if the current function is not the allocation
     function of MI.  */
  if (mi->allocation_function_decl != current_function_decl
      /* We aren't in the main allocation function yet.  */
      || !mi->malloc_for_level[0])
    return 1;

  for (level = 1; level < mi->max_malloced_level; level++)
    if (!mi->malloc_for_level[level])
      break;

  mark_min_matrix_escape_level (mi, level, NULL);

  gsi = gsi_for_stmt (mi->malloc_for_level[0]);
  bb_level_0 = gsi.bb;

  /* Check if the expression of the size passed to malloc could be
     pre-calculated before the malloc of level 0.  */
  for (level = 1; level < mi->min_indirect_level_escape; level++)
    {
      gimple call_stmt;
      tree size;
      struct malloc_call_data mcd = {NULL, NULL_TREE, NULL_TREE};

      call_stmt = mi->malloc_for_level[level];

      /* Find the correct malloc information.  */
      collect_data_for_malloc_call (call_stmt, &mcd);

      /* No need to check anticipation for constants.  */
      if (TREE_CODE (mcd.size_var) == INTEGER_CST)
        {
          if (!mi->dimension_size)
            {
              mi->dimension_size =
                (tree *) xcalloc (mi->min_indirect_level_escape,
                                  sizeof (tree));
              mi->dimension_size_orig =
                (tree *) xcalloc (mi->min_indirect_level_escape,
                                  sizeof (tree));
            }
          mi->dimension_size[level] = mcd.size_var;
          mi->dimension_size_orig[level] = mcd.size_var;
          continue;
        }
      /* ??? Here we should also add the way to calculate the size
         expression, not only know that it is anticipated.  */
      sbitmap_zero (visited);
      size = can_calculate_expr_before_stmt (mcd.size_var, visited);
      if (size == NULL_TREE)
        {
          mark_min_matrix_escape_level (mi, level, call_stmt);
          if (dump_file)
            fprintf (dump_file,
                     "Matrix %s: Cannot calculate the size of allocation, escaping at level %d\n",
                     get_name (mi->decl), level);
          break;
        }
      if (!mi->dimension_size)
        {
          mi->dimension_size =
            (tree *) xcalloc (mi->min_indirect_level_escape, sizeof (tree));
          mi->dimension_size_orig =
            (tree *) xcalloc (mi->min_indirect_level_escape, sizeof (tree));
        }
      mi->dimension_size[level] = size;
      mi->dimension_size_orig[level] = size;
    }

  /* We don't need those anymore.  */
  for (level = mi->min_indirect_level_escape;
       level < mi->max_malloced_level; level++)
    mi->malloc_for_level[level] = NULL;
  return 1;
}
/* Track all access and allocation sites.  */
static void
find_sites_in_func (bool record)
{
  sbitmap visited_stmts_1;

  gimple_stmt_iterator gsi;
  gimple stmt;
  basic_block bb;
  struct matrix_info tmpmi, *mi;

  visited_stmts_1 = sbitmap_alloc (num_ssa_names);

  FOR_EACH_BB (bb)
  {
    for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
      {
        tree lhs;

        stmt = gsi_stmt (gsi);
        lhs = gimple_get_lhs (stmt);
        if (lhs != NULL_TREE
            && TREE_CODE (lhs) == VAR_DECL)
          {
            tmpmi.decl = lhs;
            if ((mi = (struct matrix_info *) htab_find (matrices_to_reorg,
                                                        &tmpmi)))
              {
                sbitmap_zero (visited_stmts_1);
                analyze_matrix_allocation_site (mi, stmt, 0, visited_stmts_1);
              }
          }
        if (is_gimple_assign (stmt)
            && gimple_assign_single_p (stmt)
            && TREE_CODE (lhs) == SSA_NAME
            && TREE_CODE (gimple_assign_rhs1 (stmt)) == VAR_DECL)
          {
            tmpmi.decl = gimple_assign_rhs1 (stmt);
            if ((mi = (struct matrix_info *) htab_find (matrices_to_reorg,
                                                        &tmpmi)))
              {
                sbitmap_zero (visited_stmts_1);
                analyze_matrix_accesses (mi, lhs, 0,
                                         false, visited_stmts_1, record);
              }
          }
      }
  }
  sbitmap_free (visited_stmts_1);
}
/* Traverse the use-def chains to see if there are matrices that
   are passed through pointers and we cannot know how they are accessed.
   For each SSA-name defined by a global variable of our interest,
   we traverse the use-def chains of the SSA and follow the indirections,
   and record in what level of indirection the use of the variable
   escapes.  A use of a pointer escapes when it is passed to a function,
   stored into memory or assigned (except in malloc and free calls).  */

static void
record_all_accesses_in_func (void)
{
  unsigned i;
  sbitmap visited_stmts_1;

  visited_stmts_1 = sbitmap_alloc (num_ssa_names);

  for (i = 0; i < num_ssa_names; i++)
    {
      struct matrix_info tmpmi, *mi;
      tree ssa_var = ssa_name (i);
      tree rhs, lhs;

      if (!ssa_var
          || !is_gimple_assign (SSA_NAME_DEF_STMT (ssa_var))
          || !gimple_assign_single_p (SSA_NAME_DEF_STMT (ssa_var)))
        continue;
      rhs = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (ssa_var));
      lhs = gimple_assign_lhs (SSA_NAME_DEF_STMT (ssa_var));
      if (TREE_CODE (rhs) != VAR_DECL && TREE_CODE (lhs) != VAR_DECL)
        continue;

      /* If the RHS is a matrix that we want to analyze, follow the def-use
         chain for this SSA_VAR and check for escapes or apply the
         flattening.  */
      tmpmi.decl = rhs;
      if ((mi = (struct matrix_info *) htab_find (matrices_to_reorg, &tmpmi)))
        {
          /* This variable will track the visited PHI nodes, so we can limit
             its size to the maximum number of SSA names.  */
          sbitmap_zero (visited_stmts_1);
          analyze_matrix_accesses (mi, ssa_var,
                                   0, false, visited_stmts_1, true);
        }
    }
  sbitmap_free (visited_stmts_1);
}
/* Used when we want to convert the expression: RESULT = something *
   ORIG to RESULT = something * NEW_VAL.  If ORIG and NEW_VAL are powers
   of 2, shift operations can be done; otherwise division and
   multiplication are used.  */

static tree
compute_offset (HOST_WIDE_INT orig, HOST_WIDE_INT new_val, tree result)
{
  int x, y;
  tree result1, ratio, log, orig_tree, new_tree;

  x = exact_log2 (orig);
  y = exact_log2 (new_val);

  if (x != -1 && y != -1)
    {
      if (x == y)
        return result;
      else if (x > y)
        {
          log = build_int_cst (TREE_TYPE (result), x - y);
          result1 =
            fold_build2 (LSHIFT_EXPR, TREE_TYPE (result), result, log);
          return result1;
        }
      log = build_int_cst (TREE_TYPE (result), y - x);
      result1 = fold_build2 (RSHIFT_EXPR, TREE_TYPE (result), result, log);

      return result1;
    }
  orig_tree = build_int_cst (TREE_TYPE (result), orig);
  new_tree = build_int_cst (TREE_TYPE (result), new_val);
  ratio = fold_build2 (TRUNC_DIV_EXPR, TREE_TYPE (result), result, orig_tree);
  result1 = fold_build2 (MULT_EXPR, TREE_TYPE (result), ratio, new_tree);

  return result1;
}
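/* Numeric illustration of the fallback path (invented values): with
   ORIG = 6, NEW_VAL = 3 and RESULT holding 24, the division builds
   ratio = 24 / 6 = 4 and the final expression 4 * 3 = 12; when both
   sizes are powers of two the same rescaling is expressed with a
   single shift instead.  */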
/* We know that we are allowed to perform matrix flattening (according to the
   escape analysis), so we traverse the use-def chains of the SSA vars
   defined by the global variables pointing to the matrices of our interest.
   In each use of the SSA we calculate the offset from the base address
   according to the following equation:

     a[I1][I2]...[Ik], where D1..Dk is the length of each dimension and the
     escaping level is m <= k, and a' is the new allocated matrix,
     will be translated to:

       b[I(m+1)]...[Ik]

       where
       b = a' + I1*D2...*Dm + I2*D3...Dm + ... + Im  */
static int
transform_access_sites (void **slot, void *data ATTRIBUTE_UNUSED)
{
  gimple_stmt_iterator gsi;
  struct matrix_info *mi = (struct matrix_info *) *slot;
  int min_escape_l = mi->min_indirect_level_escape;
  struct access_site_info *acc_info;
  enum tree_code code;
  int i;

  /* If there are too few levels, do not flatten the matrix.  */
  if (min_escape_l < 2 || !mi->access_l)
    return 1;
  for (i = 0; VEC_iterate (access_site_info_p, mi->access_l, i, acc_info);
       i++)
    {
      /* This is possible because we collect the access sites before
         we determine the final minimum indirection level.  */
      if (acc_info->level >= min_escape_l)
        {
          free (acc_info);
          continue;
        }
      if (acc_info->is_alloc)
        {
          if (acc_info->level >= 0 && gimple_bb (acc_info->stmt))
            {
              ssa_op_iter iter;
              tree def;
              gimple stmt = acc_info->stmt;
              tree lhs;

              FOR_EACH_SSA_TREE_OPERAND (def, stmt, iter, SSA_OP_DEF)
                mark_sym_for_renaming (SSA_NAME_VAR (def));
              gsi = gsi_for_stmt (stmt);
              gcc_assert (is_gimple_assign (acc_info->stmt));
              lhs = gimple_assign_lhs (acc_info->stmt);
              if (TREE_CODE (lhs) == SSA_NAME
                  && acc_info->level < min_escape_l - 1)
                {
                  imm_use_iterator imm_iter;
                  use_operand_p use_p;
                  gimple use_stmt;

                  FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, lhs)
                    FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
                    {
                      tree rhs, tmp;
                      gimple new_stmt;

                      gcc_assert (gimple_assign_rhs_code (acc_info->stmt)
                                  == INDIRECT_REF);
                      /* Emit convert statement to convert to type of use.  */
                      tmp = create_tmp_var (TREE_TYPE (lhs), "new");
                      add_referenced_var (tmp);
                      rhs = gimple_assign_rhs1 (acc_info->stmt);
                      rhs = fold_convert (TREE_TYPE (tmp),
                                          TREE_OPERAND (rhs, 0));
                      new_stmt = gimple_build_assign (tmp, rhs);
                      tmp = make_ssa_name (tmp, new_stmt);
                      gimple_assign_set_lhs (new_stmt, tmp);
                      gsi = gsi_for_stmt (acc_info->stmt);
                      gsi_insert_after (&gsi, new_stmt, GSI_SAME_STMT);
                      SET_USE (use_p, tmp);
                    }
                }
              if (acc_info->level < min_escape_l - 1)
                gsi_remove (&gsi, true);
            }
          free (acc_info);
          continue;
        }
      code = gimple_assign_rhs_code (acc_info->stmt);
      if (code == INDIRECT_REF
          && acc_info->level < min_escape_l - 1)
        {
          /* Replace the INDIRECT_REF with NOP (cast); usually we are casting
             from "pointer to type" to "type".  */
          tree t =
            build1 (NOP_EXPR, TREE_TYPE (gimple_assign_rhs1 (acc_info->stmt)),
                    TREE_OPERAND (gimple_assign_rhs1 (acc_info->stmt), 0));
          gimple_assign_set_rhs_code (acc_info->stmt, NOP_EXPR);
          gimple_assign_set_rhs1 (acc_info->stmt, t);
        }
      else if (code == POINTER_PLUS_EXPR
               && acc_info->level < (min_escape_l))
        {
          imm_use_iterator imm_iter;
          use_operand_p use_p;

          tree offset;
          int k = acc_info->level;
          tree num_elements, total_elements;
          tree tmp1;
          tree d_size = mi->dimension_size[k];

          /* We already made sure in the analysis that the first operand
             is the base and the second is the offset.  */
          offset = acc_info->offset;
          if (mi->dim_map[k] == min_escape_l - 1)
            {
              if (!check_transpose_p || mi->is_transposed_p == false)
                tmp1 = offset;
              else
                {
                  tree new_offset;
                  tree d_type_size, d_type_size_k;

                  d_type_size = size_int (mi->dimension_type_size[min_escape_l]);
                  d_type_size_k = size_int (mi->dimension_type_size[k + 1]);

                  new_offset =
                    compute_offset (mi->dimension_type_size[min_escape_l],
                                    mi->dimension_type_size[k + 1], offset);

                  total_elements = new_offset;
                  if (new_offset != offset)
                    {
                      gsi = gsi_for_stmt (acc_info->stmt);
                      tmp1 = force_gimple_operand_gsi (&gsi, total_elements,
                                                       true, NULL,
                                                       true, GSI_SAME_STMT);
                    }
                  else
                    tmp1 = offset;
                }
            }
          else
            {
              d_size = mi->dimension_size[mi->dim_map[k] + 1];
              num_elements =
                fold_build2 (MULT_EXPR, sizetype, fold_convert (sizetype, acc_info->index),
                             fold_convert (sizetype, d_size));
              add_referenced_var (d_size);
              gsi = gsi_for_stmt (acc_info->stmt);
              tmp1 = force_gimple_operand_gsi (&gsi, num_elements, true,
                                               NULL, true, GSI_SAME_STMT);
            }
          /* Replace the offset if needed.  */
          if (tmp1 != offset)
            {
              if (TREE_CODE (offset) == SSA_NAME)
                {
                  gimple use_stmt;

                  FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, offset)
                    FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
                      if (use_stmt == acc_info->stmt)
                        SET_USE (use_p, tmp1);
                }
              else
                {
                  gcc_assert (TREE_CODE (offset) == INTEGER_CST);
                  gimple_assign_set_rhs2 (acc_info->stmt, tmp1);
                  update_stmt (acc_info->stmt);
                }
            }
        }
      /* ??? meanwhile this happens because we record the same access
         site more than once; we should be using a hash table to
         avoid this and insert the STMT of the access site only
         once.
         else
           gcc_unreachable (); */
      free (acc_info);
    }
  VEC_free (access_site_info_p, heap, mi->access_l);

  update_ssa (TODO_update_ssa);
#ifdef ENABLE_CHECKING
  verify_ssa (true);
#endif
  return 1;
}
/* Sort the array A of counts.  Arrange DIM_MAP to reflect the new
   order.  */

static void
sort_dim_hot_level (gcov_type * a, int *dim_map, int n)
{
  int i, j, tmp1;
  gcov_type tmp;

  for (i = 0; i < n - 1; i++)
    {
      for (j = 0; j < n - 1 - i; j++)
        {
          if (a[j + 1] < a[j])
            {
              tmp = a[j];	/* Swap a[j] and a[j + 1].  */
              a[j] = a[j + 1];
              a[j + 1] = tmp;
              tmp1 = dim_map[j];
              dim_map[j] = dim_map[j + 1];
              dim_map[j + 1] = tmp1;
            }
        }
    }
}
/* Replace multiple mallocs (one for each dimension) with one malloc
   with the size of DIM1*DIM2*...*DIMN*size_of_element.
   Make sure that we hold the size in the malloc site inside a
   new global variable; this way we ensure that the size doesn't
   change and it is accessible from all the other functions that
   use the matrix.  Also, the original calls to free are deleted,
   and replaced by a new call to free the flattened matrix.  */
static int
transform_allocation_sites (void **slot, void *data ATTRIBUTE_UNUSED)
{
  int i, id;
  struct matrix_info *mi;
  tree type, oldfn, prev_dim_size;
  gimple call_stmt_0, use_stmt;
  struct cgraph_node *c_node;
  struct cgraph_edge *e;
  gimple_stmt_iterator gsi;
  struct malloc_call_data mcd = {NULL, NULL_TREE, NULL_TREE};
  HOST_WIDE_INT element_size;

  imm_use_iterator imm_iter;
  use_operand_p use_p;
  tree old_size_0, tmp;
  int min_escape_l;

  mi = (struct matrix_info *) *slot;

  min_escape_l = mi->min_indirect_level_escape;

  if (!mi->malloc_for_level)
    mi->min_indirect_level_escape = 0;

  if (mi->min_indirect_level_escape < 2)
    return 1;

  mi->dim_map = (int *) xcalloc (mi->min_indirect_level_escape, sizeof (int));
  for (i = 0; i < mi->min_indirect_level_escape; i++)
    mi->dim_map[i] = i;
  if (check_transpose_p)
    {
      if (dump_file)
        {
          fprintf (dump_file, "Matrix %s:\n", get_name (mi->decl));
          for (i = 0; i < min_escape_l; i++)
            {
              fprintf (dump_file, "dim %d before sort ", i);
              if (mi->dim_hot_level)
                fprintf (dump_file,
                         "count is " HOST_WIDEST_INT_PRINT_DEC "  \n",
                         mi->dim_hot_level[i]);
            }
        }
      sort_dim_hot_level (mi->dim_hot_level, mi->dim_map,
                          mi->min_indirect_level_escape);
      if (dump_file)
        for (i = 0; i < min_escape_l; i++)
          {
            fprintf (dump_file, "dim %d after sort\n", i);
            if (mi->dim_hot_level)
              fprintf (dump_file, "count is " HOST_WIDE_INT_PRINT_DEC
                       "  \n", (HOST_WIDE_INT) mi->dim_hot_level[i]);
          }
      for (i = 0; i < mi->min_indirect_level_escape; i++)
        {
          if (dump_file)
            fprintf (dump_file, "dim_map[%d] after sort %d\n", i,
                     mi->dim_map[i]);
          if (mi->dim_map[i] != i)
            {
              if (dump_file)
                fprintf (dump_file,
                         "Transposed dimensions: dim %d is now dim %d\n",
                         mi->dim_map[i], i);
              mi->is_transposed_p = true;
            }
        }
    }
  else
    {
      for (i = 0; i < mi->min_indirect_level_escape; i++)
        mi->dim_map[i] = i;
    }

  /* Call statement of allocation site of level 0.  */
  call_stmt_0 = mi->malloc_for_level[0];

  /* Finds the correct malloc information.  */
  collect_data_for_malloc_call (call_stmt_0, &mcd);

  mi->dimension_size[0] = mcd.size_var;
  mi->dimension_size_orig[0] = mcd.size_var;
  /* Make sure that the variables in the size expression for
     all the dimensions (above level 0) aren't modified in
     the allocation function.  */
  for (i = 1; i < mi->min_indirect_level_escape; i++)
    {
      tree t;
      check_var_data data;

      /* mi->dimension_size must contain the expression of the size calculated
         in check_allocation_function.  */
      gcc_assert (mi->dimension_size[i]);

      data.fn = mi->allocation_function_decl;
      data.stmt = NULL;
      t = walk_tree_without_duplicates (&(mi->dimension_size[i]),
                                        check_var_notmodified_p,
                                        &data);
      if (t != NULL_TREE)
        {
          mark_min_matrix_escape_level (mi, i, data.stmt);
          break;
        }
    }

  if (mi->min_indirect_level_escape < 2)
    return 1;

  /* Since we should make sure that the size expression is available
     before the call to malloc of level 0.  */
  gsi = gsi_for_stmt (call_stmt_0);

  /* Find out the size of each dimension by looking at the malloc
     sites and create a global variable to hold it.
     We add the assignment to the global before the malloc of level 0.  */

  /* To be able to produce gimple temporaries.  */
  oldfn = current_function_decl;
  current_function_decl = mi->allocation_function_decl;
  push_cfun (DECL_STRUCT_FUNCTION (mi->allocation_function_decl));

  /* Set the dimension sizes as follows:
     DIM_SIZE[i] = DIM_SIZE[n] * ... * DIM_SIZE[i]
     where n is the maximum non escaping level.  */
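  /* For illustration (hypothetical 10x20 matrix of 4-byte ints, no
     transposing): DIM_SIZE[1] = 20 * 4 = 80 bytes and
     DIM_SIZE[0] = 10 * 80 = 800 bytes, so DIM_SIZE[0] is exactly the
     number of bytes the single flattened malloc must allocate.  */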
  element_size = mi->dimension_type_size[mi->min_indirect_level_escape];
  prev_dim_size = NULL_TREE;

  for (i = mi->min_indirect_level_escape - 1; i >= 0; i--)
    {
      tree dim_size, dim_var;
      gimple stmt;

      /* Now put the size expression in a global variable and initialize it to
         the size expression before the malloc of level 0.  */
      dim_var =
        add_new_static_var (TREE_TYPE
                            (mi->dimension_size_orig[mi->dim_map[i]]));
      type = TREE_TYPE (mi->dimension_size_orig[mi->dim_map[i]]);

      /* DIM_SIZE = MALLOC_SIZE_PARAM / TYPE_SIZE.  */
      /* Find which dim ID becomes dim I.  */
      for (id = 0; id < mi->min_indirect_level_escape; id++)
        if (mi->dim_map[id] == i)
          break;
      tmp =
        build_int_cst (type, mi->dimension_type_size[id + 1]);
      if (!prev_dim_size)
        prev_dim_size = build_int_cst (type, element_size);
      if (!check_transpose_p && i == mi->min_indirect_level_escape - 1)
        {
          dim_size = mi->dimension_size_orig[id];
        }
      else
        {
          dim_size =
            fold_build2 (TRUNC_DIV_EXPR, type, mi->dimension_size_orig[id],
                         tmp);

          dim_size = fold_build2 (MULT_EXPR, type, dim_size, prev_dim_size);
        }
      dim_size = force_gimple_operand_gsi (&gsi, dim_size, true, NULL,
                                           true, GSI_SAME_STMT);
      /* GLOBAL_HOLDING_THE_SIZE = DIM_SIZE.  */
      stmt = gimple_build_assign (dim_var, dim_size);
      mark_symbols_for_renaming (stmt);
      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);

      prev_dim_size = mi->dimension_size[i] = dim_var;
    }
  update_ssa (TODO_update_ssa);
  /* Replace the malloc size argument in the malloc of level 0 to be
     the size of all the dimensions.  */
  c_node = cgraph_node (mi->allocation_function_decl);
  old_size_0 = gimple_call_arg (call_stmt_0, 0);
  tmp = force_gimple_operand_gsi (&gsi, mi->dimension_size[0], true,
                                  NULL, true, GSI_SAME_STMT);
  if (TREE_CODE (old_size_0) == SSA_NAME)
    {
      FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, old_size_0)
        FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
          if (use_stmt == call_stmt_0)
            SET_USE (use_p, tmp);
    }
  /* When deleting the calls to malloc we need also to remove the edge from
     the call graph to keep it consistent.  Notice that cgraph_edge may
     create a new node in the call graph if there is no node for the given
     declaration; this shouldn't be the case but currently there is no way to
     check this outside of "cgraph.c".  */
  for (i = 1; i < mi->min_indirect_level_escape; i++)
    {
      gimple_stmt_iterator gsi;
      gimple use_stmt1 = NULL;

      gimple call_stmt = mi->malloc_for_level[i];
      gcc_assert (is_gimple_call (call_stmt));
      e = cgraph_edge (c_node, call_stmt);
      gcc_assert (e);
      cgraph_remove_edge (e);
      gsi = gsi_for_stmt (call_stmt);
      /* Remove the call stmt.  */
      gsi_remove (&gsi, true);
      /* Remove the type cast stmt.  */
      FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter,
                             gimple_call_lhs (call_stmt))
        {
          use_stmt1 = use_stmt;
          gsi = gsi_for_stmt (use_stmt);
          gsi_remove (&gsi, true);
        }
      /* Remove the assignment of the allocated area.  */
      FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter,
                             gimple_get_lhs (use_stmt1))
        {
          gsi = gsi_for_stmt (use_stmt);
          gsi_remove (&gsi, true);
        }
    }
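  /* To illustrate, for each inner level the three statements removed
     above typically form a chain like (illustrative GIMPLE, temporary
     names hypothetical):

       D.1 = malloc (sz);     <-- the call statement
       D.2 = (int *) D.1;     <-- the type cast statement
       *p = D.2;              <-- the assignment of the allocated area  */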
  update_ssa (TODO_update_ssa);
#ifdef ENABLE_CHECKING
  verify_ssa (true);
#endif
  /* Delete the calls to free.  */
  for (i = 1; i < mi->min_indirect_level_escape; i++)
    {
      gimple_stmt_iterator gsi;

      /* ??? wonder why this case is possible but we failed on it once.  */
      if (!mi->free_stmts[i].stmt)
        continue;

      c_node = cgraph_node (mi->free_stmts[i].func);
      gcc_assert (is_gimple_call (mi->free_stmts[i].stmt));
      e = cgraph_edge (c_node, mi->free_stmts[i].stmt);
      gcc_assert (e);
      cgraph_remove_edge (e);
      current_function_decl = mi->free_stmts[i].func;
      set_cfun (DECL_STRUCT_FUNCTION (mi->free_stmts[i].func));
      gsi = gsi_for_stmt (mi->free_stmts[i].stmt);
      gsi_remove (&gsi, true);
    }

  /* Return to the previous situation.  */
  current_function_decl = oldfn;
  pop_cfun ();
  return 1;
}
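
/* As an illustration of the whole transformation (hypothetical
   three-level matrix of ints), the per-dimension allocations

     a = (int ***) malloc (dim1 * sizeof (int **));
     for (i = 0; i < dim1; i++)
       {
         a[i] = (int **) malloc (dim2 * sizeof (int *));
         for (j = 0; j < dim2; j++)
           a[i][j] = (int *) malloc (dim3 * sizeof (int));
       }

   are replaced by a single allocation whose size is kept in a new
   global variable:

     size = dim1 * dim2 * dim3 * sizeof (int);
     a = (int *) malloc (size);

   and the calls to malloc of the inner levels, together with their
   counterpart calls to free, are removed.  */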
/* Print out the results of the escape analysis.  */

static int
dump_matrix_reorg_analysis (void **slot, void *data ATTRIBUTE_UNUSED)
{
  struct matrix_info *mi = (struct matrix_info *) *slot;

  if (!dump_file)
    return 1;
  fprintf (dump_file, "Matrix \"%s\"; Escaping Level: %d, Num Dims: %d,",
           get_name (mi->decl), mi->min_indirect_level_escape, mi->num_dims);
  fprintf (dump_file, " Malloc Dims: %d, ", mi->max_malloced_level);
  fprintf (dump_file, "\n");
  if (mi->min_indirect_level_escape >= 2)
    fprintf (dump_file, "Flattened %d dimensions \n",
             mi->min_indirect_level_escape);
  return 1;
}
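
/* A fully flattened three-dimensional matrix "a" would thus appear in
   the dump file as (illustrative output, following the format strings
   above):

     Matrix "a"; Escaping Level: 3, Num Dims: 3, Malloc Dims: 3,
     Flattened 3 dimensions  */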
/* Perform matrix flattening.  */

static unsigned int
matrix_reorg (void)
{
  struct cgraph_node *node;

  if (profile_info)
    check_transpose_p = true;
  else
    check_transpose_p = false;
  /* If there are hand written vectors, we skip this optimization.  */
  for (node = cgraph_nodes; node; node = node->next)
    if (!may_flatten_matrices (node))
      return 0;
  matrices_to_reorg = htab_create (37, mtt_info_hash, mtt_info_eq, mat_free);
  /* Find and record all potential matrices in the program.  */
  find_matrices_decl ();
  /* Analyze the accesses of the matrices (escaping analysis).  */
  for (node = cgraph_nodes; node; node = node->next)
    if (node->analyzed)
      {
        tree temp_fn;

        temp_fn = current_function_decl;
        current_function_decl = node->decl;
        push_cfun (DECL_STRUCT_FUNCTION (node->decl));
        bitmap_obstack_initialize (NULL);
        gimple_register_cfg_hooks ();

        if (!gimple_in_ssa_p (cfun))
          {
            free_dominance_info (CDI_DOMINATORS);
            free_dominance_info (CDI_POST_DOMINATORS);
            pop_cfun ();
            current_function_decl = temp_fn;
            bitmap_obstack_release (NULL);

            return 0;
          }

#ifdef ENABLE_CHECKING
        verify_flow_info ();
#endif

        if (!matrices_to_reorg)
          {
            free_dominance_info (CDI_DOMINATORS);
            free_dominance_info (CDI_POST_DOMINATORS);
            pop_cfun ();
            current_function_decl = temp_fn;
            bitmap_obstack_release (NULL);

            return 0;
          }

        /* Create htab for phi nodes.  */
        htab_mat_acc_phi_nodes = htab_create (37, mat_acc_phi_hash,
                                              mat_acc_phi_eq, free);
        if (!check_transpose_p)
          find_sites_in_func (false);
        else
          {
            find_sites_in_func (true);
            loop_optimizer_init (LOOPS_NORMAL);
            htab_traverse (matrices_to_reorg, analyze_transpose, NULL);
            loop_optimizer_finalize ();
            current_loops = NULL;
          }

        /* If the current function is the allocation function for any of
           the matrices we check its allocation and the escaping level.  */
        htab_traverse (matrices_to_reorg, check_allocation_function, NULL);
        free_dominance_info (CDI_DOMINATORS);
        free_dominance_info (CDI_POST_DOMINATORS);
        pop_cfun ();
        current_function_decl = temp_fn;
        bitmap_obstack_release (NULL);
      }
  htab_traverse (matrices_to_reorg, transform_allocation_sites, NULL);
  /* Now transform the accesses.  */
  for (node = cgraph_nodes; node; node = node->next)
    if (node->analyzed)
      {
        /* Remember that allocation sites have been handled.  */
        tree temp_fn;

        temp_fn = current_function_decl;
        current_function_decl = node->decl;
        push_cfun (DECL_STRUCT_FUNCTION (node->decl));
        bitmap_obstack_initialize (NULL);
        gimple_register_cfg_hooks ();
        record_all_accesses_in_func ();
        htab_traverse (matrices_to_reorg, transform_access_sites, NULL);
        free_dominance_info (CDI_DOMINATORS);
        free_dominance_info (CDI_POST_DOMINATORS);
        pop_cfun ();
        current_function_decl = temp_fn;
        bitmap_obstack_release (NULL);
      }
  htab_traverse (matrices_to_reorg, dump_matrix_reorg_analysis, NULL);

  current_function_decl = NULL;
  matrices_to_reorg = NULL;
  return 0;
}
/* The condition for matrix flattening to be performed.  */

static bool
gate_matrix_reorg (void)
{
  return flag_ipa_matrix_reorg && flag_whole_program;
}
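
/* In practice this means the pass runs only when both
   -fipa-matrix-reorg and -fwhole-program are given, e.g.
   (illustrative command line):

     gcc -O2 -fwhole-program -fipa-matrix-reorg prog.c  */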
struct simple_ipa_opt_pass pass_ipa_matrix_reorg =
{
 {
  SIMPLE_IPA_PASS,
  "matrix-reorg",               /* name */
  gate_matrix_reorg,            /* gate */
  matrix_reorg,                 /* execute */
  NULL,                         /* sub */
  NULL,                         /* next */
  0,                            /* static_pass_number */
  TV_NONE,                      /* tv_id */
  0,                            /* properties_required */
  0,                            /* properties_provided */
  0,                            /* properties_destroyed */
  0,                            /* todo_flags_start */
  TODO_dump_cgraph | TODO_dump_func     /* todo_flags_finish */
 }
};