/* Matrix layout transformations.
   Copyright (C) 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
   Contributed by Razya Ladelsky <razya@il.ibm.com>
   Originally written by Revital Eres and Mustafa Hagog.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/*
   Matrix flattening optimization tries to replace an N-dimensional
   matrix with its equivalent M-dimensional matrix, where M < N.
   This first implementation focuses on global matrices defined dynamically.

   When M == 1, we actually flatten the whole matrix.
   For instance, consider a two-dimensional array a[dim1][dim2].
   The code for allocating space for it usually looks like:

     a = (int **) malloc (dim1 * sizeof (int *));
     for (i = 0; i < dim1; i++)
       a[i] = (int *) malloc (dim2 * sizeof (int));

   If the array "a" is found suitable for this optimization,
   its allocation is replaced by:

     a = (int *) malloc (dim1 * dim2 * sizeof (int));

   and all the references to a[i][j] are replaced by a[i * dim2 + j].

   The two main phases of the optimization are the analysis
   and the transformation.
   The driver of the optimization is matrix_reorg ().

   Analysis phase:
   ===============

   We number the dimensions outside-in, meaning the most external
   is 0, then 1, and so on.
   The analysis part of the optimization determines K, the escape
   level of an N-dimensional matrix (K <= N), which allows flattening of
   the external dimensions 0, 1, ..., K-1.  Escape level 0 means that the
   whole matrix escapes and no flattening is possible.
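   As an illustration (a hypothetical snippet; "foo" is an unknown
   external function invented for this example):

     a = (int **) malloc (dim1 * sizeof (int *));
     for (i = 0; i < dim1; i++)
       a[i] = (int *) malloc (dim2 * sizeof (int));
     ...
     foo (a[5]);

   Passing the row pointer a[5] to an unknown function makes indirection
   level 1 escape, so the escape level is 1: only dimension 0 could be
   kept, and no flattening (which needs at least two non-escaping
   dimensions) takes place.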
   The analysis part is implemented in analyze_matrix_allocation_site ()
   and analyze_matrix_accesses ().

   Transformation phase:
   =====================
   In this phase we define the new flattened matrices that replace the
   original matrices in the code.
   This is implemented in transform_allocation_sites () and
   transform_access_sites ().

   Matrix Transposing
   ==================
   The idea of matrix transposing is to organize the matrix in a different
   layout such that the dimensions are reordered.
   This could produce better cache behavior in some cases.

   For example, let's look at the matrix accesses in the following loop:

     for (i = 0; i < N; i++)
       for (j = 0; j < M; j++)
         access to a[i][j]

   This loop can produce good cache behavior because the elements of
   the inner dimension are accessed sequentially.

   However, if the accesses of the matrix were of the following form:

     for (i = 0; i < N; i++)
       for (j = 0; j < M; j++)
         access to a[j][i]

   this loop would iterate over the columns rather than the rows, so
   swapping the rows and columns gives a layout with better (cache)
   locality.  Replacing the dimensions of the matrix is called matrix
   transposing.

   This example, of course, extends to matrices of more than two
   dimensions as well.

   Since a program could include all kinds of accesses, there is a decision
   mechanism, implemented in analyze_transpose (), which applies a
   heuristic to determine whether or not to transpose the matrix,
   according to the form of the dominant accesses.
   This decision is passed on to the flattening mechanism; whether or not
   the matrix was transposed, the matrix is flattened (if possible).

   This decision making is based on profiling information and loop
   information.  If profiling information is available, the decision-making
   mechanism is run; otherwise the matrix is only flattened (if possible).

   Both optimizations are described in the paper "Matrix flattening and
   transposing in GCC", which was presented at the GCC Summit 2006.
   http://www.gccsummit.org/2006/2006-GCC-Summit-Proceedings.pdf.  */
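/* Usage note (not part of this file's original text; based on the pass
   as shipped in this era of GCC, so treat the details as assumptions):
   the optimization is enabled by -fipa-matrix-reorg and, since it must
   see every use of a matrix, is only effective together with
   -fwhole-program; as noted above, the transposing heuristic
   additionally relies on profile feedback.  */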
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "rtl.h"
#include "tree-inline.h"
#include "tree-flow.h"
#include "tree-flow-inline.h"
#include "langhooks.h"
#include "hashtab.h"
#include "toplev.h"
#include "flags.h"
#include "ggc.h"
#include "debug.h"
#include "target.h"
#include "cgraph.h"
#include "diagnostic.h"
#include "timevar.h"
#include "params.h"
#include "fibheap.h"
#include "intl.h"
#include "function.h"
#include "basic-block.h"
#include "cfgloop.h"
#include "tree-iterator.h"
#include "tree-pass.h"
#include "opts.h"
#include "tree-data-ref.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
/* We need to collect a lot of data from the original malloc,
   particularly as the gimplifier has converted:

     orig_var = (struct_type *) malloc (x * sizeof (struct_type *));

   into

     T3 = <constant> ;  ** <constant> is amount to malloc; precomputed **
     T4 = malloc (T3);
     T5 = (struct_type *) T4;
     orig_var = T5;

   The following struct fields allow us to collect all the necessary data
   from the gimplified program.  The comments in the struct below are all
   based on the gimple example above.  */
struct malloc_call_data
{
  gimple call_stmt;       /* Tree for "T4 = malloc (T3);".  */
  tree size_var;          /* Var decl for T3.  */
  tree malloc_size;       /* Tree for "<constant>", the rhs assigned to T3.  */
};
static tree can_calculate_expr_before_stmt (tree, sbitmap);
static tree can_calculate_stmt_before_stmt (gimple, sbitmap);
/* The front end of the compiler, when parsing statements of the form:

     var = (type_cast) malloc (sizeof (type));

   always converts this single statement into the following statements
   (GIMPLE form):

     T.1 = sizeof (type);
     T.2 = malloc (T.1);
     T.3 = (type_cast) T.2;
     var = T.3;

   Since we need to create new malloc statements and modify the original
   statements somewhat, we need to find all four of the above statements.
   Currently record_call_1 (called for building cgraph edges) finds and
   records the statements containing the actual call to malloc, but we
   need to find the rest of the variables/statements on our own.  That
   is what the following function does.  */
static void
collect_data_for_malloc_call (gimple stmt, struct malloc_call_data *m_data)
{
  tree size_var = NULL;
  tree malloc_fn_decl;
  tree arg1;

  gcc_assert (is_gimple_call (stmt));

  malloc_fn_decl = gimple_call_fndecl (stmt);
  if (malloc_fn_decl == NULL
      || DECL_FUNCTION_CODE (malloc_fn_decl) != BUILT_IN_MALLOC)
    return;

  arg1 = gimple_call_arg (stmt, 0);
  size_var = arg1;

  m_data->call_stmt = stmt;
  m_data->size_var = size_var;
  if (TREE_CODE (size_var) != VAR_DECL)
    m_data->malloc_size = size_var;
  else
    m_data->malloc_size = NULL_TREE;
}
/* Information about a matrix access site.
   For example: if an access site of matrix arr is arr[i][j]
   the ACCESS_SITE_INFO structure will have the address
   of arr as its stmt.  The INDEX_INFO will hold information about the
   initial address and index of each dimension.  */
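/* As an illustration (a sketch of the GIMPLE-level pattern; the T.n
   temporaries are invented for the example): an access arr[i][j] of an
   int **arr typically appears as

     T.1 = arr + i * sizeof (int *);  ** POINTER_PLUS_EXPR, level 0 **
     T.2 = *T.1;                      ** INDIRECT_REF **
     T.3 = T.2 + j * sizeof (int);    ** POINTER_PLUS_EXPR, level 1 **
     T.4 = *T.3;

   where, for the level-1 site, the recorded OFFSET is the second
   operand of the POINTER_PLUS_EXPR and the INDEX that created it
   is j.  */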
struct access_site_info
{
  /* The statement (INDIRECT_REF or POINTER_PLUS_EXPR).  */
  gimple stmt;

  /* In case of POINTER_PLUS_EXPR, what is the offset.  */
  tree offset;

  /* The index which created the offset.  */
  tree index;

  /* The indirection level of this statement.  */
  int level;

  /* TRUE for an allocation site, FALSE for an access site.  */
  bool is_alloc;

  /* The function containing the access site.  */
  tree function_decl;

  /* This access is iterated in the innermost loop.  */
  bool iterated_by_inner_most_loop_p;
};

typedef struct access_site_info *access_site_info_p;
DEF_VEC_P (access_site_info_p);
DEF_VEC_ALLOC_P (access_site_info_p, heap);
/* Information about a matrix to flatten.  */
struct matrix_info
{
  /* Decl tree of this matrix.  */
  tree decl;
  /* Number of dimensions; number
     of "*" in the type declaration.  */
  int num_dims;

  /* Minimum indirection level that escapes.  0 means that
     the whole matrix escapes; k means that the matrix escapes at
     indirection level k, so dimensions 0 .. k-1 do not escape and
     may be flattened.  */
  int min_indirect_level_escape;

  gimple min_indirect_level_escape_stmt;

  /* Hold the allocation site for each level (dimension).
     We can use NUM_DIMS as the upper bound and allocate the array
     once with this number of elements, with no need to use realloc and
     MAX_MALLOCED_LEVEL.  */
  gimple *malloc_for_level;

  int max_malloced_level;

  /* Is the matrix transposed.  */
  bool is_transposed_p;

  /* The location of the allocation sites (they must be in one
     function).  */
  tree allocation_function_decl;

  /* The calls to free for each level of indirection.  */
  struct free_info
  {
    gimple stmt;
    tree func;
  } *free_stmts;

  /* An array which holds for each dimension its size, where
     dimension 0 is the outermost (the one that contains all the
     others).  */
  tree *dimension_size;

  /* An array which holds for each dimension its original size
     (before transposing and flattening take place).  */
  tree *dimension_size_orig;

  /* An array which holds for each dimension the size of the type of
     the elements accessed in that level (in bytes).  */
  HOST_WIDE_INT *dimension_type_size;

  int dimension_type_size_len;

  /* An array collecting the count of accesses for each dimension.  */
  gcov_type *dim_hot_level;

  /* An array of the accesses to be flattened.
     The elements are of type "struct access_site_info *".  */
  VEC (access_site_info_p, heap) * access_l;

  /* A map of how the dimensions will be organized at the end of
     the analyses.  */
  int *dim_map;
};
/* In each phi node we want to record the indirection level we have when we
   get to the phi node.  Usually we will have phi nodes with more than two
   arguments, and then we must ensure that all of them get to the phi node
   with the same indirection level; otherwise it's not safe to do the
   flattening.  So we record the information regarding the indirection
   level each time we get to the phi node in this hash table.  */
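/* For example (an invented snippet, purely illustrative):

     if (cond)
       p.1 = a;        ** indirection level 0 **
     else
       p.2 = *a;       ** indirection level 1 **
     p.3 = PHI <p.1, p.2>;

   Here the two arguments reach the phi node at different indirection
   levels, so the matrix is marked as escaping at the minimum of
   them.  */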
struct matrix_access_phi_node
{
  gimple phi;
  int indirection_level;
};
/* We use this structure to find if the SSA variable is accessed inside the
   tree and to record the tree containing it.  */

struct ssa_acc_in_tree
{
  /* The variable whose accesses in the tree we are looking for.  */
  tree ssa_var;
  /* The code of the tree in which ssa_var is accessed, and the tree
     itself; currently it can be an INDIRECT_REF or CALL_EXPR.  */
  enum tree_code t_code;
  tree t_tree;
  /* The place in the containing tree.  */
  tree *tp;
  tree second_op;
  bool var_found;
};
static void analyze_matrix_accesses (struct matrix_info *, tree, int, bool,
                                     sbitmap, bool);
static int transform_allocation_sites (void **, void *);
static int transform_access_sites (void **, void *);
static int analyze_transpose (void **, void *);
static int dump_matrix_reorg_analysis (void **, void *);

static bool check_transpose_p;
/* Hash function used for the phi nodes.  */

static hashval_t
mat_acc_phi_hash (const void *p)
{
  const struct matrix_access_phi_node *const ma_phi =
    (const struct matrix_access_phi_node *) p;

  return htab_hash_pointer (ma_phi->phi);
}

/* Equality means phi node pointers are the same.  */

static int
mat_acc_phi_eq (const void *p1, const void *p2)
{
  const struct matrix_access_phi_node *const phi1 =
    (const struct matrix_access_phi_node *) p1;
  const struct matrix_access_phi_node *const phi2 =
    (const struct matrix_access_phi_node *) p2;

  if (phi1->phi == phi2->phi)
    return 1;

  return 0;
}
/* Hold the PHI nodes we visit during the traversal for escaping
   analysis.  */
static htab_t htab_mat_acc_phi_nodes = NULL;

/* This hash-table holds the information about the matrices we are
   going to handle.  */
static htab_t matrices_to_reorg = NULL;

/* Return a hash for MTT, which is really a "matrix_info *".  */
static hashval_t
mtt_info_hash (const void *mtt)
{
  return htab_hash_pointer (((const struct matrix_info *) mtt)->decl);
}

/* Return true if MTT1 and MTT2 (which are really both of type
   "matrix_info *") refer to the same decl.  */
static int
mtt_info_eq (const void *mtt1, const void *mtt2)
{
  const struct matrix_info *const i1 = (const struct matrix_info *) mtt1;
  const struct matrix_info *const i2 = (const struct matrix_info *) mtt2;

  if (i1->decl == i2->decl)
    return true;

  return false;
}
/* Return false if STMT may contain a vector expression.
   In this situation, no matrix may be flattened.  */
static bool
may_flatten_matrices_1 (gimple stmt)
{
  tree t;

  switch (gimple_code (stmt))
    {
    case GIMPLE_ASSIGN:
      if (!gimple_assign_cast_p (stmt))
        return true;

      t = gimple_assign_rhs1 (stmt);
      while (CONVERT_EXPR_P (t))
        {
          if (TREE_TYPE (t) && POINTER_TYPE_P (TREE_TYPE (t)))
            {
              tree pointee;

              pointee = TREE_TYPE (t);
              while (POINTER_TYPE_P (pointee))
                pointee = TREE_TYPE (pointee);
              if (TREE_CODE (pointee) == VECTOR_TYPE)
                {
                  if (dump_file)
                    fprintf (dump_file,
                             "Found vector type, don't flatten matrix\n");
                  return false;
                }
            }
          t = TREE_OPERAND (t, 0);
        }
      break;
    case GIMPLE_ASM:
      /* Asm code could contain vector operations.  */
      return false;
      break;
    default:
      break;
    }
  return true;
}
/* Return false if there are hand-written vectors in the program.
   We disable the flattening in such a case.  */
static bool
may_flatten_matrices (struct cgraph_node *node)
{
  tree decl;
  struct function *func;
  basic_block bb;
  gimple_stmt_iterator gsi;

  decl = node->decl;
  if (node->analyzed)
    {
      func = DECL_STRUCT_FUNCTION (decl);
      FOR_EACH_BB_FN (bb, func)
        for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
          if (!may_flatten_matrices_1 (gsi_stmt (gsi)))
            return false;
    }
  return true;
}
/* Given a VAR_DECL, check its type to determine whether it is
   a definition of a dynamically allocated matrix and therefore is
   a suitable candidate for the matrix flattening optimization.
   Return NULL if VAR_DECL is not such a decl.  Otherwise, allocate
   a MATRIX_INFO structure, fill it with the relevant information
   and return a pointer to it.
   TODO: also handle statically defined arrays.  */
static struct matrix_info *
analyze_matrix_decl (tree var_decl)
{
  struct matrix_info *m_node, tmpmi, *mi;
  tree var_type;
  int dim_num = 0;

  gcc_assert (matrices_to_reorg);

  if (TREE_CODE (var_decl) == PARM_DECL)
    var_type = DECL_ARG_TYPE (var_decl);
  else if (TREE_CODE (var_decl) == VAR_DECL)
    var_type = TREE_TYPE (var_decl);
  else
    return NULL;

  if (!POINTER_TYPE_P (var_type))
    return NULL;

  while (POINTER_TYPE_P (var_type))
    {
      var_type = TREE_TYPE (var_type);
      dim_num++;
    }

  if (dim_num <= 1)
    return NULL;

  if (!COMPLETE_TYPE_P (var_type)
      || TREE_CODE (TYPE_SIZE_UNIT (var_type)) != INTEGER_CST)
    return NULL;

  /* Check to see if this pointer is already in there.  */
  tmpmi.decl = var_decl;
  mi = (struct matrix_info *) htab_find (matrices_to_reorg, &tmpmi);

  if (mi)
    return NULL;

  /* Record the matrix.  */

  m_node = (struct matrix_info *) xcalloc (1, sizeof (struct matrix_info));
  m_node->decl = var_decl;
  m_node->num_dims = dim_num;
  m_node->free_stmts
    = (struct free_info *) xcalloc (dim_num, sizeof (struct free_info));

  /* Init min_indirect_level_escape to -1 to indicate that no escape
     analysis has been done yet.  */
  m_node->min_indirect_level_escape = -1;
  m_node->is_transposed_p = false;

  return m_node;
}
/* Free matrix E.  */
static void
mat_free (void *e)
{
  struct matrix_info *mat = (struct matrix_info *) e;

  if (!mat)
    return;

  if (mat->free_stmts)
    free (mat->free_stmts);
  if (mat->dim_hot_level)
    free (mat->dim_hot_level);
  if (mat->malloc_for_level)
    free (mat->malloc_for_level);
}
/* Find all potential matrices.
   TODO: currently we handle only multidimensional
   dynamically allocated arrays.  */
static void
find_matrices_decl (void)
{
  struct matrix_info *tmp;
  PTR *slot;
  struct varpool_node *vnode;

  gcc_assert (matrices_to_reorg);

  /* For every global variable in the program:
     Check to see if it's of a candidate type and record it.  */
  for (vnode = varpool_nodes_queue; vnode; vnode = vnode->next_needed)
    {
      tree var_decl = vnode->decl;

      if (!var_decl || TREE_CODE (var_decl) != VAR_DECL)
        continue;

      if (matrices_to_reorg)
        if ((tmp = analyze_matrix_decl (var_decl)))
          {
            if (!TREE_ADDRESSABLE (var_decl))
              {
                slot = htab_find_slot (matrices_to_reorg, tmp, INSERT);
                *slot = tmp;
              }
          }
    }
  return;
}
/* Mark that the matrix MI escapes at level L.  */
static void
mark_min_matrix_escape_level (struct matrix_info *mi, int l, gimple s)
{
  if (mi->min_indirect_level_escape == -1
      || (mi->min_indirect_level_escape > l))
    {
      mi->min_indirect_level_escape = l;
      mi->min_indirect_level_escape_stmt = s;
    }
}
/* Find if the SSA variable is accessed inside the
   tree T and record the tree containing it.
   The only relevant uses are an SSA_NAME by itself, or an SSA name
   inside an INDIRECT_REF, PLUS_EXPR, POINTER_PLUS_EXPR or MULT_EXPR.  */
static void
ssa_accessed_in_tree (tree t, struct ssa_acc_in_tree *a)
{
  a->t_code = TREE_CODE (t);
  switch (a->t_code)
    {
    case SSA_NAME:
      if (t == a->ssa_var)
        a->var_found = true;
      break;
    case INDIRECT_REF:
      if (SSA_VAR_P (TREE_OPERAND (t, 0))
          && TREE_OPERAND (t, 0) == a->ssa_var)
        a->var_found = true;
      break;
    default:
      break;
    }
}
/* Find if the SSA variable is accessed on the right hand side of
   gimple call STMT.  */

static void
ssa_accessed_in_call_rhs (gimple stmt, struct ssa_acc_in_tree *a)
{
  tree decl;
  tree arg;
  size_t i;

  a->t_code = CALL_EXPR;
  for (i = 0; i < gimple_call_num_args (stmt); i++)
    {
      arg = gimple_call_arg (stmt, i);
      if (arg == a->ssa_var)
        {
          a->var_found = true;
          decl = gimple_call_fndecl (stmt);
          a->t_tree = decl;
          break;
        }
    }
}
/* Find if the SSA variable is accessed on the right hand side of
   gimple assign STMT.  */

static void
ssa_accessed_in_assign_rhs (gimple stmt, struct ssa_acc_in_tree *a)
{
  a->t_code = gimple_assign_rhs_code (stmt);
  switch (a->t_code)
    {
      tree op1, op2;

    case SSA_NAME:
    case INDIRECT_REF:
    CASE_CONVERT:
    case VIEW_CONVERT_EXPR:
      ssa_accessed_in_tree (gimple_assign_rhs1 (stmt), a);
      break;
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MULT_EXPR:
      op1 = gimple_assign_rhs1 (stmt);
      op2 = gimple_assign_rhs2 (stmt);

      if (op1 == a->ssa_var)
        {
          a->var_found = true;
          a->second_op = op2;
        }
      else if (op2 == a->ssa_var)
        {
          a->var_found = true;
          a->second_op = op1;
        }
      break;
    default:
      break;
    }
}
/* Record the access/allocation site information for matrix MI so we can
   handle it later in transformation.  */
static void
record_access_alloc_site_info (struct matrix_info *mi, gimple stmt, tree offset,
                               tree index, int level, bool is_alloc)
{
  struct access_site_info *acc_info;

  if (!mi->access_l)
    mi->access_l = VEC_alloc (access_site_info_p, heap, 100);

  acc_info
    = (struct access_site_info *)
    xcalloc (1, sizeof (struct access_site_info));
  acc_info->stmt = stmt;
  acc_info->offset = offset;
  acc_info->index = index;
  acc_info->function_decl = current_function_decl;
  acc_info->level = level;
  acc_info->is_alloc = is_alloc;

  VEC_safe_push (access_site_info_p, heap, mi->access_l, acc_info);
}
/* Record the malloc as the allocation site of the given LEVEL.  But
   first we make sure that all the size parameters passed to malloc in
   all the allocation sites could be pre-calculated before the call to
   the malloc of level 0 (the main malloc call).  */
static void
add_allocation_site (struct matrix_info *mi, gimple stmt, int level)
{
  struct malloc_call_data mcd;

  /* Make sure that the allocation sites are in the same function.  */
  if (!mi->allocation_function_decl)
    mi->allocation_function_decl = current_function_decl;
  else if (mi->allocation_function_decl != current_function_decl)
    {
      int min_malloc_level;

      gcc_assert (mi->malloc_for_level);

      /* Find the minimum malloc level that has already been seen; we
         know its allocation function must be
         MI->allocation_function_decl, since it differs from
         CURRENT_FUNCTION_DECL.  The escaping level should then be
         MIN (LEVEL, MIN_MALLOC_LEVEL) - 1, and the allocation function
         must be set accordingly.  */
      for (min_malloc_level = 0;
           min_malloc_level < mi->max_malloced_level
           && mi->malloc_for_level[min_malloc_level]; min_malloc_level++);
      if (level < min_malloc_level)
        {
          mi->allocation_function_decl = current_function_decl;
          mark_min_matrix_escape_level (mi, min_malloc_level, stmt);
        }
      else
        {
          mark_min_matrix_escape_level (mi, level, stmt);
          /* It cannot be that level == min_malloc_level; we would
             have returned earlier.  */
          return;
        }
    }

  /* Find the correct malloc information.  */
  collect_data_for_malloc_call (stmt, &mcd);

  /* We accept only calls to the malloc function; we do not accept
     calls like calloc and realloc.  */
  if (!mi->malloc_for_level)
    {
      mi->malloc_for_level = XCNEWVEC (gimple, level + 1);
      mi->max_malloced_level = level + 1;
    }
  else if (mi->max_malloced_level <= level)
    {
      mi->malloc_for_level
        = XRESIZEVEC (gimple, mi->malloc_for_level, level + 1);

      /* Zero the newly allocated entries, which start at index
         MAX_MALLOCED_LEVEL and are of type gimple.  */
      memset (&(mi->malloc_for_level[mi->max_malloced_level]),
              0, (level + 1 - mi->max_malloced_level) * sizeof (gimple));

      mi->max_malloced_level = level + 1;
    }
  mi->malloc_for_level[level] = stmt;
}
/* Given an assignment statement STMT whose left-hand side we know to
   be the matrix MI variable, we traverse the immediate
   uses backwards until we get to a malloc site.  We make sure that
   there is one and only one malloc site that sets this variable.  When
   we are performing the flattening we generate a new variable that
   will hold the size for each dimension; each malloc that allocates a
   dimension has the size parameter; we use that parameter to
   initialize the dimension size variable so we can use it later in
   the address calculations.  LEVEL is the dimension we're inspecting.
   We return as soon as STMT has been related to an allocation site.  */

static void
analyze_matrix_allocation_site (struct matrix_info *mi, gimple stmt,
                                int level, sbitmap visited)
{
  if (gimple_assign_copy_p (stmt) || gimple_assign_cast_p (stmt))
    {
      tree rhs = gimple_assign_rhs1 (stmt);

      if (TREE_CODE (rhs) == SSA_NAME)
        {
          gimple def = SSA_NAME_DEF_STMT (rhs);

          analyze_matrix_allocation_site (mi, def, level, visited);
          return;
        }
      /* If we are back to the original matrix variable then we
         are sure that this is analyzed as an access site.  */
      else if (rhs == mi->decl)
        return;
    }
  /* A result of a call to malloc.  */
  else if (is_gimple_call (stmt))
    {
      int call_flags = gimple_call_flags (stmt);

      if (!(call_flags & ECF_MALLOC))
        {
          mark_min_matrix_escape_level (mi, level, stmt);
          return;
        }
      else
        {
          tree malloc_fn_decl;
          const char *malloc_fname;

          malloc_fn_decl = gimple_call_fndecl (stmt);
          if (malloc_fn_decl == NULL_TREE)
            {
              mark_min_matrix_escape_level (mi, level, stmt);
              return;
            }
          malloc_fname = IDENTIFIER_POINTER (DECL_NAME (malloc_fn_decl));
          if (DECL_FUNCTION_CODE (malloc_fn_decl) != BUILT_IN_MALLOC)
            {
              if (dump_file)
                fprintf (dump_file,
                         "Matrix %s is an argument to function %s\n",
                         get_name (mi->decl), get_name (malloc_fn_decl));
              mark_min_matrix_escape_level (mi, level, stmt);
              return;
            }
        }
      /* This is a call to malloc of level 'level'.
         mi->max_malloced_level-1 == level means that we've
         seen a malloc statement of level 'level' before.
         If the statement is not the same one that we've
         seen before, then there's another malloc statement
         for the same level, which means that we need to mark
         it escaping.  */
      if (mi->malloc_for_level
          && mi->max_malloced_level-1 == level
          && mi->malloc_for_level[level] != stmt)
        {
          mark_min_matrix_escape_level (mi, level, stmt);
          return;
        }
      else
        add_allocation_site (mi, stmt, level);
      return;
    }
  /* Looks like we don't know what is happening in this
     statement, so be on the safe side and mark it as escaping.  */
  mark_min_matrix_escape_level (mi, level, stmt);
}
/* The transposing decision making.
   In order to calculate the profitability of transposing, we collect two
   types of information regarding the accesses:
   1. profiling information used to express the hotness of an access, that
      is how often the matrix is accessed by this access site (count of the
      access site).
   2. which dimension in the access site is iterated by the innermost
      loop containing this access.

   The matrix will have a calculated value of weighted hotness for each
   dimension.
   Intuitively, the hotness level of a dimension is a function of how
   many times it was the most frequently accessed dimension in the
   highly executed access sites of this matrix.

   It is computed by the following equation, where the sum runs over all
   m access sites j, accumulating into each dimension i (0 <= i < n):

     dim_hot_level[i] += acc[j]->dim[i]->iter_by_inner_loop * count(j)

   Here n is the number of dims and m is the number of the matrix
   access sites.  acc[j]->dim[i]->iter_by_inner_loop is 1 if acc[j]
   iterates over dim[i] in the innermost loop, and is 0 otherwise.
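   For instance (an invented profile, purely illustrative): with n = 2
   and two access sites, where acc[0] iterates dimension 1 in its
   innermost loop with count(0) = 1000 and acc[1] iterates dimension 0
   with count(1) = 10, we get dim_hot_level = {10, 1000}; dimension 1
   is the hottest and is therefore kept innermost.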
   The organization of the new matrix should be according to the
   hotness of each dimension.  The hotness of a dimension implies
   the locality of its elements.  */
static int
analyze_transpose (void **slot, void *data ATTRIBUTE_UNUSED)
{
  struct matrix_info *mi = (struct matrix_info *) *slot;
  int min_escape_l = mi->min_indirect_level_escape;
  struct loop *loop;
  affine_iv iv;
  struct access_site_info *acc_info;
  int i;

  if (min_escape_l < 2 || !mi->access_l)
    {
      if (mi->access_l)
        {
          for (i = 0;
               VEC_iterate (access_site_info_p, mi->access_l, i, acc_info);
               i++)
            free (acc_info);
          VEC_free (access_site_info_p, heap, mi->access_l);
        }
      return 1;
    }
  if (!mi->dim_hot_level)
    mi->dim_hot_level =
      (gcov_type *) xcalloc (min_escape_l, sizeof (gcov_type));


  for (i = 0; VEC_iterate (access_site_info_p, mi->access_l, i, acc_info);
       i++)
    {
      if (gimple_assign_rhs_code (acc_info->stmt) == POINTER_PLUS_EXPR
          && acc_info->level < min_escape_l)
        {
          loop = loop_containing_stmt (acc_info->stmt);
          if (!loop || loop->inner)
            {
              free (acc_info);
              continue;
            }
          if (simple_iv (loop, loop, acc_info->offset, &iv, true))
            {
              if (iv.step != NULL)
                {
                  HOST_WIDE_INT istep;

                  istep = int_cst_value (iv.step);
                  if (istep != 0)
                    {
                      acc_info->iterated_by_inner_most_loop_p = 1;
                      mi->dim_hot_level[acc_info->level] +=
                        gimple_bb (acc_info->stmt)->count;
                    }
                }
            }
        }
      free (acc_info);
    }
  VEC_free (access_site_info_p, heap, mi->access_l);

  return 1;
}
/* Find the index which defines the OFFSET from the base.
   We walk from use to def until we find how the offset was defined.  */
static tree
get_index_from_offset (tree offset, gimple def_stmt)
{
  tree op1, op2, index;

  if (gimple_code (def_stmt) == GIMPLE_PHI)
    return NULL_TREE;
  if ((gimple_assign_copy_p (def_stmt) || gimple_assign_cast_p (def_stmt))
      && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME)
    return get_index_from_offset (offset,
                                  SSA_NAME_DEF_STMT (gimple_assign_rhs1 (def_stmt)));
  else if (is_gimple_assign (def_stmt)
           && gimple_assign_rhs_code (def_stmt) == MULT_EXPR)
    {
      op1 = gimple_assign_rhs1 (def_stmt);
      op2 = gimple_assign_rhs2 (def_stmt);
      if (TREE_CODE (op1) != INTEGER_CST && TREE_CODE (op2) != INTEGER_CST)
        return NULL_TREE;
      index = (TREE_CODE (op1) == INTEGER_CST) ? op2 : op1;
      return index;
    }
  else
    return NULL_TREE;
}
/* Update MI->dimension_type_size[CURRENT_INDIRECT_LEVEL] with the size
   of the type related to the SSA_VAR, or the type related to the
   lhs of STMT, in the case that it is an INDIRECT_REF.  */
static void
update_type_size (struct matrix_info *mi, gimple stmt, tree ssa_var,
                  int current_indirect_level)
{
  tree lhs;
  HOST_WIDE_INT type_size;

  /* Update type according to the type of the INDIRECT_REF expr.  */
  if (is_gimple_assign (stmt)
      && TREE_CODE (gimple_assign_lhs (stmt)) == INDIRECT_REF)
    {
      lhs = gimple_assign_lhs (stmt);
      gcc_assert (POINTER_TYPE_P
                  (TREE_TYPE (SSA_NAME_VAR (TREE_OPERAND (lhs, 0)))));
      type_size =
        int_size_in_bytes (TREE_TYPE
                           (TREE_TYPE
                            (SSA_NAME_VAR (TREE_OPERAND (lhs, 0)))));
    }
  else
    type_size = int_size_in_bytes (TREE_TYPE (ssa_var));

  /* Record the size of elements accessed (as a whole)
     in the current indirection level (dimension).  If the size of
     elements is not known at compile time, mark it as escaping.  */
  if (type_size <= 0)
    mark_min_matrix_escape_level (mi, current_indirect_level, stmt);
  else
    {
      int l = current_indirect_level;

      if (!mi->dimension_type_size)
        {
          mi->dimension_type_size
            = (HOST_WIDE_INT *) xcalloc (l + 1, sizeof (HOST_WIDE_INT));
          mi->dimension_type_size_len = l + 1;
        }
      else if (mi->dimension_type_size_len < l + 1)
        {
          mi->dimension_type_size
            = (HOST_WIDE_INT *) xrealloc (mi->dimension_type_size,
                                          (l + 1) * sizeof (HOST_WIDE_INT));
          memset (&mi->dimension_type_size[mi->dimension_type_size_len],
                  0, (l + 1 - mi->dimension_type_size_len)
                  * sizeof (HOST_WIDE_INT));
          mi->dimension_type_size_len = l + 1;
        }
      /* Make sure all the accesses in the same level have the same size
         of the type.  */
      if (!mi->dimension_type_size[l])
        mi->dimension_type_size[l] = type_size;
      else if (mi->dimension_type_size[l] != type_size)
        mark_min_matrix_escape_level (mi, l, stmt);
    }
}
/* USE_STMT represents a GIMPLE_CALL, where one of the arguments is the
   ssa var that we want to check because it came from some use of matrix
   MI.  CURRENT_INDIRECT_LEVEL is the indirection level we reached so
   far.  */

static int
analyze_accesses_for_call_stmt (struct matrix_info *mi, tree ssa_var,
                                gimple use_stmt, int current_indirect_level)
{
  tree fndecl = gimple_call_fndecl (use_stmt);

  if (gimple_call_lhs (use_stmt))
    {
      tree lhs = gimple_call_lhs (use_stmt);
      struct ssa_acc_in_tree lhs_acc, rhs_acc;

      memset (&lhs_acc, 0, sizeof (lhs_acc));
      memset (&rhs_acc, 0, sizeof (rhs_acc));

      lhs_acc.ssa_var = ssa_var;
      lhs_acc.t_code = ERROR_MARK;
      ssa_accessed_in_tree (lhs, &lhs_acc);
      rhs_acc.ssa_var = ssa_var;
      rhs_acc.t_code = ERROR_MARK;
      ssa_accessed_in_call_rhs (use_stmt, &rhs_acc);

      /* The SSA must be either in the left side or in the right side,
         to understand what is happening.
         In case the SSA_NAME is found in both sides we should be escaping
         at this level because in this case we cannot calculate the
         address correctly.  */
      if ((lhs_acc.var_found && rhs_acc.var_found
           && lhs_acc.t_code == INDIRECT_REF)
          || (!rhs_acc.var_found && !lhs_acc.var_found))
        {
          mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
          return current_indirect_level;
        }
      gcc_assert (!rhs_acc.var_found || !lhs_acc.var_found);

      /* If we are storing to the matrix at some level, then mark it as
         escaping at that level.  */
      if (lhs_acc.var_found)
        {
          int l = current_indirect_level + 1;

          gcc_assert (lhs_acc.t_code == INDIRECT_REF);
          mark_min_matrix_escape_level (mi, l, use_stmt);
          return current_indirect_level;
        }
    }

  if (fndecl)
    {
      if (DECL_FUNCTION_CODE (fndecl) != BUILT_IN_FREE)
        {
          if (dump_file)
            fprintf (dump_file,
                     "Matrix %s: Function call %s, level %d escapes.\n",
                     get_name (mi->decl), get_name (fndecl),
                     current_indirect_level);
          mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
        }
      else if (mi->free_stmts[current_indirect_level].stmt != NULL
               && mi->free_stmts[current_indirect_level].stmt != use_stmt)
        mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
      else
        {
          /* Record the free statements so we can delete them
             later.  */
          int l = current_indirect_level;

          mi->free_stmts[l].stmt = use_stmt;
          mi->free_stmts[l].func = current_function_decl;
        }
    }
  return current_indirect_level;
}
/* USE_STMT represents a phi node of the ssa var that we want to
   check because it came from some use of matrix MI.
   We check all the escaping levels that get to the PHI node
   and make sure they are all the same escaping;
   if not (which is rare) we let the escaping level be the
   minimum level that gets into that PHI, because starting from
   that level we cannot expect the behavior of the indirections.
   CURRENT_INDIRECT_LEVEL is the indirection level we reached so far.  */

static void
analyze_accesses_for_phi_node (struct matrix_info *mi, gimple use_stmt,
                               int current_indirect_level, sbitmap visited,
                               bool record_accesses)
{
  struct matrix_access_phi_node tmp_maphi, *maphi, **pmaphi;

  tmp_maphi.phi = use_stmt;
  if ((maphi = (struct matrix_access_phi_node *)
       htab_find (htab_mat_acc_phi_nodes, &tmp_maphi)))
    {
      if (maphi->indirection_level == current_indirect_level)
        return;
      else
        {
          int level = MIN (maphi->indirection_level,
                           current_indirect_level);
          size_t j;
          gimple stmt = NULL;

          maphi->indirection_level = level;
          for (j = 0; j < gimple_phi_num_args (use_stmt); j++)
            {
              tree def = PHI_ARG_DEF (use_stmt, j);

              if (gimple_code (SSA_NAME_DEF_STMT (def)) != GIMPLE_PHI)
                stmt = SSA_NAME_DEF_STMT (def);
            }
          mark_min_matrix_escape_level (mi, level, stmt);
        }
      return;
    }
  maphi = (struct matrix_access_phi_node *)
    xcalloc (1, sizeof (struct matrix_access_phi_node));
  maphi->phi = use_stmt;
  maphi->indirection_level = current_indirect_level;

  /* Insert to hash table.  */
  pmaphi = (struct matrix_access_phi_node **)
    htab_find_slot (htab_mat_acc_phi_nodes, maphi, INSERT);
  gcc_assert (pmaphi);
  *pmaphi = maphi;

  if (!TEST_BIT (visited, SSA_NAME_VERSION (PHI_RESULT (use_stmt))))
    {
      SET_BIT (visited, SSA_NAME_VERSION (PHI_RESULT (use_stmt)));
      analyze_matrix_accesses (mi, PHI_RESULT (use_stmt),
                               current_indirect_level, false, visited,
                               record_accesses);
      RESET_BIT (visited, SSA_NAME_VERSION (PHI_RESULT (use_stmt)));
    }
}
/* USE_STMT represents an assign statement (the rhs or lhs include
   the ssa var that we want to check because it came from some use of
   matrix MI).  CURRENT_INDIRECT_LEVEL is the indirection level we
   reached so far.  */

static int
analyze_accesses_for_assign_stmt (struct matrix_info *mi, tree ssa_var,
                                  gimple use_stmt, int current_indirect_level,
                                  bool last_op, sbitmap visited,
                                  bool record_accesses)
{
  tree lhs = gimple_get_lhs (use_stmt);
  struct ssa_acc_in_tree lhs_acc, rhs_acc;

  memset (&lhs_acc, 0, sizeof (lhs_acc));
  memset (&rhs_acc, 0, sizeof (rhs_acc));

  lhs_acc.ssa_var = ssa_var;
  lhs_acc.t_code = ERROR_MARK;
  ssa_accessed_in_tree (lhs, &lhs_acc);
  rhs_acc.ssa_var = ssa_var;
  rhs_acc.t_code = ERROR_MARK;
  ssa_accessed_in_assign_rhs (use_stmt, &rhs_acc);

  /* The SSA must be either in the left side or in the right side,
     to understand what is happening.
     In case the SSA_NAME is found in both sides we should be escaping
     at this level because in this case we cannot calculate the
     address correctly.  */
  if ((lhs_acc.var_found && rhs_acc.var_found
       && lhs_acc.t_code == INDIRECT_REF)
      || (!rhs_acc.var_found && !lhs_acc.var_found))
    {
      mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
      return current_indirect_level;
    }
  gcc_assert (!rhs_acc.var_found || !lhs_acc.var_found);

  /* If we are storing to the matrix at some level, then mark it as
     escaping at that level.  */
  if (lhs_acc.var_found)
    {
      int l = current_indirect_level + 1;

      gcc_assert (lhs_acc.t_code == INDIRECT_REF);

      if (!(gimple_assign_copy_p (use_stmt)
            || gimple_assign_cast_p (use_stmt))
          || (TREE_CODE (gimple_assign_rhs1 (use_stmt)) != SSA_NAME))
        mark_min_matrix_escape_level (mi, l, use_stmt);
      else
        {
          gimple def_stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (use_stmt));
          analyze_matrix_allocation_site (mi, def_stmt, l, visited);
          if (record_accesses)
            record_access_alloc_site_info (mi, use_stmt, NULL_TREE,
                                           NULL_TREE, l, true);
          update_type_size (mi, use_stmt, NULL, l);
        }
      return current_indirect_level;
    }
  /* Now, check the right-hand-side, to see how the SSA variable
     is used.  */
  if (rhs_acc.var_found)
    {
      if (rhs_acc.t_code != INDIRECT_REF
          && rhs_acc.t_code != POINTER_PLUS_EXPR && rhs_acc.t_code != SSA_NAME)
        {
          mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
          return current_indirect_level;
        }
      /* If the access in the RHS has an indirection, increase the
         indirection level.  */
      if (rhs_acc.t_code == INDIRECT_REF)
        {
          if (record_accesses)
            record_access_alloc_site_info (mi, use_stmt, NULL_TREE,
                                           NULL_TREE,
                                           current_indirect_level, true);
          current_indirect_level += 1;
        }
      else if (rhs_acc.t_code == POINTER_PLUS_EXPR)
        {
          gcc_assert (rhs_acc.second_op);
          if (last_op)
            /* Currently we support only one PLUS expression on the
               SSA_NAME that holds the base address of the current
               indirection level; to support the more general case there
               is a need to hold a stack of expressions and regenerate
               the calculation later.  */
            mark_min_matrix_escape_level (mi, current_indirect_level,
                                          use_stmt);
          else
            {
              tree index;
              tree op1, op2;

              op1 = gimple_assign_rhs1 (use_stmt);
              op2 = gimple_assign_rhs2 (use_stmt);

              op2 = (op1 == ssa_var) ? op2 : op1;
              if (TREE_CODE (op2) == INTEGER_CST)
                index =
                  build_int_cst (TREE_TYPE (op1),
                                 TREE_INT_CST_LOW (op2) /
                                 int_size_in_bytes (TREE_TYPE (op1)));
              else
                {
                  index =
                    get_index_from_offset (op2, SSA_NAME_DEF_STMT (op2));
                  if (index == NULL_TREE)
                    {
                      mark_min_matrix_escape_level (mi,
                                                    current_indirect_level,
                                                    use_stmt);
                      return current_indirect_level;
                    }
                }
              if (record_accesses)
                record_access_alloc_site_info (mi, use_stmt, op2,
                                               index,
                                               current_indirect_level, false);
            }
        }
      /* If we are storing this level of indirection, mark it as
         escaping.  */
      if (lhs_acc.t_code == INDIRECT_REF || TREE_CODE (lhs) != SSA_NAME)
        {
          int l = current_indirect_level;

          /* One exception is when we are storing to the matrix
             variable itself; this is the case of malloc.  We must make
             sure that it's the one and only one call to malloc, so
             we call analyze_matrix_allocation_site to check
             this out.  */
          if (TREE_CODE (lhs) != VAR_DECL || lhs != mi->decl)
            mark_min_matrix_escape_level (mi, current_indirect_level,
                                          use_stmt);
          else
            {
              /* Also update the escaping level.  */
              analyze_matrix_allocation_site (mi, use_stmt, l, visited);
              if (record_accesses)
                record_access_alloc_site_info (mi, use_stmt, NULL_TREE,
                                               NULL_TREE, l, true);
            }
        }
      else
        {
          /* We are placing it in an SSA, follow that SSA.  */
          analyze_matrix_accesses (mi, lhs,
                                   current_indirect_level,
                                   rhs_acc.t_code == POINTER_PLUS_EXPR,
                                   visited, record_accesses);
        }
    }
  return current_indirect_level;
}
/* Given an SSA_VAR (coming from a use statement of the matrix MI),
   follow its uses and level of indirection and find out the minimum
   indirection level it escapes in (the highest dimension) and the maximum
   level it is accessed in (this will be the actual dimension of the
   matrix).  The information is accumulated in MI.
   We look at the immediate uses; if one escapes we finish, and if not,
   we make a recursive call for each one of the immediate uses of the
   resulting SSA name.  */
static void
analyze_matrix_accesses (struct matrix_info *mi, tree ssa_var,
                         int current_indirect_level, bool last_op,
                         sbitmap visited, bool record_accesses)
{
  imm_use_iterator imm_iter;
  use_operand_p use_p;

  update_type_size (mi, SSA_NAME_DEF_STMT (ssa_var), ssa_var,
                    current_indirect_level);

  /* We don't go beyond the escaping level when we are performing the
     flattening.  NOTE: we keep the last indirection level that doesn't
     escape.  */
  if (mi->min_indirect_level_escape > -1
      && mi->min_indirect_level_escape <= current_indirect_level)
    return;

  /* Now go over the uses of the SSA_NAME and check how it is used in
     each one of them.  We are mainly looking for the pattern INDIRECT_REF,
     then a POINTER_PLUS_EXPR, then INDIRECT_REF, etc., while in between
     there can be any number of copies and casts.  */
  gcc_assert (TREE_CODE (ssa_var) == SSA_NAME);

  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, ssa_var)
    {
      gimple use_stmt = USE_STMT (use_p);
      if (gimple_code (use_stmt) == GIMPLE_PHI)
        /* We check all the escaping levels that get to the PHI node
           and make sure they are all the same escaping;
           if not (which is rare) we let the escaping level be the
           minimum level that gets into that PHI because starting from
           that level we cannot expect the behavior of the indirections.  */
        analyze_accesses_for_phi_node (mi, use_stmt, current_indirect_level,
                                       visited, record_accesses);
      else if (is_gimple_call (use_stmt))
        analyze_accesses_for_call_stmt (mi, ssa_var, use_stmt,
                                        current_indirect_level);
      else if (is_gimple_assign (use_stmt))
        current_indirect_level =
          analyze_accesses_for_assign_stmt (mi, ssa_var, use_stmt,
                                            current_indirect_level, last_op,
                                            visited, record_accesses);
    }
}
typedef struct
{
  tree fn;
  gimple stmt;
} check_var_data;
/* A walk_tree function to go over the VAR_DECL, PARM_DECL nodes of
   the malloc size expression and check that those aren't changed
   over the function.  */
static tree
check_var_notmodified_p (tree * tp, int *walk_subtrees, void *data)
{
  basic_block bb;
  tree t = *tp;
  check_var_data *callback_data = (check_var_data*) data;
  tree fn = callback_data->fn;
  gimple_stmt_iterator gsi;
  gimple stmt;

  if (TREE_CODE (t) != VAR_DECL && TREE_CODE (t) != PARM_DECL)
    return NULL_TREE;

  FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (fn))
    {
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
        {
          stmt = gsi_stmt (gsi);
          if (!is_gimple_assign (stmt) && !is_gimple_call (stmt))
            continue;
          if (gimple_get_lhs (stmt) == t)
            {
              callback_data->stmt = stmt;
              return t;
            }
        }
    }
  *walk_subtrees = 1;
  return NULL_TREE;
}
/* Go backwards in the use-def chains and find out the expression
   represented by the possible SSA name in STMT, until it is composed
   of only VAR_DECL, PARM_DECL and INT_CST.  In case of phi nodes
   we make sure that all the arguments represent the same subexpression,
   otherwise we fail.  */

static tree
can_calculate_stmt_before_stmt (gimple stmt, sbitmap visited)
{
  tree op1, op2, res;
  enum tree_code code;

  switch (gimple_code (stmt))
    {
    case GIMPLE_ASSIGN:
      code = gimple_assign_rhs_code (stmt);
      op1 = gimple_assign_rhs1 (stmt);

      switch (code)
        {
        case POINTER_PLUS_EXPR:
        case PLUS_EXPR:
        case MINUS_EXPR:
        case MULT_EXPR:

          op2 = gimple_assign_rhs2 (stmt);
          op1 = can_calculate_expr_before_stmt (op1, visited);
          if (!op1)
            return NULL_TREE;
          op2 = can_calculate_expr_before_stmt (op2, visited);
          if (op2)
            return fold_build2 (code, gimple_expr_type (stmt), op1, op2);
          return NULL_TREE;

        CASE_CONVERT:
          res = can_calculate_expr_before_stmt (op1, visited);
          if (res != NULL_TREE)
            return build1 (code, gimple_expr_type (stmt), res);
          else
            return NULL_TREE;

        default:
          if (gimple_assign_single_p (stmt))
            return can_calculate_expr_before_stmt (op1, visited);
          else
            return NULL_TREE;
        }

    case GIMPLE_PHI:
      {
        size_t j;

        res = NULL_TREE;
        /* Make sure all the arguments represent the same value.  */
        for (j = 0; j < gimple_phi_num_args (stmt); j++)
          {
            tree new_res;
            tree def = PHI_ARG_DEF (stmt, j);

            new_res = can_calculate_expr_before_stmt (def, visited);
            if (res == NULL_TREE)
              res = new_res;
            else if (!new_res || !expressions_equal_p (res, new_res))
              return NULL_TREE;
          }
        return res;
      }

    default:
      return NULL_TREE;
    }
}
/* Go backwards in the use-def chains and find out the expression
   represented by the possible SSA name in EXPR, until it is composed
   of only VAR_DECL, PARM_DECL and INT_CST.  In case of phi nodes
   we make sure that all the arguments represent the same subexpression,
   otherwise we fail.  */
static tree
can_calculate_expr_before_stmt (tree expr, sbitmap visited)
{
  gimple def_stmt;
  tree res;

  switch (TREE_CODE (expr))
    {
    case SSA_NAME:
      /* Case of loop, we don't know how to represent this expression.  */
      if (TEST_BIT (visited, SSA_NAME_VERSION (expr)))
        return NULL_TREE;

      SET_BIT (visited, SSA_NAME_VERSION (expr));
      def_stmt = SSA_NAME_DEF_STMT (expr);
      res = can_calculate_stmt_before_stmt (def_stmt, visited);
      RESET_BIT (visited, SSA_NAME_VERSION (expr));
      return res;
    case VAR_DECL:
    case PARM_DECL:
    case INTEGER_CST:
      return expr;

    default:
      return NULL_TREE;
    }
}
/* There should be only one allocation function for the dimensions
   that don't escape.  Here we check the allocation sites in this
   function.  We must make sure that all the dimensions are allocated
   using malloc and that the malloc size parameter expression could be
   pre-calculated before the call to the malloc of dimension 0.

   Given a candidate matrix for flattening -- MI -- check if it's
   appropriate for flattening -- we analyze the allocation
   sites that we recorded in the previous analysis.  The result of the
   analysis is a level of indirection (matrix dimension) in which the
   flattening is safe.  We check the following conditions:
   1. There is only one allocation site for each dimension.
   2. The allocation sites of all the dimensions are in the same
      function.
   (The above two are taken care of during the analysis when
   we check the allocation site.)
   3. All the dimensions that we flatten are allocated at once; thus
      the total size must be known before the allocation of the
      dimension 0 (top level) -- we must make sure we represent the
      size of the allocation as an expression of global parameters or
      constants, and that those don't change over the function.  */
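/* For example (illustrative C, not taken from the sources): the pair of
   calls

     a = (int **) malloc (N * sizeof (int *));
     for (i = 0; i < N; i++)
       a[i] = (int *) malloc (M * sizeof (int));

   satisfies condition 3 when N and M are globals (or parameters) that
   are never written in the allocation function, since N * M * sizeof
   (int) can then be computed before the level-0 malloc.  */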
static int
check_allocation_function (void **slot, void *data ATTRIBUTE_UNUSED)
{
  int level;
  gimple_stmt_iterator gsi;
  basic_block bb_level_0;
  struct matrix_info *mi = (struct matrix_info *) *slot;
  sbitmap visited;

  if (!mi->malloc_for_level)
    return 1;

  /* Do nothing if the current function is not the allocation
     function of MI.  */
  if (mi->allocation_function_decl != current_function_decl
      /* We aren't in the main allocation function yet.  */
      || !mi->malloc_for_level[0])
    return 1;

  /* Allocate the bitmap only once we know it will be used, so it is
     not leaked on the early returns above.  */
  visited = sbitmap_alloc (num_ssa_names);

  for (level = 1; level < mi->max_malloced_level; level++)
    if (!mi->malloc_for_level[level])
      break;

  mark_min_matrix_escape_level (mi, level, NULL);

  gsi = gsi_for_stmt (mi->malloc_for_level[0]);
  bb_level_0 = gsi.bb;

  /* Check if the expression of the size passed to malloc could be
     pre-calculated before the malloc of level 0.  */
  for (level = 1; level < mi->min_indirect_level_escape; level++)
    {
      gimple call_stmt;
      tree size;
      struct malloc_call_data mcd = {NULL, NULL_TREE, NULL_TREE};

      call_stmt = mi->malloc_for_level[level];

      /* Find the correct malloc information.  */
      collect_data_for_malloc_call (call_stmt, &mcd);

      /* No need to check anticipation for constants.  */
      if (TREE_CODE (mcd.size_var) == INTEGER_CST)
        {
          if (!mi->dimension_size)
            {
              mi->dimension_size =
                (tree *) xcalloc (mi->min_indirect_level_escape,
                                  sizeof (tree));
              mi->dimension_size_orig =
                (tree *) xcalloc (mi->min_indirect_level_escape,
                                  sizeof (tree));
            }
          mi->dimension_size[level] = mcd.size_var;
          mi->dimension_size_orig[level] = mcd.size_var;
          continue;
        }
      /* ??? Here we should also add the way to calculate the size
         expression, not only know that it is anticipated.  */
      sbitmap_zero (visited);
      size = can_calculate_expr_before_stmt (mcd.size_var, visited);
      if (size == NULL_TREE)
        {
          mark_min_matrix_escape_level (mi, level, call_stmt);
          if (dump_file)
            fprintf (dump_file,
                     "Matrix %s: Cannot calculate the size of allocation, escaping at level %d\n",
                     get_name (mi->decl), level);
          break;
        }
      if (!mi->dimension_size)
        {
          mi->dimension_size =
            (tree *) xcalloc (mi->min_indirect_level_escape, sizeof (tree));
          mi->dimension_size_orig =
            (tree *) xcalloc (mi->min_indirect_level_escape, sizeof (tree));
        }
      mi->dimension_size[level] = size;
      mi->dimension_size_orig[level] = size;
    }

  /* We don't need those anymore.  */
  for (level = mi->min_indirect_level_escape;
       level < mi->max_malloced_level; level++)
    mi->malloc_for_level[level] = NULL;
  sbitmap_free (visited);
  return 1;
}
/* Track all access and allocation sites.  */
static void
find_sites_in_func (bool record)
{
  sbitmap visited_stmts_1;

  gimple_stmt_iterator gsi;
  gimple stmt;
  basic_block bb;
  struct matrix_info tmpmi, *mi;

  visited_stmts_1 = sbitmap_alloc (num_ssa_names);

  FOR_EACH_BB (bb)
    {
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
        {
          tree lhs;

          stmt = gsi_stmt (gsi);
          lhs = gimple_get_lhs (stmt);
          if (lhs != NULL_TREE
              && TREE_CODE (lhs) == VAR_DECL)
            {
              tmpmi.decl = lhs;
              if ((mi = (struct matrix_info *) htab_find (matrices_to_reorg,
                                                          &tmpmi)))
                {
                  sbitmap_zero (visited_stmts_1);
                  analyze_matrix_allocation_site (mi, stmt, 0, visited_stmts_1);
                }
            }
          if (is_gimple_assign (stmt)
              && gimple_assign_single_p (stmt)
              && TREE_CODE (lhs) == SSA_NAME
              && TREE_CODE (gimple_assign_rhs1 (stmt)) == VAR_DECL)
            {
              tmpmi.decl = gimple_assign_rhs1 (stmt);
              if ((mi = (struct matrix_info *) htab_find (matrices_to_reorg,
                                                          &tmpmi)))
                {
                  sbitmap_zero (visited_stmts_1);
                  analyze_matrix_accesses (mi, lhs, 0,
                                           false, visited_stmts_1, record);
                }
            }
        }
    }
  sbitmap_free (visited_stmts_1);
}
/* Traverse the use-def chains to see if there are matrices that
   are passed through pointers and we cannot know how they are accessed.
   For each SSA-name defined by a global variable of our interest,
   we traverse the use-def chains of the SSA and follow the indirections,
   and record in what level of indirection the use of the variable
   escapes.  A use of a pointer escapes when it is passed to a function,
   stored into memory or assigned (except in malloc and free calls).  */

static void
record_all_accesses_in_func (void)
{
  unsigned i;
  sbitmap visited_stmts_1;

  visited_stmts_1 = sbitmap_alloc (num_ssa_names);

  for (i = 0; i < num_ssa_names; i++)
    {
      struct matrix_info tmpmi, *mi;
      tree ssa_var = ssa_name (i);
      tree rhs, lhs;

      if (!ssa_var
          || !is_gimple_assign (SSA_NAME_DEF_STMT (ssa_var))
          || !gimple_assign_single_p (SSA_NAME_DEF_STMT (ssa_var)))
        continue;
      rhs = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (ssa_var));
      lhs = gimple_assign_lhs (SSA_NAME_DEF_STMT (ssa_var));
      if (TREE_CODE (rhs) != VAR_DECL && TREE_CODE (lhs) != VAR_DECL)
        continue;

      /* If the RHS is a matrix that we want to analyze, follow the def-use
         chain for this SSA_VAR and check for escapes or apply the
         flattening.  */
      tmpmi.decl = rhs;
      if ((mi = (struct matrix_info *) htab_find (matrices_to_reorg, &tmpmi)))
        {
          /* This variable will track the visited PHI nodes, so we can limit
             its size to the maximum number of SSA names.  */
          sbitmap_zero (visited_stmts_1);
          analyze_matrix_accesses (mi, ssa_var,
                                   0, false, visited_stmts_1, true);
        }
    }
  sbitmap_free (visited_stmts_1);
}
/* Used when we want to convert the expression: RESULT = something *
   NEW_VAL into RESULT = something * ORIG, i.e. rescale an offset that
   is a multiple of NEW_VAL into the same multiple of ORIG.  If ORIG
   and NEW_VAL are powers of 2, shift operations can be done, else
   division and multiplication.  */

static tree
compute_offset (HOST_WIDE_INT orig, HOST_WIDE_INT new_val, tree result)
{

  int x, y;
  tree result1, ratio, log, orig_tree, new_tree;

  x = exact_log2 (orig);
  y = exact_log2 (new_val);

  if (x != -1 && y != -1)
    {
      if (x == y)
        return result;
      else if (x > y)
        {
          log = build_int_cst (TREE_TYPE (result), x - y);
          result1 =
            fold_build2 (LSHIFT_EXPR, TREE_TYPE (result), result, log);
          return result1;
        }
      log = build_int_cst (TREE_TYPE (result), y - x);
      result1 = fold_build2 (RSHIFT_EXPR, TREE_TYPE (result), result, log);

      return result1;
    }
  /* In the general case, divide out the old multiplier and apply the
     new one, consistently with the shift cases above
     (RESULT * ORIG / NEW_VAL).  */
  orig_tree = build_int_cst (TREE_TYPE (result), orig);
  new_tree = build_int_cst (TREE_TYPE (result), new_val);
  ratio = fold_build2 (TRUNC_DIV_EXPR, TREE_TYPE (result), result, new_tree);
  result1 = fold_build2 (MULT_EXPR, TREE_TYPE (result), ratio, orig_tree);

  return result1;
}
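/* For example (illustrative numbers only): an offset of 24 bytes built
   from 8-byte strides (index 3) rescaled to 4-byte strides gives
   24 * 4 / 8 == 12, i.e. 24 >> 1, the same index times the new
   stride.  */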
/* We know that we are allowed to perform matrix flattening (according to the
   escape analysis), so we traverse the use-def chains of the SSA vars
   defined by the global variables pointing to the matrices of our interest.
   In each use of the SSA we calculate the offset from the base address
   according to the following equation:

     a[I1][I2]...[Ik], where D1..Dk is the length of each dimension and the
     escaping level is m <= k, and a' is the new allocated matrix,
     will be translated to:

       b[I(m+1)]...[Ik]

       where
       b = a' + I1*D2*...*Dm + I2*D3*...*Dm + ... + Im  */
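/* As a concrete instance of the equation above (with invented dimension
   names), a fully flattened 3-dimensional matrix (m == k == 3) with
   dimensions D1 x D2 x D3 turns the access a[i][j][l] into

     a'[i*D2*D3 + j*D3 + l]

   mirroring the two-dimensional a[i * dim2 + j] example in the header
   comment.  */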

static int
transform_access_sites (void **slot, void *data ATTRIBUTE_UNUSED)
{
  gimple_stmt_iterator gsi;
  struct matrix_info *mi = (struct matrix_info *) *slot;
  int min_escape_l = mi->min_indirect_level_escape;
  struct access_site_info *acc_info;
  enum tree_code code;
  int i;

  if (min_escape_l < 2 || !mi->access_l)
    return 1;
  for (i = 0; VEC_iterate (access_site_info_p, mi->access_l, i, acc_info);
       i++)
    {
      /* This is possible because we collect the access sites before
         we determine the final minimum indirection level.  */
      if (acc_info->level >= min_escape_l)
        {
          free (acc_info);
          continue;
        }
      if (acc_info->is_alloc)
        {
          if (acc_info->level >= 0 && gimple_bb (acc_info->stmt))
            {
              ssa_op_iter iter;
              tree def;
              gimple stmt = acc_info->stmt;
              tree lhs;

              FOR_EACH_SSA_TREE_OPERAND (def, stmt, iter, SSA_OP_DEF)
                mark_sym_for_renaming (SSA_NAME_VAR (def));
              gsi = gsi_for_stmt (stmt);
              gcc_assert (is_gimple_assign (acc_info->stmt));
              lhs = gimple_assign_lhs (acc_info->stmt);
              if (TREE_CODE (lhs) == SSA_NAME
                  && acc_info->level < min_escape_l - 1)
                {
                  imm_use_iterator imm_iter;
                  use_operand_p use_p;
                  gimple use_stmt;

                  FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, lhs)
                    FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
                    {
                      tree rhs, tmp;
                      gimple new_stmt;

                      gcc_assert (gimple_assign_rhs_code (acc_info->stmt)
                                  == INDIRECT_REF);
                      /* Emit a convert statement to convert to the type
                         of the use.  */
                      tmp = create_tmp_var (TREE_TYPE (lhs), "new");
                      add_referenced_var (tmp);
                      rhs = gimple_assign_rhs1 (acc_info->stmt);
                      rhs = fold_convert (TREE_TYPE (tmp),
                                          TREE_OPERAND (rhs, 0));
                      new_stmt = gimple_build_assign (tmp, rhs);
                      tmp = make_ssa_name (tmp, new_stmt);
                      gimple_assign_set_lhs (new_stmt, tmp);
                      gsi = gsi_for_stmt (acc_info->stmt);
                      gsi_insert_after (&gsi, new_stmt, GSI_SAME_STMT);
                      SET_USE (use_p, tmp);
                    }
                }
              if (acc_info->level < min_escape_l - 1)
                gsi_remove (&gsi, true);
            }
          free (acc_info);
          continue;
        }
      code = gimple_assign_rhs_code (acc_info->stmt);
      if (code == INDIRECT_REF
          && acc_info->level < min_escape_l - 1)
        {
          /* Replace the INDIRECT_REF with a NOP (cast); usually we are
             casting from "pointer to type" to "type".  */
          tree t =
            build1 (NOP_EXPR, TREE_TYPE (gimple_assign_rhs1 (acc_info->stmt)),
                    TREE_OPERAND (gimple_assign_rhs1 (acc_info->stmt), 0));
          gimple_assign_set_rhs_code (acc_info->stmt, NOP_EXPR);
          gimple_assign_set_rhs1 (acc_info->stmt, t);
        }
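      /* Source-level illustration (assumed int *** matrix with
         min_escape_l == 3): an intermediate load such as

           p = *q;      p is int **, q points into the old pointer array

         becomes the plain cast p = (int **) q, because the arrays of
         intermediate pointers no longer exist once the matrix is
         flattened; only the last indirection still loads memory.  */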
      else if (code == POINTER_PLUS_EXPR
               && acc_info->level < (min_escape_l))
        {
          imm_use_iterator imm_iter;
          use_operand_p use_p;

          tree offset;
          int k = acc_info->level;
          tree num_elements, total_elements;
          tree tmp1;
          tree d_size = mi->dimension_size[k];

          /* We already made sure in the analysis that the first operand
             is the base and the second is the offset.  */
          offset = acc_info->offset;
          if (mi->dim_map[k] == min_escape_l - 1)
            {
              if (!check_transpose_p || mi->is_transposed_p == false)
                tmp1 = offset;
              else
                {
                  tree new_offset;
                  tree d_type_size, d_type_size_k;

                  d_type_size = size_int (mi->dimension_type_size[min_escape_l]);
                  d_type_size_k = size_int (mi->dimension_type_size[k + 1]);

                  new_offset =
                    compute_offset (mi->dimension_type_size[min_escape_l],
                                    mi->dimension_type_size[k + 1], offset);

                  total_elements = new_offset;
                  if (new_offset != offset)
                    {
                      gsi = gsi_for_stmt (acc_info->stmt);
                      tmp1 = force_gimple_operand_gsi (&gsi, total_elements,
                                                       true, NULL,
                                                       true, GSI_SAME_STMT);
                    }
                  else
                    tmp1 = offset;
                }
            }
          else
            {
              d_size = mi->dimension_size[mi->dim_map[k] + 1];
              num_elements =
                fold_build2 (MULT_EXPR, sizetype,
                             fold_convert (sizetype, acc_info->index),
                             fold_convert (sizetype, d_size));
              add_referenced_var (d_size);
              gsi = gsi_for_stmt (acc_info->stmt);
              tmp1 = force_gimple_operand_gsi (&gsi, num_elements, true,
                                               NULL, true, GSI_SAME_STMT);
            }
          /* Replace the offset if needed.  */
          if (tmp1 != offset)
            {
              if (TREE_CODE (offset) == SSA_NAME)
                {
                  gimple use_stmt;

                  FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, offset)
                    FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
                      if (use_stmt == acc_info->stmt)
                        SET_USE (use_p, tmp1);
                }
              else
                {
                  gcc_assert (TREE_CODE (offset) == INTEGER_CST);
                  gimple_assign_set_rhs2 (acc_info->stmt, tmp1);
                  update_stmt (acc_info->stmt);
                }
            }
        }
      /* ??? Meanwhile this happens because we record the same access
         site more than once; we should be using a hash table to
         avoid this and insert the STMT of the access site only
         once.
         else
           gcc_unreachable (); */
      free (acc_info);
    }
  VEC_free (access_site_info_p, heap, mi->access_l);

  update_ssa (TODO_update_ssa);
#ifdef ENABLE_CHECKING
  verify_ssa (true);
#endif
  return 1;
}
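
/* Illustration of the POINTER_PLUS rewrite above (assumed layout: a
   non-transposed 2-D matrix of 4-byte ints, 8-byte pointers): the
   level-0 address computation

     t = a + i * 8;                  i * sizeof (int *)

   gets its offset replaced by i * DIM_SIZE[1] bytes into the flat
   storage, while the level-1 offset j * 4 is kept unchanged; together
   they address element a'[i * D2 + j].  */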

/* Sort the array A of counts, and rearrange DIM_MAP to reflect the new
   order.  */

static void
sort_dim_hot_level (gcov_type * a, int *dim_map, int n)
{
  int i, j, tmp1;
  gcov_type tmp;

  for (i = 0; i < n - 1; i++)
    for (j = 0; j < n - 1 - i; j++)
      if (a[j + 1] < a[j])
        {
          tmp = a[j];           /* Swap a[j] and a[j+1].  */
          a[j] = a[j + 1];
          a[j + 1] = tmp;
          tmp1 = dim_map[j];
          dim_map[j] = dim_map[j + 1];
          dim_map[j + 1] = tmp1;
        }
}
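
/* Illustration with assumed counts: for hotness counts a == {10, 50, 20}
   of dims 0..2 and dim_map == {0, 1, 2}, the bubble sort above leaves
   a == {10, 20, 50} and dim_map == {0, 2, 1}, so the hottest dimension
   ends up innermost.  */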

/* Replace the multiple mallocs (one for each dimension) with one malloc
   of size DIM1 * DIM2 * ... * DIMN * SIZE_OF_ELEMENT.
   Make sure that we hold the size used at the malloc site in a
   new global variable; this way we ensure that the size doesn't
   change and is accessible from all the other functions that
   use the matrix.  Also, the original calls to free are deleted
   and replaced by a new call that frees the flattened matrix.  */
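
/* Source-level illustration (assumed two-dimensional matrix a): the
   original per-row deallocation

     for (i = 0; i < dim1; i++)
       free (a[i]);
     free (a);

   becomes the single free (a) of the flattened storage, whose total
   size lives in a new global variable visible to every function that
   uses the matrix.  */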

static int
transform_allocation_sites (void **slot, void *data ATTRIBUTE_UNUSED)
{
  int i;
  struct matrix_info *mi;
  tree type, oldfn, prev_dim_size;
  gimple call_stmt_0, use_stmt;
  struct cgraph_node *c_node;
  struct cgraph_edge *e;
  gimple_stmt_iterator gsi;
  struct malloc_call_data mcd = {NULL, NULL_TREE, NULL_TREE};
  HOST_WIDE_INT element_size;

  imm_use_iterator imm_iter;
  use_operand_p use_p;
  tree old_size_0, tmp;
  int min_escape_l;
  int id;

  mi = (struct matrix_info *) *slot;

  min_escape_l = mi->min_indirect_level_escape;

  if (!mi->malloc_for_level)
    mi->min_indirect_level_escape = 0;

  if (mi->min_indirect_level_escape < 2)
    return 1;

  mi->dim_map = (int *) xcalloc (mi->min_indirect_level_escape, sizeof (int));
  for (i = 0; i < mi->min_indirect_level_escape; i++)
    mi->dim_map[i] = i;
  if (check_transpose_p)
    {
      int i;

      if (dump_file)
        {
          fprintf (dump_file, "Matrix %s:\n", get_name (mi->decl));
          for (i = 0; i < min_escape_l; i++)
            {
              fprintf (dump_file, "dim %d before sort ", i);
              if (mi->dim_hot_level)
                fprintf (dump_file,
                         "count is " HOST_WIDEST_INT_PRINT_DEC "  \n",
                         mi->dim_hot_level[i]);
            }
        }
      sort_dim_hot_level (mi->dim_hot_level, mi->dim_map,
                          mi->min_indirect_level_escape);
      if (dump_file)
        for (i = 0; i < min_escape_l; i++)
          {
            fprintf (dump_file, "dim %d after sort\n", i);
            if (mi->dim_hot_level)
              fprintf (dump_file, "count is " HOST_WIDE_INT_PRINT_DEC
                       "  \n", (HOST_WIDE_INT) mi->dim_hot_level[i]);
          }
      for (i = 0; i < mi->min_indirect_level_escape; i++)
        {
          if (dump_file)
            fprintf (dump_file, "dim_map[%d] after sort %d\n", i,
                     mi->dim_map[i]);
          if (mi->dim_map[i] != i)
            {
              if (dump_file)
                fprintf (dump_file,
                         "Transposed dimensions: dim %d is now dim %d\n",
                         mi->dim_map[i], i);
              mi->is_transposed_p = true;
            }
        }
    }
  else
    {
      for (i = 0; i < mi->min_indirect_level_escape; i++)
        mi->dim_map[i] = i;
    }
  /* Call statement of allocation site of level 0.  */
  call_stmt_0 = mi->malloc_for_level[0];

  /* Find the correct malloc information.  */
  collect_data_for_malloc_call (call_stmt_0, &mcd);

  mi->dimension_size[0] = mcd.size_var;
  mi->dimension_size_orig[0] = mcd.size_var;
  /* Make sure that the variables in the size expression for
     all the dimensions (above level 0) aren't modified in
     the allocation function.  */
  for (i = 1; i < mi->min_indirect_level_escape; i++)
    {
      tree t;
      check_var_data data;

      /* mi->dimension_size must contain the expression of the size
         calculated in check_allocation_function.  */
      gcc_assert (mi->dimension_size[i]);

      data.fn = mi->allocation_function_decl;
      data.stmt = NULL;
      t = walk_tree_without_duplicates (&(mi->dimension_size[i]),
                                        check_var_notmodified_p,
                                        &data);
      if (t != NULL_TREE)
        {
          mark_min_matrix_escape_level (mi, i, data.stmt);
          break;
        }
    }

  if (mi->min_indirect_level_escape < 2)
    return 1;

  /* Make sure that the size expression is available right before
     the call to malloc of level 0.  */
  gsi = gsi_for_stmt (call_stmt_0);

  /* Find out the size of each dimension by looking at the malloc
     sites and create a global variable to hold it.
     We add the assignment to the global before the malloc of level 0.  */

  /* To be able to produce gimple temporaries.  */
  oldfn = current_function_decl;
  current_function_decl = mi->allocation_function_decl;
  push_cfun (DECL_STRUCT_FUNCTION (mi->allocation_function_decl));

  /* Set the dimension sizes as follows:
     DIM_SIZE[i] = D(i+1) * D(i+2) * ... * D(n) * ELEMENT_SIZE,
     i.e. the size in bytes of one complete sub-matrix at level i,
     where n is the maximum non-escaping level.  */
  element_size = mi->dimension_type_size[mi->min_indirect_level_escape];
  prev_dim_size = NULL_TREE;

  for (i = mi->min_indirect_level_escape - 1; i >= 0; i--)
    {
      tree dim_size, dim_var;
      gimple stmt;
      tree d_type_size;

      /* Now put the size expression in a global variable and initialize it
         to the size expression before the malloc of level 0.  */
      dim_var =
        add_new_static_var (TREE_TYPE
                            (mi->dimension_size_orig[mi->dim_map[i]]));
      type = TREE_TYPE (mi->dimension_size_orig[mi->dim_map[i]]);

      /* DIM_SIZE = MALLOC_SIZE_PARAM / TYPE_SIZE.  */
      /* Find which dim ID becomes dim I.  */
      for (id = 0; id < mi->min_indirect_level_escape; id++)
        if (mi->dim_map[id] == i)
          break;
      d_type_size =
        build_int_cst (type, mi->dimension_type_size[id + 1]);
      if (!prev_dim_size)
        prev_dim_size = build_int_cst (type, element_size);
      if (!check_transpose_p && i == mi->min_indirect_level_escape - 1)
        {
          dim_size = mi->dimension_size_orig[id];
        }
      else
        {
          dim_size =
            fold_build2 (TRUNC_DIV_EXPR, type, mi->dimension_size_orig[id],
                         d_type_size);

          dim_size = fold_build2 (MULT_EXPR, type, dim_size, prev_dim_size);
        }
      dim_size = force_gimple_operand_gsi (&gsi, dim_size, true, NULL,
                                           true, GSI_SAME_STMT);
      /* GLOBAL_HOLDING_THE_SIZE = DIM_SIZE.  */
      stmt = gimple_build_assign (dim_var, dim_size);
      mark_symbols_for_renaming (stmt);
      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);

      prev_dim_size = mi->dimension_size[i] = dim_var;
    }
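
  /* Numeric illustration (assumed 10 x 20 matrix of 4-byte ints with
     8-byte pointers, no transposing): the level-1 mallocs request
     20 * 4 == 80 bytes, so DIM_SIZE[1] == 80 is taken directly from that
     size; the level-0 malloc requests 10 * 8 == 80 bytes, giving
     DIM_SIZE[0] == 80 / 8 * DIM_SIZE[1] == 800, the size in bytes of
     the whole flattened matrix.  */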
  update_ssa (TODO_update_ssa);
  /* Replace the size argument of the level 0 malloc with the total
     size of all the dimensions.  */
  c_node = cgraph_node (mi->allocation_function_decl);
  old_size_0 = gimple_call_arg (call_stmt_0, 0);
  tmp = force_gimple_operand_gsi (&gsi, mi->dimension_size[0], true,
                                  NULL, true, GSI_SAME_STMT);
  if (TREE_CODE (old_size_0) == SSA_NAME)
    {
      FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, old_size_0)
        FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
          if (use_stmt == call_stmt_0)
            SET_USE (use_p, tmp);
    }
  /* When deleting the calls to malloc we also need to remove the edge
     from the call graph to keep it consistent.  Notice that cgraph_edge
     may create a new node in the call graph if there is no node for the
     given declaration; this shouldn't be the case but currently there is
     no way to check this outside of "cgraph.c".  */
  for (i = 1; i < mi->min_indirect_level_escape; i++)
    {
      gimple_stmt_iterator gsi;
      gimple use_stmt1 = NULL;

      gimple call_stmt = mi->malloc_for_level[i];
      gcc_assert (is_gimple_call (call_stmt));
      e = cgraph_edge (c_node, call_stmt);
      gcc_assert (e);
      cgraph_remove_edge (e);
      gsi = gsi_for_stmt (call_stmt);
      /* Remove the call stmt.  */
      gsi_remove (&gsi, true);
      /* Remove the type cast stmt.  */
      FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter,
                             gimple_call_lhs (call_stmt))
        {
          use_stmt1 = use_stmt;
          gsi = gsi_for_stmt (use_stmt);
          gsi_remove (&gsi, true);
        }
      /* Remove the assignment of the allocated area.  */
      FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter,
                             gimple_get_lhs (use_stmt1))
        {
          gsi = gsi_for_stmt (use_stmt);
          gsi_remove (&gsi, true);
        }
    }
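
  /* Each deleted allocation level consists of three statements
     (illustrative GIMPLE-like form, names hypothetical):

       D.1 = malloc (size);       the call stmt
       D.2 = (int *) D.1;         the type cast stmt
       a[i] = D.2;                the assignment of the allocated area

     all of which are removed above.  */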
  update_ssa (TODO_update_ssa);
#ifdef ENABLE_CHECKING
  verify_ssa (true);
#endif
  /* Delete the calls to free.  */
  for (i = 1; i < mi->min_indirect_level_escape; i++)
    {
      gimple_stmt_iterator gsi;

      /* ??? It is not clear why this case is possible,
         but we failed on it once.  */
      if (!mi->free_stmts[i].stmt)
        continue;

      c_node = cgraph_node (mi->free_stmts[i].func);
      gcc_assert (is_gimple_call (mi->free_stmts[i].stmt));
      e = cgraph_edge (c_node, mi->free_stmts[i].stmt);
      gcc_assert (e);
      cgraph_remove_edge (e);
      current_function_decl = mi->free_stmts[i].func;
      set_cfun (DECL_STRUCT_FUNCTION (mi->free_stmts[i].func));
      gsi = gsi_for_stmt (mi->free_stmts[i].stmt);
      gsi_remove (&gsi, true);
    }
  /* Return to the previous state.  */
  current_function_decl = oldfn;
  pop_cfun ();
  return 1;
}


/* Print out the results of the escape analysis.  */
static int
dump_matrix_reorg_analysis (void **slot, void *data ATTRIBUTE_UNUSED)
{
  struct matrix_info *mi = (struct matrix_info *) *slot;

  if (!dump_file)
    return 1;
  fprintf (dump_file, "Matrix \"%s\"; Escaping Level: %d, Num Dims: %d,",
           get_name (mi->decl), mi->min_indirect_level_escape, mi->num_dims);
  fprintf (dump_file, " Malloc Dims: %d, ", mi->max_malloced_level);
  fprintf (dump_file, "\n");
  if (mi->min_indirect_level_escape >= 2)
    fprintf (dump_file, "Flattened %d dimensions\n",
             mi->min_indirect_level_escape);
  return 1;
}

/* Perform matrix flattening.  */

static unsigned int
matrix_reorg (void)
{
  struct cgraph_node *node;

  if (profile_info)
    check_transpose_p = true;
  else
    check_transpose_p = false;
  /* If there are hand-written vectors, we skip this optimization.  */
  for (node = cgraph_nodes; node; node = node->next)
    if (!may_flatten_matrices (node))
      return 0;
  matrices_to_reorg = htab_create (37, mtt_info_hash, mtt_info_eq, mat_free);
  /* Find and record all potential matrices in the program.  */
  find_matrices_decl ();
  /* Analyze the accesses of the matrices (escape analysis).  */
  for (node = cgraph_nodes; node; node = node->next)
    if (node->analyzed)
      {
        tree temp_fn;

        temp_fn = current_function_decl;
        current_function_decl = node->decl;
        push_cfun (DECL_STRUCT_FUNCTION (node->decl));
        bitmap_obstack_initialize (NULL);
        gimple_register_cfg_hooks ();

        if (!gimple_in_ssa_p (cfun))
          {
            free_dominance_info (CDI_DOMINATORS);
            free_dominance_info (CDI_POST_DOMINATORS);
            pop_cfun ();
            current_function_decl = temp_fn;
            bitmap_obstack_release (NULL);

            return 0;
          }

#ifdef ENABLE_CHECKING
        verify_flow_info ();
#endif

        if (!matrices_to_reorg)
          {
            free_dominance_info (CDI_DOMINATORS);
            free_dominance_info (CDI_POST_DOMINATORS);
            pop_cfun ();
            current_function_decl = temp_fn;
            bitmap_obstack_release (NULL);

            return 0;
          }

        /* Create the hash table for the PHI nodes.  */
        htab_mat_acc_phi_nodes = htab_create (37, mat_acc_phi_hash,
                                              mat_acc_phi_eq, free);
        if (!check_transpose_p)
          find_sites_in_func (false);
        else
          {
            find_sites_in_func (true);
            loop_optimizer_init (LOOPS_NORMAL);
            if (current_loops)
              scev_initialize ();
            htab_traverse (matrices_to_reorg, analyze_transpose, NULL);
            if (current_loops)
              {
                scev_finalize ();
                loop_optimizer_finalize ();
                current_loops = NULL;
              }
          }
        /* If the current function is the allocation function of any of
           the matrices, check its allocation and the escaping level.  */
        htab_traverse (matrices_to_reorg, check_allocation_function, NULL);
        free_dominance_info (CDI_DOMINATORS);
        free_dominance_info (CDI_POST_DOMINATORS);
        pop_cfun ();
        current_function_decl = temp_fn;
        bitmap_obstack_release (NULL);
      }
  htab_traverse (matrices_to_reorg, transform_allocation_sites, NULL);
  /* Now transform the accesses.  */
  for (node = cgraph_nodes; node; node = node->next)
    if (node->analyzed)
      {
        /* Remember that allocation sites have already been handled.  */
        tree temp_fn;

        temp_fn = current_function_decl;
        current_function_decl = node->decl;
        push_cfun (DECL_STRUCT_FUNCTION (node->decl));
        bitmap_obstack_initialize (NULL);
        gimple_register_cfg_hooks ();
        record_all_accesses_in_func ();
        htab_traverse (matrices_to_reorg, transform_access_sites, NULL);
        free_dominance_info (CDI_DOMINATORS);
        free_dominance_info (CDI_POST_DOMINATORS);
        pop_cfun ();
        current_function_decl = temp_fn;
        bitmap_obstack_release (NULL);
      }
  htab_traverse (matrices_to_reorg, dump_matrix_reorg_analysis, NULL);

  current_function_decl = NULL;
  set_cfun (NULL);
  matrices_to_reorg = NULL;
  return 0;
}


/* The condition for matrix flattening to be performed.  */
static bool
gate_matrix_reorg (void)
{
  return flag_ipa_matrix_reorg && flag_whole_program;
}
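
/* Consequently the pass only runs when both -fipa-matrix-reorg and
   -fwhole-program are given, e.g.:

     gcc -O2 -fipa-matrix-reorg -fwhole-program prog.c  */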

struct simple_ipa_opt_pass pass_ipa_matrix_reorg =
{
 {
  SIMPLE_IPA_PASS,
  "matrix-reorg",                /* name */
  gate_matrix_reorg,             /* gate */
  matrix_reorg,                  /* execute */
  NULL,                          /* sub */
  NULL,                          /* next */
  0,                             /* static_pass_number */
  TV_NONE,                       /* tv_id */
  0,                             /* properties_required */
  0,                             /* properties_provided */
  0,                             /* properties_destroyed */
  0,                             /* todo_flags_start */
  TODO_dump_cgraph | TODO_dump_func  /* todo_flags_finish */
 }
};