gcc/matrix-reorg.c
1 /* Matrix layout transformations.
2 Copyright (C) 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
3 Contributed by Razya Ladelsky <razya@il.ibm.com>
4 Originally written by Revital Eres and Mustafa Hagog.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
23    Matrix flattening optimization tries to replace an N-dimensional
24 matrix with its equivalent M-dimensional matrix, where M < N.
25 This first implementation focuses on global matrices defined dynamically.
27    When M==1, we actually flatten the whole matrix.
28 For instance consider a two-dimensional array a [dim1] [dim2].
29 The code for allocating space for it usually looks like:
31 a = (int **) malloc(dim1 * sizeof(int *));
32 for (i=0; i<dim1; i++)
33 a[i] = (int *) malloc (dim2 * sizeof(int));
35 If the array "a" is found suitable for this optimization,
36 its allocation is replaced by:
38 a = (int *) malloc (dim1 * dim2 *sizeof(int));
40 and all the references to a[i][j] are replaced by a[i * dim2 + j].
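   Likewise (a sketch, following the free handling described for
   transform_allocation_sites further below): the per-row calls to free,

      for (i=0; i<dim1; i++)
        free (a[i]);
      free (a);

   are deleted and replaced by a single free of the flattened matrix:

      free (a);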
42 The two main phases of the optimization are the analysis
43 and transformation.
44 The driver of the optimization is matrix_reorg ().
48 Analysis phase:
49 ===============
51 We'll number the dimensions outside-in, meaning the most external
52 is 0, then 1, and so on.
53 The analysis part of the optimization determines K, the escape
54    level of an N-dimensional matrix (K <= N), which allows flattening of
55 the external dimensions 0,1,..., K-1. Escape level 0 means that the
56 whole matrix escapes and no flattening is possible.
58 The analysis part is implemented in analyze_matrix_allocation_site()
59 and analyze_matrix_accesses().
61 Transformation phase:
62 =====================
63 In this phase we define the new flattened matrices that replace the
64 original matrices in the code.
65 Implemented in transform_allocation_sites(),
66 transform_access_sites().
68 Matrix Transposing
69 ==================
70 The idea of Matrix Transposing is organizing the matrix in a different
71 layout such that the dimensions are reordered.
72 This could produce better cache behavior in some cases.
74    For example, let's look at the matrix accesses in the following loop:
76 for (i=0; i<N; i++)
77 for (j=0; j<M; j++)
78 access to a[i][j]
80 This loop can produce good cache behavior because the elements of
81 the inner dimension are accessed sequentially.
83 However, if the accesses of the matrix were of the following form:
85 for (i=0; i<N; i++)
86 for (j=0; j<M; j++)
87 access to a[j][i]
89    In this loop we iterate over the columns and not the rows.
90    Therefore, swapping the rows and columns
91    would give an organization with better (cache) locality.
92 Replacing the dimensions of the matrix is called matrix transposing.
94    This example, of course, extends to matrices with more dimensions
95    as well.
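   For illustration (a sketch, using the loops above): once the matrix is
   transposed and then flattened, the access a[j][i] is rewritten along
   the lines of a[i * D1 + j], where D1 is the allocated size of the first
   dimension, so consecutive iterations of the inner j loop touch
   consecutive elements in memory.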
97    Since a program could include all kinds of accesses, there is a decision
98 mechanism, implemented in analyze_transpose(), which implements a
99 heuristic that tries to determine whether to transpose the matrix or not,
100    according to the form of the most dominant accesses.
101 This decision is transferred to the flattening mechanism, and whether
102 the matrix was transposed or not, the matrix is flattened (if possible).
104 This decision making is based on profiling information and loop information.
105    If profiling information is available, the decision-making mechanism is
106    used; otherwise the matrix is only flattened (if possible).
108 Both optimizations are described in the paper "Matrix flattening and
109    transposing in GCC", which was presented at the GCC Summit 2006.
110 http://www.gccsummit.org/2006/2006-GCC-Summit-Proceedings.pdf. */
112 #include "config.h"
113 #include "system.h"
114 #include "coretypes.h"
115 #include "tm.h"
116 #include "tree.h"
117 #include "rtl.h"
118 #include "tree-inline.h"
119 #include "tree-flow.h"
120 #include "tree-flow-inline.h"
121 #include "langhooks.h"
122 #include "hashtab.h"
123 #include "flags.h"
124 #include "ggc.h"
125 #include "debug.h"
126 #include "target.h"
127 #include "cgraph.h"
128 #include "diagnostic-core.h"
129 #include "timevar.h"
130 #include "params.h"
131 #include "fibheap.h"
132 #include "intl.h"
133 #include "function.h"
134 #include "basic-block.h"
135 #include "cfgloop.h"
136 #include "tree-iterator.h"
137 #include "tree-pass.h"
138 #include "opts.h"
139 #include "tree-data-ref.h"
140 #include "tree-chrec.h"
141 #include "tree-scalar-evolution.h"
142 #include "tree-ssa-sccvn.h"
144 /* We need to collect a lot of data from the original malloc,
145 particularly as the gimplifier has converted:
147 orig_var = (struct_type *) malloc (x * sizeof (struct_type *));
149 into
151 T3 = <constant> ; ** <constant> is amount to malloc; precomputed **
152 T4 = malloc (T3);
153 T5 = (struct_type *) T4;
154 orig_var = T5;
156 The following struct fields allow us to collect all the necessary data from
157 the gimplified program. The comments in the struct below are all based
158 on the gimple example above. */
160 struct malloc_call_data
162 gimple call_stmt; /* Tree for "T4 = malloc (T3);" */
163 tree size_var; /* Var decl for T3. */
164 tree malloc_size; /* Tree for "<constant>", the rhs assigned to T3. */
167 static tree can_calculate_expr_before_stmt (tree, sbitmap);
168 static tree can_calculate_stmt_before_stmt (gimple, sbitmap);
170 /* The front end of the compiler, when parsing statements of the form:
172 var = (type_cast) malloc (sizeof (type));
174 always converts this single statement into the following statements
175 (GIMPLE form):
177 T.1 = sizeof (type);
178 T.2 = malloc (T.1);
179 T.3 = (type_cast) T.2;
180 var = T.3;
182 Since we need to create new malloc statements and modify the original
183 statements somewhat, we need to find all four of the above statements.
184 Currently record_call_1 (called for building cgraph edges) finds and
185 records the statements containing the actual call to malloc, but we
186 need to find the rest of the variables/statements on our own. That
187 is what the following function does. */
188 static void
189 collect_data_for_malloc_call (gimple stmt, struct malloc_call_data *m_data)
191 tree size_var = NULL;
192 tree malloc_fn_decl;
193 tree arg1;
195 gcc_assert (is_gimple_call (stmt));
197 malloc_fn_decl = gimple_call_fndecl (stmt);
198 if (malloc_fn_decl == NULL
199 || DECL_FUNCTION_CODE (malloc_fn_decl) != BUILT_IN_MALLOC)
200 return;
202 arg1 = gimple_call_arg (stmt, 0);
203 size_var = arg1;
205 m_data->call_stmt = stmt;
206 m_data->size_var = size_var;
207 if (TREE_CODE (size_var) != VAR_DECL)
208 m_data->malloc_size = size_var;
209 else
210 m_data->malloc_size = NULL_TREE;
213 /* Information about matrix access site.
214 For example: if an access site of matrix arr is arr[i][j]
215 the ACCESS_SITE_INFO structure will have the address
216 of arr as its stmt. The INDEX_INFO will hold information about the
217 initial address and index of each dimension. */
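/* For illustration only (a rough sketch; the actual temporaries and
   constants depend on the types and target): for a global "int **a",
   an access a[i][j] is gimplified into a chain along these lines:

     a.0 = a;
     off0 = i * 8;             <-- offset for level 0, index i
     p0 = a.0 + off0;          <-- POINTER_PLUS_EXPR, level 0
     row = *p0;                <-- MEM_REF, loads the row pointer
     off1 = j * 4;             <-- offset for level 1, index j
     p1 = row + off1;          <-- POINTER_PLUS_EXPR, level 1
     elem = *p1;               <-- MEM_REF, loads the element

   The POINTER_PLUS_EXPR and MEM_REF statements in such a chain are the
   ones recorded as access sites.  */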
218 struct access_site_info
220 /* The statement (MEM_REF or POINTER_PLUS_EXPR). */
221 gimple stmt;
223 /* In case of POINTER_PLUS_EXPR, what is the offset. */
224 tree offset;
226 /* The index which created the offset. */
227 tree index;
229 /* The indirection level of this statement. */
230 int level;
232 /* TRUE for allocation site FALSE for access site. */
233 bool is_alloc;
235 /* The function containing the access site. */
236 tree function_decl;
238    /* This access is iterated in the innermost loop.  */
239 bool iterated_by_inner_most_loop_p;
242 typedef struct access_site_info *access_site_info_p;
243 DEF_VEC_P (access_site_info_p);
244 DEF_VEC_ALLOC_P (access_site_info_p, heap);
246 /* Calls to free when flattening a matrix. */
248 struct free_info
250 gimple stmt;
251 tree func;
254 /* Information about matrix to flatten. */
255 struct matrix_info
257 /* Decl tree of this matrix. */
258 tree decl;
259 /* Number of dimensions; number
260 of "*" in the type declaration. */
261 int num_dims;
263 /* Minimum indirection level that escapes, 0 means that
264 the whole matrix escapes, k means that dimensions
265    0 to ACTUAL_DIM - k escape. */
266 int min_indirect_level_escape;
268 gimple min_indirect_level_escape_stmt;
270 /* Hold the allocation site for each level (dimension).
271 We can use NUM_DIMS as the upper bound and allocate the array
272 once with this number of elements and no need to use realloc and
273 MAX_MALLOCED_LEVEL. */
274 gimple *malloc_for_level;
276 int max_malloced_level;
278 /* Is the matrix transposed. */
279 bool is_transposed_p;
281 /* The location of the allocation sites (they must be in one
282 function). */
283 tree allocation_function_decl;
285 /* The calls to free for each level of indirection. */
286 struct free_info *free_stmts;
288    /* An array which holds for each dimension its size, where
289    dimension 0 is the outermost (the one that contains all the others).
291 tree *dimension_size;
293    /* An array which holds for each dimension its original size
294 (before transposing and flattening take place). */
295 tree *dimension_size_orig;
297    /* An array which holds for each dimension the size of the type
298    of elements accessed in that level (in bytes). */
299 HOST_WIDE_INT *dimension_type_size;
301 int dimension_type_size_len;
303 /* An array collecting the count of accesses for each dimension. */
304 gcov_type *dim_hot_level;
306 /* An array of the accesses to be flattened.
307 elements are of type "struct access_site_info *". */
308 VEC (access_site_info_p, heap) * access_l;
310 /* A map of how the dimensions will be organized at the end of
311 the analyses. */
312 int *dim_map;
315 /* In each phi node we want to record the indirection level we have when we
316 get to the phi node. Usually we will have phi nodes with more than two
317    arguments; then we must ensure that all of them get to the phi node with
318 the same indirection level, otherwise it's not safe to do the flattening.
319 So we record the information regarding the indirection level each time we
320 get to the phi node in this hash table. */
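/* For example (a sketch): if the same pointer reaches a phi node once
   with indirection level 1 and once with level 2, we can no longer tell
   which dimension it designates afterwards, so the escape level is taken
   to be the minimum of the levels reaching the phi node.  */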
322 struct matrix_access_phi_node
324 gimple phi;
325 int indirection_level;
328 /* We use this structure to find if the SSA variable is accessed inside the
329 tree and record the tree containing it. */
331 struct ssa_acc_in_tree
333 /* The variable whose accesses in the tree we are looking for. */
334 tree ssa_var;
335    /* The tree (and its code) in which the ssa_var is accessed; currently
336    it can be a MEM_REF or CALL_EXPR. */
337 enum tree_code t_code;
338 tree t_tree;
339 /* The place in the containing tree. */
340 tree *tp;
341 tree second_op;
342 bool var_found;
345 static void analyze_matrix_accesses (struct matrix_info *, tree, int, bool,
346 sbitmap, bool);
347 static int transform_allocation_sites (void **, void *);
348 static int transform_access_sites (void **, void *);
349 static int analyze_transpose (void **, void *);
350 static int dump_matrix_reorg_analysis (void **, void *);
352 static bool check_transpose_p;
354 /* Hash function used for the phi nodes. */
356 static hashval_t
357 mat_acc_phi_hash (const void *p)
359 const struct matrix_access_phi_node *const ma_phi =
360 (const struct matrix_access_phi_node *) p;
362 return htab_hash_pointer (ma_phi->phi);
365 /* Equality means phi node pointers are the same. */
367 static int
368 mat_acc_phi_eq (const void *p1, const void *p2)
370 const struct matrix_access_phi_node *const phi1 =
371 (const struct matrix_access_phi_node *) p1;
372 const struct matrix_access_phi_node *const phi2 =
373 (const struct matrix_access_phi_node *) p2;
375 if (phi1->phi == phi2->phi)
376 return 1;
378 return 0;
381 /* Hold the PHI nodes we visit during the traversal for escaping
382 analysis. */
383 static htab_t htab_mat_acc_phi_nodes = NULL;
385 /* This hash-table holds the information about the matrices we are
386 going to handle. */
387 static htab_t matrices_to_reorg = NULL;
389 /* Return a hash for MTT, which is really a "matrix_info *". */
390 static hashval_t
391 mtt_info_hash (const void *mtt)
393 return htab_hash_pointer (((const struct matrix_info *) mtt)->decl);
396 /* Return true if MTT1 and MTT2 (which are really both of type
397 "matrix_info *") refer to the same decl. */
398 static int
399 mtt_info_eq (const void *mtt1, const void *mtt2)
401 const struct matrix_info *const i1 = (const struct matrix_info *) mtt1;
402 const struct matrix_info *const i2 = (const struct matrix_info *) mtt2;
404 if (i1->decl == i2->decl)
405 return true;
407 return false;
410 /* Return false if STMT may contain a vector expression.
411    In this situation, no matrices should be flattened. */
412 static bool
413 may_flatten_matrices_1 (gimple stmt)
415 switch (gimple_code (stmt))
417 case GIMPLE_ASSIGN:
418 case GIMPLE_CALL:
419 if (!gimple_has_lhs (stmt))
420 return true;
421 if (TREE_CODE (TREE_TYPE (gimple_get_lhs (stmt))) == VECTOR_TYPE)
423 if (dump_file)
424 fprintf (dump_file,
425 "Found vector type, don't flatten matrix\n");
426 return false;
428 break;
429 case GIMPLE_ASM:
430 /* Asm code could contain vector operations. */
431 return false;
432 break;
433 default:
434 break;
436 return true;
439 /* Return false if there are hand-written vectors in the program.
440 We disable the flattening in such a case. */
441 static bool
442 may_flatten_matrices (struct cgraph_node *node)
444 tree decl;
445 struct function *func;
446 basic_block bb;
447 gimple_stmt_iterator gsi;
449 decl = node->decl;
450 if (node->analyzed)
452 func = DECL_STRUCT_FUNCTION (decl);
453 FOR_EACH_BB_FN (bb, func)
454 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
455 if (!may_flatten_matrices_1 (gsi_stmt (gsi)))
456 return false;
458 return true;
461 /* Given a VAR_DECL, check its type to determine whether it is
462    a definition of a dynamically allocated matrix and therefore is
463 a suitable candidate for the matrix flattening optimization.
464 Return NULL if VAR_DECL is not such decl. Otherwise, allocate
465 a MATRIX_INFO structure, fill it with the relevant information
466 and return a pointer to it.
467    TODO: also handle statically defined arrays. */
468 static struct matrix_info *
469 analyze_matrix_decl (tree var_decl)
471 struct matrix_info *m_node, tmpmi, *mi;
472 tree var_type;
473 int dim_num = 0;
475 gcc_assert (matrices_to_reorg);
477 if (TREE_CODE (var_decl) == PARM_DECL)
478 var_type = DECL_ARG_TYPE (var_decl);
479 else if (TREE_CODE (var_decl) == VAR_DECL)
480 var_type = TREE_TYPE (var_decl);
481 else
482 return NULL;
484 if (!POINTER_TYPE_P (var_type))
485 return NULL;
487 while (POINTER_TYPE_P (var_type))
489 var_type = TREE_TYPE (var_type);
490 dim_num++;
493 if (dim_num <= 1)
494 return NULL;
496 if (!COMPLETE_TYPE_P (var_type)
497 || TREE_CODE (TYPE_SIZE_UNIT (var_type)) != INTEGER_CST)
498 return NULL;
500 /* Check to see if this pointer is already in there. */
501 tmpmi.decl = var_decl;
502 mi = (struct matrix_info *) htab_find (matrices_to_reorg, &tmpmi);
504 if (mi)
505 return NULL;
507 /* Record the matrix. */
509 m_node = (struct matrix_info *) xcalloc (1, sizeof (struct matrix_info));
510 m_node->decl = var_decl;
511 m_node->num_dims = dim_num;
512 m_node->free_stmts
513 = (struct free_info *) xcalloc (dim_num, sizeof (struct free_info));
515 /* Init min_indirect_level_escape to -1 to indicate that no escape
516 analysis has been done yet. */
517 m_node->min_indirect_level_escape = -1;
518 m_node->is_transposed_p = false;
520 return m_node;
523 /* Free matrix E. */
524 static void
525 mat_free (void *e)
527 struct matrix_info *mat = (struct matrix_info *) e;
529 if (!mat)
530 return;
532 free (mat->free_stmts);
533 free (mat->dim_hot_level);
534 free (mat->malloc_for_level);
537 /* Find all potential matrices.
538 TODO: currently we handle only multidimensional
539 dynamically allocated arrays. */
540 static void
541 find_matrices_decl (void)
543 struct matrix_info *tmp;
544 PTR *slot;
545 struct varpool_node *vnode;
547 gcc_assert (matrices_to_reorg);
549 /* For every global variable in the program:
550 Check to see if it's of a candidate type and record it. */
551 for (vnode = varpool_nodes_queue; vnode; vnode = vnode->next_needed)
553 tree var_decl = vnode->decl;
555 if (!var_decl || TREE_CODE (var_decl) != VAR_DECL)
556 continue;
558 if (matrices_to_reorg)
559 if ((tmp = analyze_matrix_decl (var_decl)))
561 if (!TREE_ADDRESSABLE (var_decl))
563 slot = htab_find_slot (matrices_to_reorg, tmp, INSERT);
564 *slot = tmp;
568 return;
571 /* Mark that the matrix MI escapes at level L. */
572 static void
573 mark_min_matrix_escape_level (struct matrix_info *mi, int l, gimple s)
575 if (mi->min_indirect_level_escape == -1
576 || (mi->min_indirect_level_escape > l))
578 mi->min_indirect_level_escape = l;
579 mi->min_indirect_level_escape_stmt = s;
583 /* Find if the SSA variable is accessed inside the
584 tree and record the tree containing it.
585 The only relevant uses are the case of SSA_NAME, or SSA inside
586 MEM_REF, PLUS_EXPR, POINTER_PLUS_EXPR, MULT_EXPR. */
587 static void
588 ssa_accessed_in_tree (tree t, struct ssa_acc_in_tree *a)
590 a->t_code = TREE_CODE (t);
591 switch (a->t_code)
593 case SSA_NAME:
594 if (t == a->ssa_var)
595 a->var_found = true;
596 break;
597 case MEM_REF:
598 if (SSA_VAR_P (TREE_OPERAND (t, 0))
599 && TREE_OPERAND (t, 0) == a->ssa_var)
600 a->var_found = true;
601 break;
602 default:
603 break;
607 /* Find if the SSA variable is accessed on the right hand side of
608 gimple call STMT. */
610 static void
611 ssa_accessed_in_call_rhs (gimple stmt, struct ssa_acc_in_tree *a)
613 tree decl;
614 tree arg;
615 size_t i;
617 a->t_code = CALL_EXPR;
618 for (i = 0; i < gimple_call_num_args (stmt); i++)
620 arg = gimple_call_arg (stmt, i);
621 if (arg == a->ssa_var)
623 a->var_found = true;
624 decl = gimple_call_fndecl (stmt);
625 a->t_tree = decl;
626 break;
631 /* Find if the SSA variable is accessed on the right hand side of
632 gimple assign STMT. */
634 static void
635 ssa_accessed_in_assign_rhs (gimple stmt, struct ssa_acc_in_tree *a)
638 a->t_code = gimple_assign_rhs_code (stmt);
639 switch (a->t_code)
641 tree op1, op2;
643 case SSA_NAME:
644 case MEM_REF:
645 CASE_CONVERT:
646 case VIEW_CONVERT_EXPR:
647 ssa_accessed_in_tree (gimple_assign_rhs1 (stmt), a);
648 break;
649 case POINTER_PLUS_EXPR:
650 case PLUS_EXPR:
651 case MULT_EXPR:
652 op1 = gimple_assign_rhs1 (stmt);
653 op2 = gimple_assign_rhs2 (stmt);
655 if (op1 == a->ssa_var)
657 a->var_found = true;
658 a->second_op = op2;
660 else if (op2 == a->ssa_var)
662 a->var_found = true;
663 a->second_op = op1;
665 break;
666 default:
667 break;
671 /* Record the access/allocation site information for matrix MI so we can
672 handle it later in transformation. */
673 static void
674 record_access_alloc_site_info (struct matrix_info *mi, gimple stmt, tree offset,
675 tree index, int level, bool is_alloc)
677 struct access_site_info *acc_info;
679 if (!mi->access_l)
680 mi->access_l = VEC_alloc (access_site_info_p, heap, 100);
682 acc_info
683 = (struct access_site_info *)
684 xcalloc (1, sizeof (struct access_site_info));
685 acc_info->stmt = stmt;
686 acc_info->offset = offset;
687 acc_info->index = index;
688 acc_info->function_decl = current_function_decl;
689 acc_info->level = level;
690 acc_info->is_alloc = is_alloc;
692 VEC_safe_push (access_site_info_p, heap, mi->access_l, acc_info);
696 /* Record the malloc as the allocation site of the given LEVEL. But
697    first we make sure that all the size parameters passed to malloc in
698 all the allocation sites could be pre-calculated before the call to
699 the malloc of level 0 (the main malloc call). */
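/* For the two-dimensional example in the file header (a sketch): the
   "a = malloc (dim1 * sizeof (int *))" call is the allocation site of
   level 0, and the "a[i] = malloc (dim2 * sizeof (int))" statement inside
   the loop is the allocation site of level 1.  */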
700 static void
701 add_allocation_site (struct matrix_info *mi, gimple stmt, int level)
703 struct malloc_call_data mcd;
705 /* Make sure that the allocation sites are in the same function. */
706 if (!mi->allocation_function_decl)
707 mi->allocation_function_decl = current_function_decl;
708 else if (mi->allocation_function_decl != current_function_decl)
710 int min_malloc_level;
712 gcc_assert (mi->malloc_for_level);
714    /* Find the minimum malloc level that has already been seen;
715    we know its allocation function must be
716    MI->allocation_function_decl, since it's different from
717    CURRENT_FUNCTION_DECL; the escaping level should then be
718    MIN (LEVEL, MIN_MALLOC_LEVEL) - 1, and the allocation function
719 must be set accordingly. */
720 for (min_malloc_level = 0;
721 min_malloc_level < mi->max_malloced_level
722 && mi->malloc_for_level[min_malloc_level]; min_malloc_level++);
723 if (level < min_malloc_level)
725 mi->allocation_function_decl = current_function_decl;
726 mark_min_matrix_escape_level (mi, min_malloc_level, stmt);
728 else
730 mark_min_matrix_escape_level (mi, level, stmt);
731    /* It cannot be that (level == min_malloc_level);
732    we would have returned earlier. */
733 return;
737 /* Find the correct malloc information. */
738 collect_data_for_malloc_call (stmt, &mcd);
740 /* We accept only calls to malloc function; we do not accept
741 calls like calloc and realloc. */
742 if (!mi->malloc_for_level)
744 mi->malloc_for_level = XCNEWVEC (gimple, level + 1);
745 mi->max_malloced_level = level + 1;
747 else if (mi->max_malloced_level <= level)
749 mi->malloc_for_level
750 = XRESIZEVEC (gimple, mi->malloc_for_level, level + 1);
752 /* Zero the newly allocated items. */
753    memset (&(mi->malloc_for_level[mi->max_malloced_level]),
754    0, (level + 1 - mi->max_malloced_level) * sizeof (gimple));
756 mi->max_malloced_level = level + 1;
758 mi->malloc_for_level[level] = stmt;
761    /* Given an assignment statement STMT whose
762    left-hand side is the matrix MI variable, we traverse the immediate
763 uses backwards until we get to a malloc site. We make sure that
764 there is one and only one malloc site that sets this variable. When
765 we are performing the flattening we generate a new variable that
766 will hold the size for each dimension; each malloc that allocates a
767 dimension has the size parameter; we use that parameter to
768 initialize the dimension size variable so we can use it later in
769 the address calculations. LEVEL is the dimension we're inspecting.
770 Return if STMT is related to an allocation site. */
772 static void
773 analyze_matrix_allocation_site (struct matrix_info *mi, gimple stmt,
774 int level, sbitmap visited)
776 if (gimple_assign_copy_p (stmt) || gimple_assign_cast_p (stmt))
778 tree rhs = gimple_assign_rhs1 (stmt);
780 if (TREE_CODE (rhs) == SSA_NAME)
782 gimple def = SSA_NAME_DEF_STMT (rhs);
784 analyze_matrix_allocation_site (mi, def, level, visited);
785 return;
787 /* If we are back to the original matrix variable then we
788 are sure that this is analyzed as an access site. */
789 else if (rhs == mi->decl)
790 return;
792 /* A result of call to malloc. */
793 else if (is_gimple_call (stmt))
795 int call_flags = gimple_call_flags (stmt);
797 if (!(call_flags & ECF_MALLOC))
799 mark_min_matrix_escape_level (mi, level, stmt);
800 return;
802 else
804 tree malloc_fn_decl;
806 malloc_fn_decl = gimple_call_fndecl (stmt);
807 if (malloc_fn_decl == NULL_TREE)
809 mark_min_matrix_escape_level (mi, level, stmt);
810 return;
812 if (DECL_FUNCTION_CODE (malloc_fn_decl) != BUILT_IN_MALLOC)
814 if (dump_file)
815 fprintf (dump_file,
816 "Matrix %s is an argument to function %s\n",
817 get_name (mi->decl), get_name (malloc_fn_decl));
818 mark_min_matrix_escape_level (mi, level, stmt);
819 return;
822 /* This is a call to malloc of level 'level'.
823 mi->max_malloced_level-1 == level means that we've
824 seen a malloc statement of level 'level' before.
825 If the statement is not the same one that we've
826 seen before, then there's another malloc statement
827 for the same level, which means that we need to mark
828 it escaping. */
829 if (mi->malloc_for_level
830 && mi->max_malloced_level-1 == level
831 && mi->malloc_for_level[level] != stmt)
833 mark_min_matrix_escape_level (mi, level, stmt);
834 return;
836 else
837 add_allocation_site (mi, stmt, level);
838 return;
840 /* Looks like we don't know what is happening in this
841    statement so be on the safe side and mark it as escaping. */
842 mark_min_matrix_escape_level (mi, level, stmt);
845 /* The transposing decision making.
846 In order to calculate the profitability of transposing, we collect two
847 types of information regarding the accesses:
848 1. profiling information used to express the hotness of an access, that
849 is how often the matrix is accessed by this access site (count of the
850 access site).
851 2. which dimension in the access site is iterated by the inner
852 most loop containing this access.
854 The matrix will have a calculated value of weighted hotness for each
855 dimension.
856 Intuitively the hotness level of a dimension is a function of how
857 many times it was the most frequently accessed dimension in the
858 highly executed access sites of this matrix.
860    As computed by the following equation, summing over the access sites j:

863    dim_hot_level[i] = sum over j of acc[j]->dim[i]->iter_by_inner_loop * count(j)
868    where i ranges over the n dimensions and j over the m matrix
869    access sites.  acc[j]->dim[i]->iter_by_inner_loop is 1 if acc[j]
870    iterates over dim[i] in its innermost loop, and is 0 otherwise.
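      For instance (a sketch): an access site whose basic block has a
      profile count of 1000 and whose innermost loop iterates dimension 1
      contributes 1000 to dim_hot_level[1] and nothing to the other
      dimensions.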
872 The organization of the new matrix should be according to the
873 hotness of each dimension. The hotness of the dimension implies
874 the locality of the elements.*/
875 static int
876 analyze_transpose (void **slot, void *data ATTRIBUTE_UNUSED)
878 struct matrix_info *mi = (struct matrix_info *) *slot;
879 int min_escape_l = mi->min_indirect_level_escape;
880 struct loop *loop;
881 affine_iv iv;
882 struct access_site_info *acc_info;
883 int i;
885 if (min_escape_l < 2 || !mi->access_l)
887 if (mi->access_l)
889 FOR_EACH_VEC_ELT (access_site_info_p, mi->access_l, i, acc_info)
890 free (acc_info);
891 VEC_free (access_site_info_p, heap, mi->access_l);
894 return 1;
896 if (!mi->dim_hot_level)
897 mi->dim_hot_level =
898 (gcov_type *) xcalloc (min_escape_l, sizeof (gcov_type));
901 for (i = 0; VEC_iterate (access_site_info_p, mi->access_l, i, acc_info);
902 i++)
904 if (gimple_assign_rhs_code (acc_info->stmt) == POINTER_PLUS_EXPR
905 && acc_info->level < min_escape_l)
907 loop = loop_containing_stmt (acc_info->stmt);
908 if (!loop || loop->inner)
910 free (acc_info);
911 continue;
913 if (simple_iv (loop, loop, acc_info->offset, &iv, true))
915 if (iv.step != NULL)
917 HOST_WIDE_INT istep;
919 istep = int_cst_value (iv.step);
920 if (istep != 0)
922 acc_info->iterated_by_inner_most_loop_p = 1;
923 mi->dim_hot_level[acc_info->level] +=
924 gimple_bb (acc_info->stmt)->count;
930 free (acc_info);
932 VEC_free (access_site_info_p, heap, mi->access_l);
934 return 1;
937 /* Find the index which defines the OFFSET from base.
938 We walk from use to def until we find how the offset was defined. */
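/* For example (a sketch): if OFFSET is defined by "offset = i * 4",
   possibly through a chain of copies and casts, the returned index is
   "i"; if no such multiplication by a constant is found, NULL_TREE is
   returned.  */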
939 static tree
940 get_index_from_offset (tree offset, gimple def_stmt)
942 tree op1, op2, index;
944 if (gimple_code (def_stmt) == GIMPLE_PHI)
945 return NULL;
946 if ((gimple_assign_copy_p (def_stmt) || gimple_assign_cast_p (def_stmt))
947 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME)
948 return get_index_from_offset (offset,
949 SSA_NAME_DEF_STMT (gimple_assign_rhs1 (def_stmt)));
950 else if (is_gimple_assign (def_stmt)
951 && gimple_assign_rhs_code (def_stmt) == MULT_EXPR)
953 op1 = gimple_assign_rhs1 (def_stmt);
954 op2 = gimple_assign_rhs2 (def_stmt);
955 if (TREE_CODE (op1) != INTEGER_CST && TREE_CODE (op2) != INTEGER_CST)
956 return NULL;
957 index = (TREE_CODE (op1) == INTEGER_CST) ? op2 : op1;
958 return index;
960 else
961 return NULL_TREE;
964    /* Update MI->dimension_type_size[CURRENT_INDIRECT_LEVEL] with the size
965    of the type related to the SSA_VAR, or the type related to the
966    lhs of STMT, in the case that it is a MEM_REF. */
967 static void
968 update_type_size (struct matrix_info *mi, gimple stmt, tree ssa_var,
969 int current_indirect_level)
971 tree lhs;
972 HOST_WIDE_INT type_size;
974 /* Update type according to the type of the MEM_REF expr. */
975 if (is_gimple_assign (stmt)
976 && TREE_CODE (gimple_assign_lhs (stmt)) == MEM_REF)
978 lhs = gimple_assign_lhs (stmt);
979 gcc_assert (POINTER_TYPE_P
980 (TREE_TYPE (SSA_NAME_VAR (TREE_OPERAND (lhs, 0)))));
981 type_size =
982 int_size_in_bytes (TREE_TYPE
983 (TREE_TYPE
984 (SSA_NAME_VAR (TREE_OPERAND (lhs, 0)))));
986 else
987 type_size = int_size_in_bytes (TREE_TYPE (ssa_var));
989 /* Record the size of elements accessed (as a whole)
990 in the current indirection level (dimension). If the size of
991 elements is not known at compile time, mark it as escaping. */
992 if (type_size <= 0)
993 mark_min_matrix_escape_level (mi, current_indirect_level, stmt);
994 else
996 int l = current_indirect_level;
998 if (!mi->dimension_type_size)
1000 mi->dimension_type_size
1001 = (HOST_WIDE_INT *) xcalloc (l + 1, sizeof (HOST_WIDE_INT));
1002 mi->dimension_type_size_len = l + 1;
1004 else if (mi->dimension_type_size_len < l + 1)
1006 mi->dimension_type_size
1007 = (HOST_WIDE_INT *) xrealloc (mi->dimension_type_size,
1008 (l + 1) * sizeof (HOST_WIDE_INT));
1009 memset (&mi->dimension_type_size[mi->dimension_type_size_len],
1010 0, (l + 1 - mi->dimension_type_size_len)
1011 * sizeof (HOST_WIDE_INT));
1012 mi->dimension_type_size_len = l + 1;
1014    /* Make sure all the accesses in the same level have the same
1015    type size. */
1016 if (!mi->dimension_type_size[l])
1017 mi->dimension_type_size[l] = type_size;
1018 else if (mi->dimension_type_size[l] != type_size)
1019 mark_min_matrix_escape_level (mi, l, stmt);
1023 /* USE_STMT represents a GIMPLE_CALL, where one of the arguments is the
1024 ssa var that we want to check because it came from some use of matrix
1025 MI. CURRENT_INDIRECT_LEVEL is the indirection level we reached so
1026 far. */
1028 static int
1029 analyze_accesses_for_call_stmt (struct matrix_info *mi, tree ssa_var,
1030 gimple use_stmt, int current_indirect_level)
1032 tree fndecl = gimple_call_fndecl (use_stmt);
1034 if (gimple_call_lhs (use_stmt))
1036 tree lhs = gimple_call_lhs (use_stmt);
1037 struct ssa_acc_in_tree lhs_acc, rhs_acc;
1039 memset (&lhs_acc, 0, sizeof (lhs_acc));
1040 memset (&rhs_acc, 0, sizeof (rhs_acc));
1042 lhs_acc.ssa_var = ssa_var;
1043 lhs_acc.t_code = ERROR_MARK;
1044 ssa_accessed_in_tree (lhs, &lhs_acc);
1045 rhs_acc.ssa_var = ssa_var;
1046 rhs_acc.t_code = ERROR_MARK;
1047 ssa_accessed_in_call_rhs (use_stmt, &rhs_acc);
1049    /* The SSA must appear on either the left side or the right side
1050    for us to understand what is happening.
1051 In case the SSA_NAME is found in both sides we should be escaping
1052 at this level because in this case we cannot calculate the
1053 address correctly. */
1054 if ((lhs_acc.var_found && rhs_acc.var_found
1055 && lhs_acc.t_code == MEM_REF)
1056 || (!rhs_acc.var_found && !lhs_acc.var_found))
1058 mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
1059 return current_indirect_level;
1061 gcc_assert (!rhs_acc.var_found || !lhs_acc.var_found);
1063 /* If we are storing to the matrix at some level, then mark it as
1064 escaping at that level. */
1065 if (lhs_acc.var_found)
1067 int l = current_indirect_level + 1;
1069 gcc_assert (lhs_acc.t_code == MEM_REF);
1070 mark_min_matrix_escape_level (mi, l, use_stmt);
1071 return current_indirect_level;
1075 if (fndecl)
1077 if (DECL_FUNCTION_CODE (fndecl) != BUILT_IN_FREE)
1079 if (dump_file)
1080 fprintf (dump_file,
1081 "Matrix %s: Function call %s, level %d escapes.\n",
1082 get_name (mi->decl), get_name (fndecl),
1083 current_indirect_level);
1084 mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
1086 else if (mi->free_stmts[current_indirect_level].stmt != NULL
1087 && mi->free_stmts[current_indirect_level].stmt != use_stmt)
1088 mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
1089 else
1091    /* Record the free statements so we can delete them
1092 later. */
1093 int l = current_indirect_level;
1095 mi->free_stmts[l].stmt = use_stmt;
1096 mi->free_stmts[l].func = current_function_decl;
1099 return current_indirect_level;
1102 /* USE_STMT represents a phi node of the ssa var that we want to
1103    check because it came from some use of matrix MI.
1105 We check all the escaping levels that get to the PHI node
1106 and make sure they are all the same escaping;
1107 if not (which is rare) we let the escaping level be the
1108 minimum level that gets into that PHI because starting from
1109    that level we cannot predict the behavior of the indirections.
1110 CURRENT_INDIRECT_LEVEL is the indirection level we reached so far. */
1112 static void
1113 analyze_accesses_for_phi_node (struct matrix_info *mi, gimple use_stmt,
1114 int current_indirect_level, sbitmap visited,
1115 bool record_accesses)
1118 struct matrix_access_phi_node tmp_maphi, *maphi, **pmaphi;
1120 tmp_maphi.phi = use_stmt;
1121 if ((maphi = (struct matrix_access_phi_node *)
1122 htab_find (htab_mat_acc_phi_nodes, &tmp_maphi)))
1124 if (maphi->indirection_level == current_indirect_level)
1125 return;
1126 else
1128 int level = MIN (maphi->indirection_level,
1129 current_indirect_level);
1130 size_t j;
1131 gimple stmt = NULL;
1133 maphi->indirection_level = level;
1134 for (j = 0; j < gimple_phi_num_args (use_stmt); j++)
1136 tree def = PHI_ARG_DEF (use_stmt, j);
1138 if (gimple_code (SSA_NAME_DEF_STMT (def)) != GIMPLE_PHI)
1139 stmt = SSA_NAME_DEF_STMT (def);
1141 mark_min_matrix_escape_level (mi, level, stmt);
1143 return;
1145 maphi = (struct matrix_access_phi_node *)
1146 xcalloc (1, sizeof (struct matrix_access_phi_node));
1147 maphi->phi = use_stmt;
1148 maphi->indirection_level = current_indirect_level;
1150 /* Insert to hash table. */
1151 pmaphi = (struct matrix_access_phi_node **)
1152 htab_find_slot (htab_mat_acc_phi_nodes, maphi, INSERT);
1153 gcc_assert (pmaphi);
1154 *pmaphi = maphi;
1156 if (!TEST_BIT (visited, SSA_NAME_VERSION (PHI_RESULT (use_stmt))))
1158 SET_BIT (visited, SSA_NAME_VERSION (PHI_RESULT (use_stmt)));
1159 analyze_matrix_accesses (mi, PHI_RESULT (use_stmt),
1160 current_indirect_level, false, visited,
1161 record_accesses);
1162 RESET_BIT (visited, SSA_NAME_VERSION (PHI_RESULT (use_stmt)));
1166 /* USE_STMT represents an assign statement (the rhs or lhs include
1167 the ssa var that we want to check because it came from some use of matrix
1168 MI. CURRENT_INDIRECT_LEVEL is the indirection level we reached so far. */
1170 static int
1171 analyze_accesses_for_assign_stmt (struct matrix_info *mi, tree ssa_var,
1172 gimple use_stmt, int current_indirect_level,
1173 bool last_op, sbitmap visited,
1174 bool record_accesses)
1176 tree lhs = gimple_get_lhs (use_stmt);
1177 struct ssa_acc_in_tree lhs_acc, rhs_acc;
1179 memset (&lhs_acc, 0, sizeof (lhs_acc));
1180 memset (&rhs_acc, 0, sizeof (rhs_acc));
1182 lhs_acc.ssa_var = ssa_var;
1183 lhs_acc.t_code = ERROR_MARK;
1184 ssa_accessed_in_tree (lhs, &lhs_acc);
1185 rhs_acc.ssa_var = ssa_var;
1186 rhs_acc.t_code = ERROR_MARK;
1187 ssa_accessed_in_assign_rhs (use_stmt, &rhs_acc);
1189    /* The SSA must appear on either the left side or the right side
1190    for us to understand what is happening.
1191 In case the SSA_NAME is found in both sides we should be escaping
1192 at this level because in this case we cannot calculate the
1193 address correctly. */
1194 if ((lhs_acc.var_found && rhs_acc.var_found
1195 && lhs_acc.t_code == MEM_REF)
1196 || (!rhs_acc.var_found && !lhs_acc.var_found))
1198 mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
1199 return current_indirect_level;
1201 gcc_assert (!rhs_acc.var_found || !lhs_acc.var_found);
1203 /* If we are storing to the matrix at some level, then mark it as
1204 escaping at that level. */
1205 if (lhs_acc.var_found)
1207 int l = current_indirect_level + 1;
1209 gcc_assert (lhs_acc.t_code == MEM_REF);
1211 if (!(gimple_assign_copy_p (use_stmt)
1212 || gimple_assign_cast_p (use_stmt))
1213 || (TREE_CODE (gimple_assign_rhs1 (use_stmt)) != SSA_NAME))
1214 mark_min_matrix_escape_level (mi, l, use_stmt);
1215 else
1217 gimple def_stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (use_stmt));
1218 analyze_matrix_allocation_site (mi, def_stmt, l, visited);
1219 if (record_accesses)
1220 record_access_alloc_site_info (mi, use_stmt, NULL_TREE,
1221 NULL_TREE, l, true);
1222 update_type_size (mi, use_stmt, NULL, l);
1224 return current_indirect_level;
1226 /* Now, check the right-hand-side, to see how the SSA variable
1227 is used. */
1228 if (rhs_acc.var_found)
1230 if (rhs_acc.t_code != MEM_REF
1231 && rhs_acc.t_code != POINTER_PLUS_EXPR && rhs_acc.t_code != SSA_NAME)
1233 mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
1234 return current_indirect_level;
1236 /* If the access in the RHS has an indirection increase the
1237 indirection level. */
1238 if (rhs_acc.t_code == MEM_REF)
1240 if (record_accesses)
1241 record_access_alloc_site_info (mi, use_stmt, NULL_TREE,
1242 NULL_TREE,
1243 current_indirect_level, true);
1244 current_indirect_level += 1;
1246 else if (rhs_acc.t_code == POINTER_PLUS_EXPR)
1248 gcc_assert (rhs_acc.second_op);
1249 if (last_op)
1250 /* Currently we support only one PLUS expression on the
1251 SSA_NAME that holds the base address of the current
1252    indirection level; to support the more general case we would
1253    need to hold a stack of expressions and regenerate
1254    the calculation later. */
1255 mark_min_matrix_escape_level (mi, current_indirect_level,
1256 use_stmt);
1257 else
1259 tree index;
1260 tree op1, op2;
1262 op1 = gimple_assign_rhs1 (use_stmt);
1263 op2 = gimple_assign_rhs2 (use_stmt);
1265 op2 = (op1 == ssa_var) ? op2 : op1;
1266 if (TREE_CODE (op2) == INTEGER_CST)
1267 index =
1268 build_int_cst (TREE_TYPE (op1),
1269 TREE_INT_CST_LOW (op2) /
1270 int_size_in_bytes (TREE_TYPE (op1)));
1271 else
1273 index =
1274 get_index_from_offset (op2, SSA_NAME_DEF_STMT (op2));
1275 if (index == NULL_TREE)
1277 mark_min_matrix_escape_level (mi,
1278 current_indirect_level,
1279 use_stmt);
1280 return current_indirect_level;
1283 if (record_accesses)
1284 record_access_alloc_site_info (mi, use_stmt, op2,
1285 index,
1286 current_indirect_level, false);
1289 /* If we are storing this level of indirection mark it as
1290 escaping. */
1291 if (lhs_acc.t_code == MEM_REF || TREE_CODE (lhs) != SSA_NAME)
1293 int l = current_indirect_level;
1295 /* One exception is when we are storing to the matrix
1296    variable itself; this is the malloc case.  We must make
1297    sure that it's the one and only call to malloc, so
1298 we call analyze_matrix_allocation_site to check
1299 this out. */
1300 if (TREE_CODE (lhs) != VAR_DECL || lhs != mi->decl)
1301 mark_min_matrix_escape_level (mi, current_indirect_level,
1302 use_stmt);
1303 else
1305 /* Also update the escaping level. */
1306 analyze_matrix_allocation_site (mi, use_stmt, l, visited);
1307 if (record_accesses)
1308 record_access_alloc_site_info (mi, use_stmt, NULL_TREE,
1309 NULL_TREE, l, true);
1312 else
1314 /* We are placing it in an SSA, follow that SSA. */
1315 analyze_matrix_accesses (mi, lhs,
1316 current_indirect_level,
1317 rhs_acc.t_code == POINTER_PLUS_EXPR,
1318 visited, record_accesses);
1321 return current_indirect_level;
1324 /* Given a SSA_VAR (coming from a use statement of the matrix MI),
1325 follow its uses and level of indirection and find out the minimum
1326 indirection level it escapes in (the highest dimension) and the maximum
1327 level it is accessed in (this will be the actual dimension of the
1328 matrix). The information is accumulated in MI.
1329 We look at the immediate uses, if one escapes we finish; if not,
1330 we make a recursive call for each one of the immediate uses of the
1331 resulting SSA name. */
1332 static void
1333 analyze_matrix_accesses (struct matrix_info *mi, tree ssa_var,
1334 int current_indirect_level, bool last_op,
1335 sbitmap visited, bool record_accesses)
1337 imm_use_iterator imm_iter;
1338 use_operand_p use_p;
1340 update_type_size (mi, SSA_NAME_DEF_STMT (ssa_var), ssa_var,
1341 current_indirect_level);
1343 /* We don't go beyond the escaping level when we are performing the
1344 flattening. NOTE: we keep the last indirection level that doesn't
1345 escape. */
1346 if (mi->min_indirect_level_escape > -1
1347 && mi->min_indirect_level_escape <= current_indirect_level)
1348 return;
1350 /* Now go over the uses of the SSA_NAME and check how it is used in
1351 each one of them. We are mainly looking for the pattern MEM_REF,
1352 then a POINTER_PLUS_EXPR, then MEM_REF etc. while in between there could
1353 be any number of copies and casts. */
1354 gcc_assert (TREE_CODE (ssa_var) == SSA_NAME);
1356 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, ssa_var)
1358 gimple use_stmt = USE_STMT (use_p);
1359 if (gimple_code (use_stmt) == GIMPLE_PHI)
1360 /* We check all the escaping levels that get to the PHI node
1361 and make sure they are all the same escaping;
1362 if not (which is rare) we let the escaping level be the
1363 minimum level that gets into that PHI because starting from
1364    that level we cannot predict the behavior of the indirections. */
1366 analyze_accesses_for_phi_node (mi, use_stmt, current_indirect_level,
1367 visited, record_accesses);
1369 else if (is_gimple_call (use_stmt))
1370 analyze_accesses_for_call_stmt (mi, ssa_var, use_stmt,
1371 current_indirect_level);
1372 else if (is_gimple_assign (use_stmt))
1373 current_indirect_level =
1374 analyze_accesses_for_assign_stmt (mi, ssa_var, use_stmt,
1375 current_indirect_level, last_op,
1376 visited, record_accesses);
1380 typedef struct
1382 tree fn;
1383 gimple stmt;
1384 } check_var_data;
1386 /* A walk_tree function to go over the VAR_DECL, PARM_DECL nodes of
1387 the malloc size expression and check that those aren't changed
1388 over the function. */
1389 static tree
1390 check_var_notmodified_p (tree * tp, int *walk_subtrees, void *data)
1392 basic_block bb;
1393 tree t = *tp;
1394 check_var_data *callback_data = (check_var_data*) data;
1395 tree fn = callback_data->fn;
1396 gimple_stmt_iterator gsi;
1397 gimple stmt;
1399 if (TREE_CODE (t) != VAR_DECL && TREE_CODE (t) != PARM_DECL)
1400 return NULL_TREE;
1402 FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (fn))
1404 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1406 stmt = gsi_stmt (gsi);
1407 if (!is_gimple_assign (stmt) && !is_gimple_call (stmt))
1408 continue;
1409 if (gimple_get_lhs (stmt) == t)
1411 callback_data->stmt = stmt;
1412 return t;
1416 *walk_subtrees = 1;
1417 return NULL_TREE;
1420 /* Go backwards in the use-def chains and find out the expression
1421 represented by the possible SSA name in STMT, until it is composed
1422 of only VAR_DECL, PARM_DECL and INT_CST. In case of phi nodes
1423 we make sure that all the arguments represent the same subexpression,
1424 otherwise we fail. */
1426 static tree
1427 can_calculate_stmt_before_stmt (gimple stmt, sbitmap visited)
1429 tree op1, op2, res;
1430 enum tree_code code;
1432 switch (gimple_code (stmt))
1434 case GIMPLE_ASSIGN:
1435 code = gimple_assign_rhs_code (stmt);
1436 op1 = gimple_assign_rhs1 (stmt);
1438 switch (code)
1440 case POINTER_PLUS_EXPR:
1441 case PLUS_EXPR:
1442 case MINUS_EXPR:
1443 case MULT_EXPR:
1445 op2 = gimple_assign_rhs2 (stmt);
1446 op1 = can_calculate_expr_before_stmt (op1, visited);
1447 if (!op1)
1448 return NULL_TREE;
1449 op2 = can_calculate_expr_before_stmt (op2, visited);
1450 if (op2)
1451 return fold_build2 (code, gimple_expr_type (stmt), op1, op2);
1452 return NULL_TREE;
1454 CASE_CONVERT:
1455 res = can_calculate_expr_before_stmt (op1, visited);
1456 if (res != NULL_TREE)
1457 return build1 (code, gimple_expr_type (stmt), res);
1458 else
1459 return NULL_TREE;
1461 default:
1462 if (gimple_assign_single_p (stmt))
1463 return can_calculate_expr_before_stmt (op1, visited);
1464 else
1465 return NULL_TREE;
1468 case GIMPLE_PHI:
1470 size_t j;
1472 res = NULL_TREE;
1473 /* Make sure all the arguments represent the same value. */
1474 for (j = 0; j < gimple_phi_num_args (stmt); j++)
1476 tree new_res;
1477 tree def = PHI_ARG_DEF (stmt, j);
1479 new_res = can_calculate_expr_before_stmt (def, visited);
1480 if (res == NULL_TREE)
1481 res = new_res;
1482 else if (!new_res || !expressions_equal_p (res, new_res))
1483 return NULL_TREE;
1485 return res;
1488 default:
1489 return NULL_TREE;
1493 /* Go backwards in the use-def chains and find out the expression
1494 represented by the possible SSA name in EXPR, until it is composed
1495 of only VAR_DECL, PARM_DECL and INT_CST. In case of phi nodes
1496 we make sure that all the arguments represent the same subexpression,
1497 otherwise we fail. */
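/* For example (a sketch): an SSA name defined as "t = dim1 * 4", where
   "dim1" is a global VAR_DECL, folds back to the expression dim1 * 4;
   an SSA name whose value is only defined through a loop phi cannot be
   reconstructed, and NULL_TREE is returned.  */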
1498 static tree
1499 can_calculate_expr_before_stmt (tree expr, sbitmap visited)
1501 gimple def_stmt;
1502 tree res;
1504 switch (TREE_CODE (expr))
1506 case SSA_NAME:
1507    /* In case of a loop, we don't know how to represent this expression. */
1508 if (TEST_BIT (visited, SSA_NAME_VERSION (expr)))
1509 return NULL_TREE;
1511 SET_BIT (visited, SSA_NAME_VERSION (expr));
1512 def_stmt = SSA_NAME_DEF_STMT (expr);
1513 res = can_calculate_stmt_before_stmt (def_stmt, visited);
1514 RESET_BIT (visited, SSA_NAME_VERSION (expr));
1515 return res;
1516 case VAR_DECL:
1517 case PARM_DECL:
1518 case INTEGER_CST:
1519 return expr;
1521 default:
1522 return NULL_TREE;
1526 /* There should be only one allocation function for the dimensions
1527 that don't escape. Here we check the allocation sites in this
1528 function. We must make sure that all the dimensions are allocated
1529 using malloc and that the malloc size parameter expression could be
1530 pre-calculated before the call to the malloc of dimension 0.
1532 Given a candidate matrix for flattening -- MI -- check if it's
1533 appropriate for flattening -- we analyze the allocation
1534 sites that we recorded in the previous analysis. The result of the
1535 analysis is a level of indirection (matrix dimension) in which the
1536 flattening is safe. We check the following conditions:
1537 1. There is only one allocation site for each dimension.
1538 2. The allocation sites of all the dimensions are in the same
1539 function.
1540 (The above two are being taken care of during the analysis when
1541 we check the allocation site).
1542 3. All the dimensions that we flatten are allocated at once; thus
1543 the total size must be known before the allocation of the
1544 dimension 0 (top level) -- we must make sure we represent the
1545 size of the allocation as an expression of global parameters or
1546    constants and that those don't change over the function. */
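/* For example (a sketch): a level-1 allocation of the form
   malloc (dim2 * sizeof (int)), where dim2 is a global variable that is
   not modified in the allocation function, can be recomputed before the
   level-0 malloc; if the size cannot be expressed in terms of such
   unmodified variables and constants (e.g. it depends on the result of a
   call), the matrix is marked as escaping at that level.  */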
1548 static int
1549 check_allocation_function (void **slot, void *data ATTRIBUTE_UNUSED)
1551 int level;
1552 struct matrix_info *mi = (struct matrix_info *) *slot;
1553 sbitmap visited;
1555 if (!mi->malloc_for_level)
1556 return 1;
1558 visited = sbitmap_alloc (num_ssa_names);
1560 /* Do nothing if the current function is not the allocation
1561 function of MI. */
1562 if (mi->allocation_function_decl != current_function_decl
1563 /* We aren't in the main allocation function yet. */
1564 || !mi->malloc_for_level[0])
1565 return 1;
1567 for (level = 1; level < mi->max_malloced_level; level++)
1568 if (!mi->malloc_for_level[level])
1569 break;
1571 mark_min_matrix_escape_level (mi, level, NULL);
1573 /* Check if the expression of the size passed to malloc could be
1574 pre-calculated before the malloc of level 0. */
1575 for (level = 1; level < mi->min_indirect_level_escape; level++)
1577 gimple call_stmt;
1578 tree size;
1579 struct malloc_call_data mcd = {NULL, NULL_TREE, NULL_TREE};
1581 call_stmt = mi->malloc_for_level[level];
1583 /* Find the correct malloc information. */
1584 collect_data_for_malloc_call (call_stmt, &mcd);
1586 /* No need to check anticipation for constants. */
1587 if (TREE_CODE (mcd.size_var) == INTEGER_CST)
1589 if (!mi->dimension_size)
1591 mi->dimension_size =
1592 (tree *) xcalloc (mi->min_indirect_level_escape,
1593 sizeof (tree));
1594 mi->dimension_size_orig =
1595 (tree *) xcalloc (mi->min_indirect_level_escape,
1596 sizeof (tree));
1598 mi->dimension_size[level] = mcd.size_var;
1599 mi->dimension_size_orig[level] = mcd.size_var;
1600 continue;
1602 /* ??? Here we should also add the way to calculate the size
1603 expression not only know that it is anticipated. */
1604 sbitmap_zero (visited);
1605 size = can_calculate_expr_before_stmt (mcd.size_var, visited);
1606 if (size == NULL_TREE)
1608 mark_min_matrix_escape_level (mi, level, call_stmt);
1609 if (dump_file)
1610 fprintf (dump_file,
1611 "Matrix %s: Cannot calculate the size of allocation, escaping at level %d\n",
1612 get_name (mi->decl), level);
1613 break;
1615 if (!mi->dimension_size)
1617 mi->dimension_size =
1618 (tree *) xcalloc (mi->min_indirect_level_escape, sizeof (tree));
1619 mi->dimension_size_orig =
1620 (tree *) xcalloc (mi->min_indirect_level_escape, sizeof (tree));
1622 mi->dimension_size[level] = size;
1623 mi->dimension_size_orig[level] = size;
1626 /* We don't need those anymore. */
1627 for (level = mi->min_indirect_level_escape;
1628 level < mi->max_malloced_level; level++)
1629 mi->malloc_for_level[level] = NULL;
1630 return 1;
1633 /* Track all access and allocation sites. */
1634 static void
1635 find_sites_in_func (bool record)
1637 sbitmap visited_stmts_1;
1639 gimple_stmt_iterator gsi;
1640 gimple stmt;
1641 basic_block bb;
1642 struct matrix_info tmpmi, *mi;
1644 visited_stmts_1 = sbitmap_alloc (num_ssa_names);
1646 FOR_EACH_BB (bb)
1648 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1650 tree lhs;
1652 stmt = gsi_stmt (gsi);
1653 lhs = gimple_get_lhs (stmt);
1654 if (lhs != NULL_TREE
1655 && TREE_CODE (lhs) == VAR_DECL)
1657 tmpmi.decl = lhs;
1658 if ((mi = (struct matrix_info *) htab_find (matrices_to_reorg,
1659 &tmpmi)))
1661 sbitmap_zero (visited_stmts_1);
1662 analyze_matrix_allocation_site (mi, stmt, 0, visited_stmts_1);
1665 if (is_gimple_assign (stmt)
1666 && gimple_assign_single_p (stmt)
1667 && TREE_CODE (lhs) == SSA_NAME
1668 && TREE_CODE (gimple_assign_rhs1 (stmt)) == VAR_DECL)
1670 tmpmi.decl = gimple_assign_rhs1 (stmt);
1671 if ((mi = (struct matrix_info *) htab_find (matrices_to_reorg,
1672 &tmpmi)))
1674 sbitmap_zero (visited_stmts_1);
1675 analyze_matrix_accesses (mi, lhs, 0,
1676 false, visited_stmts_1, record);
1681 sbitmap_free (visited_stmts_1);
1684 /* Traverse the use-def chains to see if there are matrices that
1685 are passed through pointers and we cannot know how they are accessed.
1686 For each SSA-name defined by a global variable of our interest,
1687 we traverse the use-def chains of the SSA and follow the indirections,
1688 and record in what level of indirection the use of the variable
1689 escapes. A use of a pointer escapes when it is passed to a function,
1690 stored into memory or assigned (except in malloc and free calls). */
1692 static void
1693 record_all_accesses_in_func (void)
1695 unsigned i;
1696 sbitmap visited_stmts_1;
1698 visited_stmts_1 = sbitmap_alloc (num_ssa_names);
1700 for (i = 0; i < num_ssa_names; i++)
1702 struct matrix_info tmpmi, *mi;
1703 tree ssa_var = ssa_name (i);
1704 tree rhs, lhs;
1706 if (!ssa_var
1707 || !is_gimple_assign (SSA_NAME_DEF_STMT (ssa_var))
1708 || !gimple_assign_single_p (SSA_NAME_DEF_STMT (ssa_var)))
1709 continue;
1710 rhs = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (ssa_var));
1711 lhs = gimple_assign_lhs (SSA_NAME_DEF_STMT (ssa_var));
1712 if (TREE_CODE (rhs) != VAR_DECL && TREE_CODE (lhs) != VAR_DECL)
1713 continue;
1715 /* If the RHS is a matrix that we want to analyze, follow the def-use
1716 chain for this SSA_VAR and check for escapes or apply the
1717 flattening. */
1718 tmpmi.decl = rhs;
1719 if ((mi = (struct matrix_info *) htab_find (matrices_to_reorg, &tmpmi)))
1721 /* This variable will track the visited PHI nodes, so we can limit
1722 its size to the maximum number of SSA names. */
1723 sbitmap_zero (visited_stmts_1);
1724 analyze_matrix_accesses (mi, ssa_var,
1725 0, false, visited_stmts_1, true);
1729 sbitmap_free (visited_stmts_1);
1732 /* Used when we want to convert the expression: RESULT = something *
1733    ORIG to RESULT = something * NEW_VAL.  If ORIG and NEW_VAL are powers
1734    of 2, shift operations can be used, else division and
1735 multiplication. */
1737 static tree
1738 compute_offset (HOST_WIDE_INT orig, HOST_WIDE_INT new_val, tree result)
1741 int x, y;
1742 tree result1, ratio, log, orig_tree, new_tree;
1744 x = exact_log2 (orig);
1745 y = exact_log2 (new_val);
1747 if (x != -1 && y != -1)
1749 if (x == y)
1750 return result;
1751 else if (x > y)
1753 log = build_int_cst (TREE_TYPE (result), x - y);
1754 result1 =
1755 fold_build2 (LSHIFT_EXPR, TREE_TYPE (result), result, log);
1756 return result1;
1758 log = build_int_cst (TREE_TYPE (result), y - x);
1759 result1 = fold_build2 (RSHIFT_EXPR, TREE_TYPE (result), result, log);
1761 return result1;
1763 orig_tree = build_int_cst (TREE_TYPE (result), orig);
1764 new_tree = build_int_cst (TREE_TYPE (result), new_val);
1765 ratio = fold_build2 (TRUNC_DIV_EXPR, TREE_TYPE (result), result, orig_tree);
1766 result1 = fold_build2 (MULT_EXPR, TREE_TYPE (result), ratio, new_tree);
1768 return result1;
1772 /* We know that we are allowed to perform matrix flattening (according to the
1773 escape analysis), so we traverse the use-def chains of the SSA vars
1774 defined by the global variables pointing to the matrices of our interest.
1775    In each use of the SSA we calculate the offset from the base address
1776 according to the following equation:
1778 a[I1][I2]...[Ik] , where D1..Dk is the length of each dimension and the
1779 escaping level is m <= k, and a' is the new allocated matrix,
1780 will be translated to :
1782 b[I(m+1)]...[Ik]
1784 where
1785 b = a' + I1*D2...*Dm + I2*D3...Dm + ... + Im
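   For the two-dimensional example in the file header (k == 2, m == 2,
   a sketch): b = a' + I1*D2 + I2, i.e. the access a[i][j] becomes
   a'[i * dim2 + j], matching the flattening example above.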
1788 static int
1789 transform_access_sites (void **slot, void *data ATTRIBUTE_UNUSED)
1791 gimple_stmt_iterator gsi;
1792 struct matrix_info *mi = (struct matrix_info *) *slot;
1793 int min_escape_l = mi->min_indirect_level_escape;
1794 struct access_site_info *acc_info;
1795 enum tree_code code;
1796 int i;
1798 if (min_escape_l < 2 || !mi->access_l)
1799 return 1;
1800 for (i = 0; VEC_iterate (access_site_info_p, mi->access_l, i, acc_info);
1801 i++)
1803 /* This is possible because we collect the access sites before
1804 we determine the final minimum indirection level. */
1805 if (acc_info->level >= min_escape_l)
1807 free (acc_info);
1808 continue;
1810 if (acc_info->is_alloc)
1812 if (acc_info->level >= 0 && gimple_bb (acc_info->stmt))
1814 ssa_op_iter iter;
1815 tree def;
1816 gimple stmt = acc_info->stmt;
1817 tree lhs;
1819 FOR_EACH_SSA_TREE_OPERAND (def, stmt, iter, SSA_OP_DEF)
1820 mark_sym_for_renaming (SSA_NAME_VAR (def));
1821 gsi = gsi_for_stmt (stmt);
1822 gcc_assert (is_gimple_assign (acc_info->stmt));
1823 lhs = gimple_assign_lhs (acc_info->stmt);
1824 if (TREE_CODE (lhs) == SSA_NAME
1825 && acc_info->level < min_escape_l - 1)
1827 imm_use_iterator imm_iter;
1828 use_operand_p use_p;
1829 gimple use_stmt;
1831 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, lhs)
1832 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1834 tree rhs, tmp;
1835 gimple new_stmt;
1837 gcc_assert (gimple_assign_rhs_code (acc_info->stmt)
1838 == MEM_REF);
1839 /* Emit convert statement to convert to type of use. */
1840 tmp = create_tmp_var (TREE_TYPE (lhs), "new");
1841 add_referenced_var (tmp);
1842 rhs = gimple_assign_rhs1 (acc_info->stmt);
1843 rhs = fold_convert (TREE_TYPE (tmp),
1844 TREE_OPERAND (rhs, 0));
1845 new_stmt = gimple_build_assign (tmp, rhs);
1846 tmp = make_ssa_name (tmp, new_stmt);
1847 gimple_assign_set_lhs (new_stmt, tmp);
1848 gsi = gsi_for_stmt (acc_info->stmt);
1849 gsi_insert_after (&gsi, new_stmt, GSI_SAME_STMT);
1850 SET_USE (use_p, tmp);
1853 if (acc_info->level < min_escape_l - 1)
1854 gsi_remove (&gsi, true);
1856 free (acc_info);
1857 continue;
1859 code = gimple_assign_rhs_code (acc_info->stmt);
1860 if (code == MEM_REF
1861 && acc_info->level < min_escape_l - 1)
1863    /* Replace the MEM_REF with a NOP (cast); usually we are casting
1864 from "pointer to type" to "type". */
1865 tree t =
1866 build1 (NOP_EXPR, TREE_TYPE (gimple_assign_rhs1 (acc_info->stmt)),
1867 TREE_OPERAND (gimple_assign_rhs1 (acc_info->stmt), 0));
1868 gimple_assign_set_rhs_code (acc_info->stmt, NOP_EXPR);
1869 gimple_assign_set_rhs1 (acc_info->stmt, t);
1871 else if (code == POINTER_PLUS_EXPR
1872 && acc_info->level < (min_escape_l))
1874 imm_use_iterator imm_iter;
1875 use_operand_p use_p;
1877 tree offset;
1878 int k = acc_info->level;
1879 tree num_elements, total_elements;
1880 tree tmp1;
1881 tree d_size = mi->dimension_size[k];
1883 /* We already made sure in the analysis that the first operand
1884 is the base and the second is the offset. */
1885 offset = acc_info->offset;
1886 if (mi->dim_map[k] == min_escape_l - 1)
1888 if (!check_transpose_p || mi->is_transposed_p == false)
1889 tmp1 = offset;
1890 else
1892 tree new_offset;
1894 new_offset =
1895 compute_offset (mi->dimension_type_size[min_escape_l],
1896 mi->dimension_type_size[k + 1], offset);
1898 total_elements = new_offset;
1899 if (new_offset != offset)
1901 gsi = gsi_for_stmt (acc_info->stmt);
1902 tmp1 = force_gimple_operand_gsi (&gsi, total_elements,
1903 true, NULL,
1904 true, GSI_SAME_STMT);
1906 else
1907 tmp1 = offset;
1910 else
1912 d_size = mi->dimension_size[mi->dim_map[k] + 1];
1913 num_elements =
1914 fold_build2 (MULT_EXPR, sizetype, fold_convert (sizetype, acc_info->index),
1915 fold_convert (sizetype, d_size));
1916 add_referenced_var (d_size);
1917 gsi = gsi_for_stmt (acc_info->stmt);
1918 tmp1 = force_gimple_operand_gsi (&gsi, num_elements, true,
1919 NULL, true, GSI_SAME_STMT);
1921 /* Replace the offset if needed. */
1922 if (tmp1 != offset)
1924 if (TREE_CODE (offset) == SSA_NAME)
1926 gimple use_stmt;
1928 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, offset)
1929 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1930 if (use_stmt == acc_info->stmt)
1931 SET_USE (use_p, tmp1);
1933 else
1935 gcc_assert (TREE_CODE (offset) == INTEGER_CST);
1936 gimple_assign_set_rhs2 (acc_info->stmt, tmp1);
1937 update_stmt (acc_info->stmt);
1941 /* ??? Meanwhile this happens because we record the same access
1942 site more than once; we should be using a hash table to
1943 avoid this and insert the STMT of each access site only
1944 once.
1945 else
1946 gcc_unreachable (); */
1947 free (acc_info);
1949 VEC_free (access_site_info_p, heap, mi->access_l);
1951 update_ssa (TODO_update_ssa);
1952 #ifdef ENABLE_CHECKING
1953 verify_ssa (true);
1954 #endif
1955 return 1;
1958 /* Sort the array A of counts. Arrange DIM_MAP to reflect the new order. */
1960 static void
1961 sort_dim_hot_level (gcov_type * a, int *dim_map, int n)
1963 int i, j, tmp1;
1964 gcov_type tmp;
1966 for (i = 0; i < n - 1; i++)
1968 for (j = 0; j < n - 1 - i; j++)
1970 if (a[j + 1] < a[j])
1972 tmp = a[j]; /* swap a[j] and a[j+1] */
1973 a[j] = a[j + 1];
1974 a[j + 1] = tmp;
1975 tmp1 = dim_map[j];
1976 dim_map[j] = dim_map[j + 1];
1977 dim_map[j + 1] = tmp1;
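/* Illustrative example (editorial, with made-up counts): on entry
   a = { 900, 10, 40 } and dim_map = { 0, 1, 2 }; the bubble sort above
   leaves a = { 10, 40, 900 } and dim_map = { 1, 2, 0 }, so the hottest
   original dimension (0) ends up in the innermost position, as the
   transposing heuristic intends.  */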
1983 /* Replace the multiple mallocs (one for each dimension) with one malloc
1984 whose size is DIM1*DIM2*...*DIMN*size_of_element.
1985 Make sure that we hold the size used at the malloc site inside a
1986 new global variable; this way we ensure that the size doesn't
1987 change and that it is accessible from all the other functions that
1988 use the matrix. Also, the original calls to free are deleted,
1989 and replaced by a new call to free the flattened matrix. */
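/* A source-level sketch of the intended effect (editorial; all names below
   are hypothetical): for a matrix of int allocated through three malloc
   levels with extents d1, d2 and d3, all of them non-escaping, the total
   size is kept in a new global and only the level 0 call remains:

       size_t size_of_m;                      <-- new global holding the size
       ...
       size_of_m = d1 * d2 * d3 * sizeof (int);
       m = (int *) malloc (size_of_m);

   The mallocs of levels 1 and 2 are removed together with their call-graph
   edges, and the frees of the inner levels are removed as well, so a single
   free (m) releases the whole flattened matrix.  */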
1991 static int
1992 transform_allocation_sites (void **slot, void *data ATTRIBUTE_UNUSED)
1994 int i;
1995 struct matrix_info *mi;
1996 tree type, oldfn, prev_dim_size;
1997 gimple call_stmt_0, use_stmt;
1998 struct cgraph_node *c_node;
1999 struct cgraph_edge *e;
2000 gimple_stmt_iterator gsi;
2001 struct malloc_call_data mcd = {NULL, NULL_TREE, NULL_TREE};
2002 HOST_WIDE_INT element_size;
2004 imm_use_iterator imm_iter;
2005 use_operand_p use_p;
2006 tree old_size_0, tmp;
2007 int min_escape_l;
2008 int id;
2010 mi = (struct matrix_info *) *slot;
2012 min_escape_l = mi->min_indirect_level_escape;
2014 if (!mi->malloc_for_level)
2015 mi->min_indirect_level_escape = 0;
2017 if (mi->min_indirect_level_escape < 2)
2018 return 1;
2020 mi->dim_map = (int *) xcalloc (mi->min_indirect_level_escape, sizeof (int));
2021 for (i = 0; i < mi->min_indirect_level_escape; i++)
2022 mi->dim_map[i] = i;
2023 if (check_transpose_p)
2025 int i;
2027 if (dump_file)
2029 fprintf (dump_file, "Matrix %s:\n", get_name (mi->decl));
2030 for (i = 0; i < min_escape_l; i++)
2032 fprintf (dump_file, "dim %d before sort ", i);
2033 if (mi->dim_hot_level)
2034 fprintf (dump_file,
2035 "count is " HOST_WIDEST_INT_PRINT_DEC " \n",
2036 mi->dim_hot_level[i]);
2039 sort_dim_hot_level (mi->dim_hot_level, mi->dim_map,
2040 mi->min_indirect_level_escape);
2041 if (dump_file)
2042 for (i = 0; i < min_escape_l; i++)
2044 fprintf (dump_file, "dim %d after sort\n", i);
2045 if (mi->dim_hot_level)
2046 fprintf (dump_file, "count is " HOST_WIDE_INT_PRINT_DEC
2047 " \n", (HOST_WIDE_INT) mi->dim_hot_level[i]);
2049 for (i = 0; i < mi->min_indirect_level_escape; i++)
2051 if (dump_file)
2052 fprintf (dump_file, "dim_map[%d] after sort %d\n", i,
2053 mi->dim_map[i]);
2054 if (mi->dim_map[i] != i)
2056 if (dump_file)
2057 fprintf (dump_file,
2058 "Transposed dimensions: dim %d is now dim %d\n",
2059 mi->dim_map[i], i);
2060 mi->is_transposed_p = true;
2064 else
2066 for (i = 0; i < mi->min_indirect_level_escape; i++)
2067 mi->dim_map[i] = i;
2069 /* Call statement of allocation site of level 0. */
2070 call_stmt_0 = mi->malloc_for_level[0];
2072 /* Finds the correct malloc information. */
2073 collect_data_for_malloc_call (call_stmt_0, &mcd);
2075 mi->dimension_size[0] = mcd.size_var;
2076 mi->dimension_size_orig[0] = mcd.size_var;
2077 /* Make sure that the variables in the size expression for
2078 all the dimensions (above level 0) aren't modified in
2079 the allocation function. */
2080 for (i = 1; i < mi->min_indirect_level_escape; i++)
2082 tree t;
2083 check_var_data data;
2085 /* mi->dimension_size must contain the expression of the size calculated
2086 in check_allocation_function. */
2087 gcc_assert (mi->dimension_size[i]);
2089 data.fn = mi->allocation_function_decl;
2090 data.stmt = NULL;
2091 t = walk_tree_without_duplicates (&(mi->dimension_size[i]),
2092 check_var_notmodified_p,
2093 &data);
2094 if (t != NULL_TREE)
2096 mark_min_matrix_escape_level (mi, i, data.stmt);
2097 break;
2101 if (mi->min_indirect_level_escape < 2)
2102 return 1;
2104 /* Make sure that the size expression is available
2105 before the call to malloc of level 0. */
2106 gsi = gsi_for_stmt (call_stmt_0);
2108 /* Find out the size of each dimension by looking at the malloc
2109 sites and create a global variable to hold it.
2110 We add the assignment to the global before the malloc of level 0. */
2112 /* To be able to produce gimple temporaries. */
2113 oldfn = current_function_decl;
2114 current_function_decl = mi->allocation_function_decl;
2115 push_cfun (DECL_STRUCT_FUNCTION (mi->allocation_function_decl));
2117 /* Set the dimension sizes as follows:
2118 DIM_SIZE[i] = (elements in dimension i) * DIM_SIZE[i+1], DIM_SIZE[n] = element size,
2119 where n is the maximum non-escaping level; DIM_SIZE[0] is then the total size in bytes. */
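/* Worked example (editorial; assumes the element type is int with
   sizeof (int) == 4): for extents 10 x 20 x 30 and
   min_indirect_level_escape == 3 the loop below computes

       DIM_SIZE[2] = 30 * 4    = 120
       DIM_SIZE[1] = 20 * 120  = 2400
       DIM_SIZE[0] = 10 * 2400 = 24000

   and DIM_SIZE[0] later replaces the size argument of the single malloc.  */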
2120 element_size = mi->dimension_type_size[mi->min_indirect_level_escape];
2121 prev_dim_size = NULL_TREE;
2123 for (i = mi->min_indirect_level_escape - 1; i >= 0; i--)
2125 tree dim_size, dim_var;
2126 gimple stmt;
2127 tree d_type_size;
2129 /* Now put the size expression in a global variable and initialize it to
2130 the size expression before the malloc of level 0. */
2131 dim_var =
2132 add_new_static_var (TREE_TYPE
2133 (mi->dimension_size_orig[mi->dim_map[i]]));
2134 type = TREE_TYPE (mi->dimension_size_orig[mi->dim_map[i]]);
2136 /* DIM_SIZE = MALLOC_SIZE_PARAM / TYPE_SIZE. */
2137 /* Find which dim ID becomes dim I. */
2138 for (id = 0; id < mi->min_indirect_level_escape; id++)
2139 if (mi->dim_map[id] == i)
2140 break;
2141 d_type_size =
2142 build_int_cst (type, mi->dimension_type_size[id + 1]);
2143 if (!prev_dim_size)
2144 prev_dim_size = build_int_cst (type, element_size);
2145 if (!check_transpose_p && i == mi->min_indirect_level_escape - 1)
2147 dim_size = mi->dimension_size_orig[id];
2149 else
2151 dim_size =
2152 fold_build2 (TRUNC_DIV_EXPR, type, mi->dimension_size_orig[id],
2153 d_type_size);
2155 dim_size = fold_build2 (MULT_EXPR, type, dim_size, prev_dim_size);
2157 dim_size = force_gimple_operand_gsi (&gsi, dim_size, true, NULL,
2158 true, GSI_SAME_STMT);
2159 /* GLOBAL_HOLDING_THE_SIZE = DIM_SIZE. */
2160 stmt = gimple_build_assign (dim_var, dim_size);
2161 mark_symbols_for_renaming (stmt);
2162 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
2164 prev_dim_size = mi->dimension_size[i] = dim_var;
2166 update_ssa (TODO_update_ssa);
2167 /* Replace the size argument of the level 0 malloc with
2168 the total size of all the dimensions. */
2169 c_node = cgraph_get_node (mi->allocation_function_decl);
2170 gcc_checking_assert (c_node);
2171 old_size_0 = gimple_call_arg (call_stmt_0, 0);
2172 tmp = force_gimple_operand_gsi (&gsi, mi->dimension_size[0], true,
2173 NULL, true, GSI_SAME_STMT);
2174 if (TREE_CODE (old_size_0) == SSA_NAME)
2176 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, old_size_0)
2177 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
2178 if (use_stmt == call_stmt_0)
2179 SET_USE (use_p, tmp);
2181 /* When deleting the calls to malloc we also need to remove the corresponding
2182 edges from the call graph to keep it consistent. Notice that cgraph_edge may
2183 create a new node in the call graph if there is no node for the given
2184 declaration; this shouldn't be the case, but currently there is no way to
2185 check this outside of "cgraph.c". */
2186 for (i = 1; i < mi->min_indirect_level_escape; i++)
2188 gimple_stmt_iterator gsi;
2190 gimple call_stmt = mi->malloc_for_level[i];
2191 gcc_assert (is_gimple_call (call_stmt));
2192 e = cgraph_edge (c_node, call_stmt);
2193 gcc_assert (e);
2194 cgraph_remove_edge (e);
2195 gsi = gsi_for_stmt (call_stmt);
2196 /* Remove the call stmt. */
2197 gsi_remove (&gsi, true);
2198 /* Remove the assignment of the allocated area. */
2199 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter,
2200 gimple_call_lhs (call_stmt))
2202 gsi = gsi_for_stmt (use_stmt);
2203 gsi_remove (&gsi, true);
2206 update_ssa (TODO_update_ssa);
2207 #ifdef ENABLE_CHECKING
2208 verify_ssa (true);
2209 #endif
2210 /* Delete the calls to free. */
2211 for (i = 1; i < mi->min_indirect_level_escape; i++)
2213 gimple_stmt_iterator gsi;
2215 /* ??? It is not clear why this case is possible, but we have failed on it at least once. */
2216 if (!mi->free_stmts[i].stmt)
2217 continue;
2219 c_node = cgraph_get_node (mi->free_stmts[i].func);
2220 gcc_checking_assert (c_node);
2221 gcc_assert (is_gimple_call (mi->free_stmts[i].stmt));
2222 e = cgraph_edge (c_node, mi->free_stmts[i].stmt);
2223 gcc_assert (e);
2224 cgraph_remove_edge (e);
2225 current_function_decl = mi->free_stmts[i].func;
2226 set_cfun (DECL_STRUCT_FUNCTION (mi->free_stmts[i].func));
2227 gsi = gsi_for_stmt (mi->free_stmts[i].stmt);
2228 gsi_remove (&gsi, true);
2230 /* Restore the previous function context. */
2231 current_function_decl = oldfn;
2232 pop_cfun ();
2233 return 1;
2238 /* Print out the results of the escape analysis. */
2239 static int
2240 dump_matrix_reorg_analysis (void **slot, void *data ATTRIBUTE_UNUSED)
2242 struct matrix_info *mi = (struct matrix_info *) *slot;
2244 if (!dump_file)
2245 return 1;
2246 fprintf (dump_file, "Matrix \"%s\"; Escaping Level: %d, Num Dims: %d,",
2247 get_name (mi->decl), mi->min_indirect_level_escape, mi->num_dims);
2248 fprintf (dump_file, " Malloc Dims: %d, ", mi->max_malloced_level);
2249 fprintf (dump_file, "\n");
2250 if (mi->min_indirect_level_escape >= 2)
2251 fprintf (dump_file, "Flattened %d dimensions \n",
2252 mi->min_indirect_level_escape);
2253 return 1;
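/* An example of the resulting dump output (editorial, with a made-up matrix
   name) for a fully flattened 3-dimensional matrix:

       Matrix "a"; Escaping Level: 3, Num Dims: 3, Malloc Dims: 3,
       Flattened 3 dimensions  */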
2256 /* Perform matrix flattening. */
2258 static unsigned int
2259 matrix_reorg (void)
2261 struct cgraph_node *node;
2263 if (profile_info)
2264 check_transpose_p = true;
2265 else
2266 check_transpose_p = false;
2267 /* If there are hand-written vectors, we skip this optimization. */
2268 for (node = cgraph_nodes; node; node = node->next)
2269 if (!may_flatten_matrices (node))
2270 return 0;
2271 matrices_to_reorg = htab_create (37, mtt_info_hash, mtt_info_eq, mat_free);
2272 /* Find and record all potential matrices in the program. */
2273 find_matrices_decl ();
2274 /* Analyze the accesses of the matrices (escaping analysis). */
2275 for (node = cgraph_nodes; node; node = node->next)
2276 if (node->analyzed)
2278 tree temp_fn;
2280 temp_fn = current_function_decl;
2281 current_function_decl = node->decl;
2282 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
2283 bitmap_obstack_initialize (NULL);
2284 gimple_register_cfg_hooks ();
2286 if (!gimple_in_ssa_p (cfun))
2288 free_dominance_info (CDI_DOMINATORS);
2289 free_dominance_info (CDI_POST_DOMINATORS);
2290 pop_cfun ();
2291 current_function_decl = temp_fn;
2292 bitmap_obstack_release (NULL);
2294 return 0;
2297 #ifdef ENABLE_CHECKING
2298 verify_flow_info ();
2299 #endif
2301 if (!matrices_to_reorg)
2303 free_dominance_info (CDI_DOMINATORS);
2304 free_dominance_info (CDI_POST_DOMINATORS);
2305 pop_cfun ();
2306 current_function_decl = temp_fn;
2307 bitmap_obstack_release (NULL);
2309 return 0;
2312 /* Create the htab for phi nodes. */
2313 htab_mat_acc_phi_nodes = htab_create (37, mat_acc_phi_hash,
2314 mat_acc_phi_eq, free);
2315 if (!check_transpose_p)
2316 find_sites_in_func (false);
2317 else
2319 find_sites_in_func (true);
2320 loop_optimizer_init (LOOPS_NORMAL);
2321 if (current_loops)
2322 scev_initialize ();
2323 htab_traverse (matrices_to_reorg, analyze_transpose, NULL);
2324 if (current_loops)
2326 scev_finalize ();
2327 loop_optimizer_finalize ();
2328 current_loops = NULL;
2331 /* If the current function is the allocation function for any of
2332 the matrices we check its allocation and the escaping level. */
2333 htab_traverse (matrices_to_reorg, check_allocation_function, NULL);
2334 free_dominance_info (CDI_DOMINATORS);
2335 free_dominance_info (CDI_POST_DOMINATORS);
2336 pop_cfun ();
2337 current_function_decl = temp_fn;
2338 bitmap_obstack_release (NULL);
2340 htab_traverse (matrices_to_reorg, transform_allocation_sites, NULL);
2341 /* Now transform the accesses. */
2342 for (node = cgraph_nodes; node; node = node->next)
2343 if (node->analyzed)
2345 /* Remember that allocation sites have been handled. */
2346 tree temp_fn;
2348 temp_fn = current_function_decl;
2349 current_function_decl = node->decl;
2350 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
2351 bitmap_obstack_initialize (NULL);
2352 gimple_register_cfg_hooks ();
2353 record_all_accesses_in_func ();
2354 htab_traverse (matrices_to_reorg, transform_access_sites, NULL);
2355 cgraph_rebuild_references ();
2356 free_dominance_info (CDI_DOMINATORS);
2357 free_dominance_info (CDI_POST_DOMINATORS);
2358 pop_cfun ();
2359 current_function_decl = temp_fn;
2360 bitmap_obstack_release (NULL);
2362 htab_traverse (matrices_to_reorg, dump_matrix_reorg_analysis, NULL);
2364 current_function_decl = NULL;
2365 set_cfun (NULL);
2366 matrices_to_reorg = NULL;
2367 return 0;
2371 /* The condition for matrix flattening to be performed. */
2372 static bool
2373 gate_matrix_reorg (void)
2375 return flag_ipa_matrix_reorg && flag_whole_program;
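/* Usage note (editorial): the pass only runs when both options are enabled,
   e.g.

       gcc -O2 -fipa-matrix-reorg -fwhole-program prog.c

   and, as matrix_reorg above shows, the transposing analysis is additionally
   attempted only when profile information is available (for instance from
   -fprofile-use).  */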
2378 struct simple_ipa_opt_pass pass_ipa_matrix_reorg =
2381 SIMPLE_IPA_PASS,
2382 "matrix-reorg", /* name */
2383 gate_matrix_reorg, /* gate */
2384 matrix_reorg, /* execute */
2385 NULL, /* sub */
2386 NULL, /* next */
2387 0, /* static_pass_number */
2388 TV_NONE, /* tv_id */
2389 0, /* properties_required */
2390 0, /* properties_provided */
2391 0, /* properties_destroyed */
2392 0, /* todo_flags_start */
2393 TODO_dump_cgraph /* todo_flags_finish */