[official-gcc.git] / gcc / matrix-reorg.c
/* Matrix layout transformations.
   Copyright (C) 2006, 2007, 2008 Free Software Foundation, Inc.
   Contributed by Razya Ladelsky <razya@il.ibm.com>
   Originally written by Revital Eres and Mustafa Hagog.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/*
   Matrix flattening optimization tries to replace an N-dimensional
   matrix with its equivalent M-dimensional matrix, where M < N.
   This first implementation focuses on global matrices defined
   dynamically.

   When M==1, we actually flatten the whole matrix.
   For instance consider a two-dimensional array a [dim1] [dim2].
   The code for allocating space for it usually looks like:

     a = (int **) malloc(dim1 * sizeof(int *));
     for (i=0; i<dim1; i++)
       a[i] = (int *) malloc (dim2 * sizeof(int));

   If the array "a" is found suitable for this optimization,
   its allocation is replaced by:

     a = (int *) malloc (dim1 * dim2 * sizeof(int));

   and all the references to a[i][j] are replaced by a[i * dim2 + j].
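
   (As a further illustration, not part of the original text: a
   three-dimensional access a[i][j][k] would analogously be rewritten
   as a[(i * dim2 + j) * dim3 + k].)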

   The two main phases of the optimization are the analysis
   and transformation.
   The driver of the optimization is matrix_reorg ().


   Analysis phase:
   ===============

   We'll number the dimensions outside-in, meaning the most external
   is 0, then 1, and so on.
   The analysis part of the optimization determines K, the escape
   level of an N-dimensional matrix (K <= N), that allows flattening of
   the external dimensions 0, 1, ..., K-1.  Escape level 0 means that
   the whole matrix escapes and no flattening is possible.
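
   (Illustration, not part of the original text: for a three-dimensional
   matrix with escape level K == 2, dimensions 0 and 1 can be collapsed
   into a single dimension, while dimension 2 keeps its own allocation.)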

   The analysis part is implemented in analyze_matrix_allocation_site()
   and analyze_matrix_accesses().

   Transformation phase:
   =====================

   In this phase we define the new flattened matrices that replace the
   original matrices in the code.
   Implemented in transform_allocation_sites(),
   transform_access_sites().

   Matrix Transposing
   ==================

   The idea of Matrix Transposing is organizing the matrix in a different
   layout such that the dimensions are reordered.
   This could produce better cache behavior in some cases.

   For example, let's look at the matrix accesses in the following loop:

     for (i=0; i<N; i++)
       for (j=0; j<M; j++)
         access to a[i][j]

   This loop can produce good cache behavior because the elements of
   the inner dimension are accessed sequentially.

   However, the accesses of the matrix might instead be of the
   following form:

     for (i=0; i<N; i++)
       for (j=0; j<M; j++)
         access to a[j][i]

   In this loop we iterate the columns and not the rows.
   Therefore, swapping the rows and columns of the matrix yields a
   layout with better (cache) locality.
   Swapping the dimensions of the matrix is called matrix transposing.

   This example, of course, extends to matrices of more dimensions
   as well.
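
   (Illustration, not part of the original text: after transposing, the
   loop above accesses a'[i][j], where a' is the transposed matrix, so
   the innermost loop once again walks the innermost dimension
   sequentially.)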

   Since a program could include all kinds of accesses, there is a
   decision mechanism, implemented in analyze_transpose(), which
   implements a heuristic that tries to determine whether to transpose
   the matrix or not, according to the form of the most dominant
   accesses.
   This decision is passed on to the flattening mechanism; whether or
   not the matrix was transposed, the matrix is flattened (if possible).

   This decision making is based on profiling information and loop
   information.  If profiling information is available, the decision
   making mechanism is invoked; otherwise the matrix is only flattened
   (if possible).

   Both optimizations are described in the paper "Matrix flattening and
   transposing in GCC" which was presented at the GCC Summit 2006.
   http://www.gccsummit.org/2006/2006-GCC-Summit-Proceedings.pdf  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "rtl.h"
#include "c-tree.h"
#include "tree-inline.h"
#include "tree-flow.h"
#include "tree-flow-inline.h"
#include "langhooks.h"
#include "hashtab.h"
#include "toplev.h"
#include "flags.h"
#include "ggc.h"
#include "debug.h"
#include "target.h"
#include "cgraph.h"
#include "diagnostic.h"
#include "timevar.h"
#include "params.h"
#include "fibheap.h"
#include "c-common.h"
#include "intl.h"
#include "function.h"
#include "basic-block.h"
#include "cfgloop.h"
#include "tree-iterator.h"
#include "tree-pass.h"
#include "opts.h"
#include "tree-data-ref.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"

/* We need to collect a lot of data from the original malloc,
   particularly as the gimplifier has converted:

   orig_var = (struct_type *) malloc (x * sizeof (struct_type *));

   into

   T3 = <constant> ;  ** <constant> is amount to malloc; precomputed **
   T4 = malloc (T3);
   T5 = (struct_type *) T4;
   orig_var = T5;

   The following struct fields allow us to collect all the necessary data
   from the gimplified program.  The comments in the struct below are all
   based on the gimple example above.  */

struct malloc_call_data
{
  tree call_stmt;		/* Tree for "T4 = malloc (T3);".  */
  tree size_var;		/* Var decl for T3.  */
  tree malloc_size;		/* Tree for "<constant>", the rhs assigned to T3.  */
};

/* The front end of the compiler, when parsing statements of the form:

   var = (type_cast) malloc (sizeof (type));

   always converts this single statement into the following statements
   (GIMPLE form):

   T.1 = sizeof (type);
   T.2 = malloc (T.1);
   T.3 = (type_cast) T.2;
   var = T.3;

   Since we need to create new malloc statements and modify the original
   statements somewhat, we need to find all four of the above statements.
   Currently record_call_1 (called for building cgraph edges) finds and
   records the statements containing the actual call to malloc, but we
   need to find the rest of the variables/statements on our own.  That
   is what the following function does.  */
static void
collect_data_for_malloc_call (tree stmt, struct malloc_call_data *m_data)
{
  tree size_var = NULL;
  tree malloc_fn_decl;
  tree tmp;
  tree arg1;

  gcc_assert (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT);

  tmp = get_call_expr_in (stmt);
  malloc_fn_decl = CALL_EXPR_FN (tmp);
  if (TREE_CODE (malloc_fn_decl) != ADDR_EXPR
      || TREE_CODE (TREE_OPERAND (malloc_fn_decl, 0)) != FUNCTION_DECL
      || DECL_FUNCTION_CODE (TREE_OPERAND (malloc_fn_decl, 0)) !=
      BUILT_IN_MALLOC)
    return;

  arg1 = CALL_EXPR_ARG (tmp, 0);
  size_var = arg1;

  m_data->call_stmt = stmt;
  m_data->size_var = size_var;
  if (TREE_CODE (size_var) != VAR_DECL)
    m_data->malloc_size = size_var;
  else
    m_data->malloc_size = NULL_TREE;
}

/* Information about matrix access site.
   For example: if an access site of matrix arr is arr[i][j]
   the ACCESS_SITE_INFO structure will have the address
   of arr as its stmt.  The INDEX_INFO will hold information about the
   initial address and index of each dimension.  */
struct access_site_info
{
  /* The statement (INDIRECT_REF or POINTER_PLUS_EXPR).  */
  tree stmt;

  /* In case of POINTER_PLUS_EXPR, what is the offset.  */
  tree offset;

  /* The index which created the offset.  */
  tree index;

  /* The indirection level of this statement.  */
  int level;

  /* TRUE for allocation site, FALSE for access site.  */
  bool is_alloc;

  /* The function containing the access site.  */
  tree function_decl;

  /* This access is iterated in the innermost loop.  */
  bool iterated_by_inner_most_loop_p;
};

typedef struct access_site_info *access_site_info_p;
DEF_VEC_P (access_site_info_p);
DEF_VEC_ALLOC_P (access_site_info_p, heap);

/* Information about matrix to flatten.  */
struct matrix_info
{
  /* Decl tree of this matrix.  */
  tree decl;
  /* Number of dimensions; number
     of "*" in the type declaration.  */
  int num_dims;

  /* Minimum indirection level that escapes, 0 means that
     the whole matrix escapes, k means that dimensions
     0 to ACTUAL_DIM - k escape.  */
  int min_indirect_level_escape;

  tree min_indirect_level_escape_stmt;

  /* Is the matrix transposed.  */
  bool is_transposed_p;

  /* Hold the allocation site for each level (dimension).
     We can use NUM_DIMS as the upper bound and allocate the array
     once with this number of elements, with no need to use realloc and
     MAX_MALLOCED_LEVEL.  */
  tree *malloc_for_level;

  int max_malloced_level;

  /* The location of the allocation sites (they must be in one
     function).  */
  tree allocation_function_decl;

  /* The calls to free for each level of indirection.  */
  struct free_info
  {
    tree stmt;
    tree func;
  } *free_stmts;

  /* An array which holds for each dimension its size, where
     dimension 0 is the outermost (the one that contains all the
     others).  */
  tree *dimension_size;

  /* An array which holds for each dimension its original size
     (before transposing and flattening take place).  */
  tree *dimension_size_orig;

  /* An array which holds for each dimension the size of the type of
     the elements accessed in that level (in bytes).  */
  HOST_WIDE_INT *dimension_type_size;

  int dimension_type_size_len;

  /* An array collecting the count of accesses for each dimension.  */
  gcov_type *dim_hot_level;

  /* An array of the accesses to be flattened.
     Elements are of type "struct access_site_info *".  */
  VEC (access_site_info_p, heap) * access_l;

  /* A map of how the dimensions will be organized at the end of
     the analyses.  */
  int *dim_map;
};

/* In each phi node we want to record the indirection level we have when we
   get to the phi node.  Usually we will have phi nodes with more than two
   arguments, then we must assure that all of them get to the phi node with
   the same indirection level, otherwise it's not safe to do the flattening.
   So we record the information regarding the indirection level each time we
   get to the phi node in this hash table.  */

struct matrix_access_phi_node
{
  tree phi;
  int indirection_level;
};

/* We use this structure to find if the SSA variable is accessed inside the
   tree and record the tree containing it.  */

struct ssa_acc_in_tree
{
  /* The variable whose accesses in the tree we are looking for.  */
  tree ssa_var;
  /* The tree and code inside it the ssa_var is accessed, currently
     it could be an INDIRECT_REF or CALL_EXPR.  */
  enum tree_code t_code;
  tree t_tree;
  /* The place in the containing tree.  */
  tree *tp;
  tree second_op;
  bool var_found;
};

static void analyze_matrix_accesses (struct matrix_info *, tree, int, bool,
				     sbitmap, bool);
static int transform_allocation_sites (void **, void *);
static int transform_access_sites (void **, void *);
static int analyze_transpose (void **, void *);
static int dump_matrix_reorg_analysis (void **, void *);

static bool check_transpose_p;

/* Hash function used for the phi nodes.  */

static hashval_t
mat_acc_phi_hash (const void *p)
{
  const struct matrix_access_phi_node *const ma_phi =
    (const struct matrix_access_phi_node *) p;

  return htab_hash_pointer (ma_phi->phi);
}

/* Equality means phi node pointers are the same.  */

static int
mat_acc_phi_eq (const void *p1, const void *p2)
{
  const struct matrix_access_phi_node *const phi1 =
    (const struct matrix_access_phi_node *) p1;
  const struct matrix_access_phi_node *const phi2 =
    (const struct matrix_access_phi_node *) p2;

  if (phi1->phi == phi2->phi)
    return 1;

  return 0;
}

/* Hold the PHI nodes we visit during the traversal for escaping
   analysis.  */
static htab_t htab_mat_acc_phi_nodes = NULL;

/* This hash-table holds the information about the matrices we are
   going to handle.  */
static htab_t matrices_to_reorg = NULL;

/* Return a hash for MTT, which is really a "matrix_info *".  */
static hashval_t
mtt_info_hash (const void *mtt)
{
  return htab_hash_pointer (((const struct matrix_info *) mtt)->decl);
}

/* Return true if MTT1 and MTT2 (which are really both of type
   "matrix_info *") refer to the same decl.  */
static int
mtt_info_eq (const void *mtt1, const void *mtt2)
{
  const struct matrix_info *const i1 = (const struct matrix_info *) mtt1;
  const struct matrix_info *const i2 = (const struct matrix_info *) mtt2;

  if (i1->decl == i2->decl)
    return true;

  return false;
}

/* Return the innermost tree that is not a cast.  */
static tree
get_inner_of_cast_expr (tree t)
{
  while (CONVERT_EXPR_P (t)
	 || TREE_CODE (t) == VIEW_CONVERT_EXPR)
    t = TREE_OPERAND (t, 0);

  return t;
}

/* Return false if STMT may contain a vector expression.
   In that case, no matrices should be flattened.  */
static bool
may_flatten_matrices_1 (tree stmt)
{
  tree t;

  switch (TREE_CODE (stmt))
    {
    case GIMPLE_MODIFY_STMT:
      t = GIMPLE_STMT_OPERAND (stmt, 1);
      while (CONVERT_EXPR_P (t))
	{
	  if (TREE_TYPE (t) && POINTER_TYPE_P (TREE_TYPE (t)))
	    {
	      tree pointee;

	      pointee = TREE_TYPE (t);
	      while (POINTER_TYPE_P (pointee))
		pointee = TREE_TYPE (pointee);
	      if (TREE_CODE (pointee) == VECTOR_TYPE)
		{
		  if (dump_file)
		    fprintf (dump_file,
			     "Found vector type, don't flatten matrix\n");
		  return false;
		}
	    }
	  t = TREE_OPERAND (t, 0);
	}
      break;
    case ASM_EXPR:
      /* Asm code could contain vector operations.  */
      return false;
      break;
    default:
      break;
    }
  return true;
}

/* Return false if there are hand-written vectors in the program.
   We disable the flattening in such a case.  */
static bool
may_flatten_matrices (struct cgraph_node *node)
{
  tree decl;
  struct function *func;
  basic_block bb;
  block_stmt_iterator bsi;

  decl = node->decl;
  if (node->analyzed)
    {
      func = DECL_STRUCT_FUNCTION (decl);
      FOR_EACH_BB_FN (bb, func)
	for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
	  if (!may_flatten_matrices_1 (bsi_stmt (bsi)))
	    return false;
    }
  return true;
}

/* Given a VAR_DECL, check its type to determine whether it is
   a definition of a dynamically allocated matrix and therefore is
   a suitable candidate for the matrix flattening optimization.
   Return NULL if VAR_DECL is not such a decl.  Otherwise, allocate
   a MATRIX_INFO structure, fill it with the relevant information
   and return a pointer to it.
   TODO: handle also statically defined arrays.  */
static struct matrix_info *
analyze_matrix_decl (tree var_decl)
{
  struct matrix_info *m_node, tmpmi, *mi;
  tree var_type;
  int dim_num = 0;

  gcc_assert (matrices_to_reorg);

  if (TREE_CODE (var_decl) == PARM_DECL)
    var_type = DECL_ARG_TYPE (var_decl);
  else if (TREE_CODE (var_decl) == VAR_DECL)
    var_type = TREE_TYPE (var_decl);
  else
    return NULL;

  if (!POINTER_TYPE_P (var_type))
    return NULL;

  while (POINTER_TYPE_P (var_type))
    {
      var_type = TREE_TYPE (var_type);
      dim_num++;
    }

  if (dim_num <= 1)
    return NULL;

  if (!COMPLETE_TYPE_P (var_type)
      || TREE_CODE (TYPE_SIZE_UNIT (var_type)) != INTEGER_CST)
    return NULL;

  /* Check to see if this pointer is already in there.  */
  tmpmi.decl = var_decl;
  mi = (struct matrix_info *) htab_find (matrices_to_reorg, &tmpmi);

  if (mi)
    return NULL;

  /* Record the matrix.  */

  m_node = (struct matrix_info *) xcalloc (1, sizeof (struct matrix_info));
  m_node->decl = var_decl;
  m_node->num_dims = dim_num;
  m_node->free_stmts
    = (struct free_info *) xcalloc (dim_num, sizeof (struct free_info));

  /* Init min_indirect_level_escape to -1 to indicate that no escape
     analysis has been done yet.  */
  m_node->min_indirect_level_escape = -1;
  m_node->is_transposed_p = false;

  return m_node;
}

/* Free matrix E.  */
static void
mat_free (void *e)
{
  struct matrix_info *mat = (struct matrix_info *) e;

  if (!mat)
    return;

  if (mat->free_stmts)
    free (mat->free_stmts);
  if (mat->dim_hot_level)
    free (mat->dim_hot_level);
  if (mat->malloc_for_level)
    free (mat->malloc_for_level);
}

/* Find all potential matrices.
   TODO: currently we handle only multidimensional
   dynamically allocated arrays.  */
static void
find_matrices_decl (void)
{
  struct matrix_info *tmp;
  PTR *slot;
  struct varpool_node *vnode;

  gcc_assert (matrices_to_reorg);

  /* For every global variable in the program:
     Check to see if it's of a candidate type and record it.  */
  for (vnode = varpool_nodes_queue; vnode; vnode = vnode->next_needed)
    {
      tree var_decl = vnode->decl;

      if (!var_decl || TREE_CODE (var_decl) != VAR_DECL)
	continue;

      if (matrices_to_reorg)
	if ((tmp = analyze_matrix_decl (var_decl)))
	  {
	    if (!TREE_ADDRESSABLE (var_decl))
	      {
		slot = htab_find_slot (matrices_to_reorg, tmp, INSERT);
		*slot = tmp;
	      }
	  }
    }
  return;
}

/* Mark that the matrix MI escapes at level L.  */
static void
mark_min_matrix_escape_level (struct matrix_info *mi, int l, tree s)
{
  if (mi->min_indirect_level_escape == -1
      || (mi->min_indirect_level_escape > l))
    {
      mi->min_indirect_level_escape = l;
      mi->min_indirect_level_escape_stmt = s;
    }
}

/* Find if the SSA variable is accessed inside the
   tree and record the tree containing it.
   The only relevant uses are the case of SSA_NAME, or SSA inside
   INDIRECT_REF, CALL_EXPR, PLUS_EXPR, POINTER_PLUS_EXPR, MULT_EXPR.  */
static void
ssa_accessed_in_tree (tree t, struct ssa_acc_in_tree *a)
{
  tree call, decl;
  tree arg;
  call_expr_arg_iterator iter;

  a->t_code = TREE_CODE (t);
  switch (a->t_code)
    {
      tree op1, op2;

    case SSA_NAME:
      if (t == a->ssa_var)
	a->var_found = true;
      break;
    case INDIRECT_REF:
      if (SSA_VAR_P (TREE_OPERAND (t, 0))
	  && TREE_OPERAND (t, 0) == a->ssa_var)
	a->var_found = true;
      break;
    case CALL_EXPR:
      FOR_EACH_CALL_EXPR_ARG (arg, iter, t)
	{
	  if (arg == a->ssa_var)
	    {
	      a->var_found = true;
	      call = get_call_expr_in (t);
	      if (call && (decl = get_callee_fndecl (call)))
		a->t_tree = decl;
	      break;
	    }
	}
      break;
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MULT_EXPR:
      op1 = TREE_OPERAND (t, 0);
      op2 = TREE_OPERAND (t, 1);

      if (op1 == a->ssa_var)
	{
	  a->var_found = true;
	  a->second_op = op2;
	}
      else if (op2 == a->ssa_var)
	{
	  a->var_found = true;
	  a->second_op = op1;
	}
      break;
    default:
      break;
    }
}

/* Record the access/allocation site information for matrix MI so we can
   handle it later in transformation.  */
static void
record_access_alloc_site_info (struct matrix_info *mi, tree stmt, tree offset,
			       tree index, int level, bool is_alloc)
{
  struct access_site_info *acc_info;

  if (!mi->access_l)
    mi->access_l = VEC_alloc (access_site_info_p, heap, 100);

  acc_info
    = (struct access_site_info *)
    xcalloc (1, sizeof (struct access_site_info));
  acc_info->stmt = stmt;
  acc_info->offset = offset;
  acc_info->index = index;
  acc_info->function_decl = current_function_decl;
  acc_info->level = level;
  acc_info->is_alloc = is_alloc;

  VEC_safe_push (access_site_info_p, heap, mi->access_l, acc_info);
}

/* Record the malloc as the allocation site of the given LEVEL.  But
   first we make sure that all the size parameters passed to malloc in
   all the allocation sites could be pre-calculated before the call to
   the malloc of level 0 (the main malloc call).  */
static void
add_allocation_site (struct matrix_info *mi, tree stmt, int level)
{
  struct malloc_call_data mcd;

  /* Make sure that the allocation sites are in the same function.  */
  if (!mi->allocation_function_decl)
    mi->allocation_function_decl = current_function_decl;
  else if (mi->allocation_function_decl != current_function_decl)
    {
      int min_malloc_level;

      gcc_assert (mi->malloc_for_level);

      /* Find the minimum malloc level that already has been seen;
	 we know its allocation function must be
	 MI->allocation_function_decl since it's different from
	 CURRENT_FUNCTION_DECL; then the escaping level should be
	 MIN (LEVEL, MIN_MALLOC_LEVEL) - 1, and the allocation function
	 must be set accordingly.  */
      for (min_malloc_level = 0;
	   min_malloc_level < mi->max_malloced_level
	   && mi->malloc_for_level[min_malloc_level]; min_malloc_level++);
      if (level < min_malloc_level)
	{
	  mi->allocation_function_decl = current_function_decl;
	  mark_min_matrix_escape_level (mi, min_malloc_level, stmt);
	}
      else
	{
	  mark_min_matrix_escape_level (mi, level, stmt);
	  /* It cannot be that (level == min_malloc_level),
	     as we would have returned earlier.  */
	  return;
	}
    }

  /* Find the correct malloc information.  */
  collect_data_for_malloc_call (stmt, &mcd);

  /* We accept only calls to malloc function; we do not accept
     calls like calloc and realloc.  */
  if (!mi->malloc_for_level)
    {
      mi->malloc_for_level = XCNEWVEC (tree, level + 1);
      mi->max_malloced_level = level + 1;
    }
  else if (mi->max_malloced_level <= level)
    {
      mi->malloc_for_level
	= XRESIZEVEC (tree, mi->malloc_for_level, level + 1);

      /* Zero the newly allocated items.  */
      memset (&(mi->malloc_for_level[mi->max_malloced_level + 1]),
	      0, (level - mi->max_malloced_level) * sizeof (tree));

      mi->max_malloced_level = level + 1;
    }
  mi->malloc_for_level[level] = stmt;
}

/* Given an assignment statement STMT that we know that its
   left-hand-side is the matrix MI variable, we traverse the immediate
   uses backwards until we get to a malloc site.  We make sure that
   there is one and only one malloc site that sets this variable.  When
   we are performing the flattening we generate a new variable that
   will hold the size for each dimension; each malloc that allocates a
   dimension has the size parameter; we use that parameter to
   initialize the dimension size variable so we can use it later in
   the address calculations.  LEVEL is the dimension we're inspecting.
   Return if STMT is related to an allocation site.  */

static void
analyze_matrix_allocation_site (struct matrix_info *mi, tree stmt,
				int level, sbitmap visited)
{
  if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT)
    {
      tree rhs = GIMPLE_STMT_OPERAND (stmt, 1);

      rhs = get_inner_of_cast_expr (rhs);
      if (TREE_CODE (rhs) == SSA_NAME)
	{
	  tree def = SSA_NAME_DEF_STMT (rhs);

	  analyze_matrix_allocation_site (mi, def, level, visited);
	  return;
	}

      /* A result of call to malloc.  */
      else if (TREE_CODE (rhs) == CALL_EXPR)
	{
	  int call_flags = call_expr_flags (rhs);

	  if (!(call_flags & ECF_MALLOC))
	    {
	      mark_min_matrix_escape_level (mi, level, stmt);
	      return;
	    }
	  else
	    {
	      tree malloc_fn_decl;
	      const char *malloc_fname;

	      malloc_fn_decl = CALL_EXPR_FN (rhs);
	      if (TREE_CODE (malloc_fn_decl) != ADDR_EXPR
		  || TREE_CODE (TREE_OPERAND (malloc_fn_decl, 0)) !=
		  FUNCTION_DECL)
		{
		  mark_min_matrix_escape_level (mi, level, stmt);
		  return;
		}
	      malloc_fn_decl = TREE_OPERAND (malloc_fn_decl, 0);
	      malloc_fname = IDENTIFIER_POINTER (DECL_NAME (malloc_fn_decl));
	      if (DECL_FUNCTION_CODE (malloc_fn_decl) != BUILT_IN_MALLOC)
		{
		  if (dump_file)
		    fprintf (dump_file,
			     "Matrix %s is an argument to function %s\n",
			     get_name (mi->decl), get_name (malloc_fn_decl));
		  mark_min_matrix_escape_level (mi, level, stmt);
		  return;
		}
	    }
	  /* This is a call to malloc of level 'level'.
	     mi->max_malloced_level-1 == level means that we've
	     seen a malloc statement of level 'level' before.
	     If the statement is not the same one that we've
	     seen before, then there's another malloc statement
	     for the same level, which means that we need to mark
	     it escaping.  */
	  if (mi->malloc_for_level
	      && mi->max_malloced_level-1 == level
	      && mi->malloc_for_level[level] != stmt)
	    {
	      mark_min_matrix_escape_level (mi, level, stmt);
	      return;
	    }
	  else
	    add_allocation_site (mi, stmt, level);
	  return;
	}
      /* If we are back to the original matrix variable then we
	 are sure that this is analyzed as an access site.  */
      else if (rhs == mi->decl)
	return;
    }
  /* Looks like we don't know what is happening in this
     statement, so be on the safe side and mark it as escaping.  */
  mark_min_matrix_escape_level (mi, level, stmt);
}

/* The transposing decision making.
   In order to calculate the profitability of transposing, we collect two
   types of information regarding the accesses:
   1. profiling information used to express the hotness of an access, that
      is how often the matrix is accessed by this access site (count of the
      access site).
   2. which dimension in the access site is iterated by the innermost
      loop containing this access.

   The matrix will have a calculated value of weighted hotness for each
   dimension.
   Intuitively the hotness level of a dimension is a function of how
   many times it was the most frequently accessed dimension in the
   highly executed access sites of this matrix.

   It is computed by the following equation, summed over each access
   site j = 1..m and each dimension i = 1..n:

     dim_hot_level[i] += acc[j]->dim[i]->iter_by_inner_loop * count(j)

   where n is the number of dims and m is the number of the matrix
   access sites.  acc[j]->dim[i]->iter_by_inner_loop is 1 if acc[j]
   iterates over dim[i] in the innermost loop, and is 0 otherwise.

   The organization of the new matrix should be according to the
   hotness of each dimension.  The hotness of the dimension implies
   the locality of the elements.  */
static int
analyze_transpose (void **slot, void *data ATTRIBUTE_UNUSED)
{
  struct matrix_info *mi = (struct matrix_info *) *slot;
  int min_escape_l = mi->min_indirect_level_escape;
  struct loop *loop;
  affine_iv iv;
  struct access_site_info *acc_info;
  int i;

  if (min_escape_l < 2 || !mi->access_l)
    {
      if (mi->access_l)
	{
	  for (i = 0;
	       VEC_iterate (access_site_info_p, mi->access_l, i, acc_info);
	       i++)
	    free (acc_info);
	  VEC_free (access_site_info_p, heap, mi->access_l);
	}
      return 1;
    }
  if (!mi->dim_hot_level)
    mi->dim_hot_level =
      (gcov_type *) xcalloc (min_escape_l, sizeof (gcov_type));

  for (i = 0; VEC_iterate (access_site_info_p, mi->access_l, i, acc_info);
       i++)
    {
      if (TREE_CODE (GIMPLE_STMT_OPERAND (acc_info->stmt, 1)) == POINTER_PLUS_EXPR
	  && acc_info->level < min_escape_l)
	{
	  loop = loop_containing_stmt (acc_info->stmt);
	  if (!loop || loop->inner)
	    {
	      free (acc_info);
	      continue;
	    }
	  if (simple_iv (loop, acc_info->stmt, acc_info->offset, &iv, true))
	    {
	      if (iv.step != NULL)
		{
		  HOST_WIDE_INT istep;

		  istep = int_cst_value (iv.step);
		  if (istep != 0)
		    {
		      acc_info->iterated_by_inner_most_loop_p = 1;
		      mi->dim_hot_level[acc_info->level] +=
			bb_for_stmt (acc_info->stmt)->count;
		    }
		}
	    }
	}
      free (acc_info);
    }
  VEC_free (access_site_info_p, heap, mi->access_l);

  return 1;
}

/* Find the index which defines the OFFSET from base.
   We walk from use to def until we find how the offset was defined.  */
static tree
get_index_from_offset (tree offset, tree def_stmt)
{
  tree op1, op2, expr, index;

  if (TREE_CODE (def_stmt) == PHI_NODE)
    return NULL;
  expr = get_inner_of_cast_expr (GIMPLE_STMT_OPERAND (def_stmt, 1));
  if (TREE_CODE (expr) == SSA_NAME)
    return get_index_from_offset (offset, SSA_NAME_DEF_STMT (expr));
  else if (TREE_CODE (expr) == MULT_EXPR)
    {
      op1 = TREE_OPERAND (expr, 0);
      op2 = TREE_OPERAND (expr, 1);
      if (TREE_CODE (op1) != INTEGER_CST && TREE_CODE (op2) != INTEGER_CST)
	return NULL;
      index = (TREE_CODE (op1) == INTEGER_CST) ? op2 : op1;
      return index;
    }
  else
    return NULL_TREE;
}

/* Update MI->dimension_type_size[CURRENT_INDIRECT_LEVEL] with the size
   of the type related to the SSA_VAR, or the type related to the
   lhs of STMT, in the case that it is an INDIRECT_REF.  */
static void
update_type_size (struct matrix_info *mi, tree stmt, tree ssa_var,
		  int current_indirect_level)
{
  tree lhs;
  HOST_WIDE_INT type_size;

  /* Update type according to the type of the INDIRECT_REF expr.  */
  if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT
      && TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) == INDIRECT_REF)
    {
      lhs = GIMPLE_STMT_OPERAND (stmt, 0);
      gcc_assert (POINTER_TYPE_P
		  (TREE_TYPE (SSA_NAME_VAR (TREE_OPERAND (lhs, 0)))));
      type_size =
	int_size_in_bytes (TREE_TYPE
			   (TREE_TYPE
			    (SSA_NAME_VAR (TREE_OPERAND (lhs, 0)))));
    }
  else
    type_size = int_size_in_bytes (TREE_TYPE (ssa_var));

  /* Record the size of elements accessed (as a whole)
     in the current indirection level (dimension).  If the size of
     elements is not known at compile time, mark it as escaping.  */
  if (type_size <= 0)
    mark_min_matrix_escape_level (mi, current_indirect_level, stmt);
  else
    {
      int l = current_indirect_level;

      if (!mi->dimension_type_size)
	{
	  mi->dimension_type_size
	    = (HOST_WIDE_INT *) xcalloc (l + 1, sizeof (HOST_WIDE_INT));
	  mi->dimension_type_size_len = l + 1;
	}
      else if (mi->dimension_type_size_len < l + 1)
	{
	  mi->dimension_type_size
	    = (HOST_WIDE_INT *) xrealloc (mi->dimension_type_size,
					  (l + 1) * sizeof (HOST_WIDE_INT));
	  memset (&mi->dimension_type_size[mi->dimension_type_size_len],
		  0, (l + 1 - mi->dimension_type_size_len)
		  * sizeof (HOST_WIDE_INT));
	  mi->dimension_type_size_len = l + 1;
	}
      /* Make sure all the accesses in the same level have the same size
	 of the type.  */
      if (!mi->dimension_type_size[l])
	mi->dimension_type_size[l] = type_size;
      else if (mi->dimension_type_size[l] != type_size)
	mark_min_matrix_escape_level (mi, l, stmt);
    }
}

/* USE_STMT represents a CALL_EXPR, where one of the arguments is the
   ssa var that we want to check because it came from some use of matrix
   MI.  CURRENT_INDIRECT_LEVEL is the indirection level we reached so
   far.  */

static void
analyze_accesses_for_call_expr (struct matrix_info *mi, tree use_stmt,
				int current_indirect_level)
{
  tree call = get_call_expr_in (use_stmt);
  if (call && get_callee_fndecl (call))
    {
      if (DECL_FUNCTION_CODE (get_callee_fndecl (call)) != BUILT_IN_FREE)
	{
	  if (dump_file)
	    fprintf (dump_file,
		     "Matrix %s: Function call %s, level %d escapes.\n",
		     get_name (mi->decl), get_name (get_callee_fndecl (call)),
		     current_indirect_level);
	  mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
	}
      else if (mi->free_stmts[current_indirect_level].stmt != NULL
	       && mi->free_stmts[current_indirect_level].stmt != use_stmt)
	mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
      else
	{
	  /* Record the free statements so we can delete them
	     later.  */
	  int l = current_indirect_level;

	  mi->free_stmts[l].stmt = use_stmt;
	  mi->free_stmts[l].func = current_function_decl;
	}
    }
}

/* USE_STMT represents a phi node of the ssa var that we want to
   check because it came from some use of matrix MI.
   We check all the escaping levels that get to the PHI node
   and make sure they are all the same escaping;
   if not (which is rare) we let the escaping level be the
   minimum level that gets into that PHI because starting from
   that level we cannot expect the behavior of the indirections.
   CURRENT_INDIRECT_LEVEL is the indirection level we reached so far.  */

static void
analyze_accesses_for_phi_node (struct matrix_info *mi, tree use_stmt,
			       int current_indirect_level, sbitmap visited,
			       bool record_accesses)
{
  struct matrix_access_phi_node tmp_maphi, *maphi, **pmaphi;

  tmp_maphi.phi = use_stmt;
  if ((maphi = (struct matrix_access_phi_node *)
       htab_find (htab_mat_acc_phi_nodes, &tmp_maphi)))
    {
      if (maphi->indirection_level == current_indirect_level)
	return;
      else
	{
	  int level = MIN (maphi->indirection_level,
			   current_indirect_level);
	  int j;
	  tree t = NULL_TREE;

	  maphi->indirection_level = level;
	  for (j = 0; j < PHI_NUM_ARGS (use_stmt); j++)
	    {
	      tree def = PHI_ARG_DEF (use_stmt, j);

	      if (TREE_CODE (SSA_NAME_DEF_STMT (def)) != PHI_NODE)
		t = SSA_NAME_DEF_STMT (def);
	    }
	  mark_min_matrix_escape_level (mi, level, t);
	}
      return;
    }
  maphi = (struct matrix_access_phi_node *)
    xcalloc (1, sizeof (struct matrix_access_phi_node));
  maphi->phi = use_stmt;
  maphi->indirection_level = current_indirect_level;

  /* Insert to hash table.  */
  pmaphi = (struct matrix_access_phi_node **)
    htab_find_slot (htab_mat_acc_phi_nodes, maphi, INSERT);
  gcc_assert (pmaphi);
  *pmaphi = maphi;

  if (!TEST_BIT (visited, SSA_NAME_VERSION (PHI_RESULT (use_stmt))))
    {
      SET_BIT (visited, SSA_NAME_VERSION (PHI_RESULT (use_stmt)));
      analyze_matrix_accesses (mi, PHI_RESULT (use_stmt),
			       current_indirect_level, false, visited,
			       record_accesses);
      RESET_BIT (visited, SSA_NAME_VERSION (PHI_RESULT (use_stmt)));
    }
}

/* USE_STMT represents a modify statement (the rhs or lhs include
   the ssa var that we want to check because it came from some use of
   matrix MI).
   CURRENT_INDIRECT_LEVEL is the indirection level we reached so far.  */

static int
analyze_accesses_for_modify_stmt (struct matrix_info *mi, tree ssa_var,
				  tree use_stmt, int current_indirect_level,
				  bool last_op, sbitmap visited,
				  bool record_accesses)
{
  tree lhs = GIMPLE_STMT_OPERAND (use_stmt, 0);
  tree rhs = GIMPLE_STMT_OPERAND (use_stmt, 1);
  struct ssa_acc_in_tree lhs_acc, rhs_acc;

  memset (&lhs_acc, 0, sizeof (lhs_acc));
  memset (&rhs_acc, 0, sizeof (rhs_acc));

  lhs_acc.ssa_var = ssa_var;
  lhs_acc.t_code = ERROR_MARK;
  ssa_accessed_in_tree (lhs, &lhs_acc);
  rhs_acc.ssa_var = ssa_var;
  rhs_acc.t_code = ERROR_MARK;
  ssa_accessed_in_tree (get_inner_of_cast_expr (rhs), &rhs_acc);

  /* The SSA must be either in the left side or in the right side,
     to understand what is happening.
     In case the SSA_NAME is found in both sides we should be escaping
     at this level because in this case we cannot calculate the
     address correctly.  */
  if ((lhs_acc.var_found && rhs_acc.var_found
       && lhs_acc.t_code == INDIRECT_REF)
      || (!rhs_acc.var_found && !lhs_acc.var_found))
    {
      mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
      return current_indirect_level;
    }
  gcc_assert (!rhs_acc.var_found || !lhs_acc.var_found);

  /* If we are storing to the matrix at some level, then mark it as
     escaping at that level.  */
  if (lhs_acc.var_found)
    {
      tree def;
      int l = current_indirect_level + 1;

      gcc_assert (lhs_acc.t_code == INDIRECT_REF);
      def = get_inner_of_cast_expr (rhs);
      if (TREE_CODE (def) != SSA_NAME)
	mark_min_matrix_escape_level (mi, l, use_stmt);
      else
	{
	  def = SSA_NAME_DEF_STMT (def);
	  analyze_matrix_allocation_site (mi, def, l, visited);
	  if (record_accesses)
	    record_access_alloc_site_info (mi, use_stmt, NULL_TREE,
					   NULL_TREE, l, true);
	  update_type_size (mi, use_stmt, NULL, l);
	}
      return current_indirect_level;
    }
  /* Now, check the right-hand-side, to see how the SSA variable
     is used.  */
  if (rhs_acc.var_found)
    {
      /* If we are passing the ssa name to a function call and
	 the pointer escapes when passed to the function
	 (not the case of free), then we mark the matrix as
	 escaping at this level.  */
      if (rhs_acc.t_code == CALL_EXPR)
	{
	  analyze_accesses_for_call_expr (mi, use_stmt,
					  current_indirect_level);

	  return current_indirect_level;
	}
      if (rhs_acc.t_code != INDIRECT_REF
	  && rhs_acc.t_code != POINTER_PLUS_EXPR && rhs_acc.t_code != SSA_NAME)
	{
	  mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
	  return current_indirect_level;
	}
      /* If the access in the RHS has an indirection, increase the
	 indirection level.  */
      if (rhs_acc.t_code == INDIRECT_REF)
	{
	  if (record_accesses)
	    record_access_alloc_site_info (mi, use_stmt, NULL_TREE,
					   NULL_TREE,
					   current_indirect_level, true);
	  current_indirect_level += 1;
	}
      else if (rhs_acc.t_code == POINTER_PLUS_EXPR)
	{
	  gcc_assert (rhs_acc.second_op);
	  if (last_op)
	    /* Currently we support only one PLUS expression on the
	       SSA_NAME that holds the base address of the current
	       indirection level; to support the more general case there
	       is a need to hold a stack of expressions and regenerate
	       the calculation later.  */
	    mark_min_matrix_escape_level (mi, current_indirect_level,
					  use_stmt);
	  else
	    {
	      tree index;
	      tree op1, op2;

	      op1 = TREE_OPERAND (rhs, 0);
	      op2 = TREE_OPERAND (rhs, 1);

	      op2 = (op1 == ssa_var) ? op2 : op1;
	      if (TREE_CODE (op2) == INTEGER_CST)
		index =
		  build_int_cst (TREE_TYPE (op1),
				 TREE_INT_CST_LOW (op2) /
				 int_size_in_bytes (TREE_TYPE (op1)));
	      else
		{
		  index =
		    get_index_from_offset (op2, SSA_NAME_DEF_STMT (op2));
		  if (index == NULL_TREE)
		    {
		      mark_min_matrix_escape_level (mi,
						    current_indirect_level,
						    use_stmt);
		      return current_indirect_level;
		    }
		}
	      if (record_accesses)
		record_access_alloc_site_info (mi, use_stmt, op2,
					       index,
					       current_indirect_level, false);
	    }
	}
      /* If we are storing this level of indirection mark it as
	 escaping.  */
      if (lhs_acc.t_code == INDIRECT_REF || TREE_CODE (lhs) != SSA_NAME)
	{
	  int l = current_indirect_level;

	  /* One exception is when we are storing to the matrix
	     variable itself; this is the case of malloc, and we must make
	     sure that it's the one and only one call to malloc, so
	     we call analyze_matrix_allocation_site to check
	     this out.  */
	  if (TREE_CODE (lhs) != VAR_DECL || lhs != mi->decl)
	    mark_min_matrix_escape_level (mi, current_indirect_level,
					  use_stmt);
	  else
	    {
	      /* Also update the escaping level.  */
	      analyze_matrix_allocation_site (mi, use_stmt, l, visited);
	      if (record_accesses)
		record_access_alloc_site_info (mi, use_stmt, NULL_TREE,
					       NULL_TREE, l, true);
	    }
	}
      else
	{
	  /* We are placing it in an SSA, follow that SSA.  */
	  analyze_matrix_accesses (mi, lhs,
				   current_indirect_level,
				   rhs_acc.t_code == POINTER_PLUS_EXPR,
				   visited, record_accesses);
	}
    }
  return current_indirect_level;
}

/* Given a SSA_VAR (coming from a use statement of the matrix MI),
   follow its uses and level of indirection and find out the minimum
   indirection level it escapes in (the highest dimension) and the maximum
   level it is accessed in (this will be the actual dimension of the
   matrix).  The information is accumulated in MI.
   We look at the immediate uses; if one escapes we finish, if not,
   we make a recursive call for each one of the immediate uses of the
   resulting SSA name.  */
static void
analyze_matrix_accesses (struct matrix_info *mi, tree ssa_var,
			 int current_indirect_level, bool last_op,
			 sbitmap visited, bool record_accesses)
{
  imm_use_iterator imm_iter;
  use_operand_p use_p;

  update_type_size (mi, SSA_NAME_DEF_STMT (ssa_var), ssa_var,
		    current_indirect_level);

  /* We don't go beyond the escaping level when we are performing the
     flattening.  NOTE: we keep the last indirection level that doesn't
     escape.  */
  if (mi->min_indirect_level_escape > -1
      && mi->min_indirect_level_escape <= current_indirect_level)
    return;

  /* Now go over the uses of the SSA_NAME and check how it is used in
     each one of them.  We are mainly looking for the pattern INDIRECT_REF,
     then a POINTER_PLUS_EXPR, then INDIRECT_REF etc., while in between
     there could be any number of copies and casts.  */
  gcc_assert (TREE_CODE (ssa_var) == SSA_NAME);

  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, ssa_var)
    {
      tree use_stmt = USE_STMT (use_p);
      if (TREE_CODE (use_stmt) == PHI_NODE)
	/* We check all the escaping levels that get to the PHI node
	   and make sure they are all the same escaping;
	   if not (which is rare) we let the escaping level be the
	   minimum level that gets into that PHI because starting from
	   that level we cannot expect the behavior of the indirections.  */
	analyze_accesses_for_phi_node (mi, use_stmt, current_indirect_level,
				       visited, record_accesses);
      else if (TREE_CODE (use_stmt) == CALL_EXPR)
	analyze_accesses_for_call_expr (mi, use_stmt, current_indirect_level);
      else if (TREE_CODE (use_stmt) == GIMPLE_MODIFY_STMT)
	current_indirect_level =
	  analyze_accesses_for_modify_stmt (mi, ssa_var, use_stmt,
					    current_indirect_level, last_op,
					    visited, record_accesses);
    }
}

/* A walk_tree function to go over the VAR_DECL, PARM_DECL nodes of
   the malloc size expression and check that those aren't changed
   over the function.  */
static tree
check_var_notmodified_p (tree * tp, int *walk_subtrees, void *data)
{
  basic_block bb;
  tree t = *tp;
  tree fn = (tree) data;
  block_stmt_iterator bsi;
  tree stmt;

  if (TREE_CODE (t) != VAR_DECL && TREE_CODE (t) != PARM_DECL)
    return NULL_TREE;

  FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (fn))
    {
      for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
	{
	  stmt = bsi_stmt (bsi);
	  if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
	    continue;
	  if (GIMPLE_STMT_OPERAND (stmt, 0) == t)
	    return stmt;
	}
    }
  *walk_subtrees = 1;
  return NULL_TREE;
}

/* Go backwards in the use-def chains and find out the expression
   represented by the possible SSA name in EXPR, until it is composed
   of only VAR_DECL, PARM_DECL and INT_CST.  In case of phi nodes
   we make sure that all the arguments represent the same subexpression,
   otherwise we fail.  */
static tree
can_calculate_expr_before_stmt (tree expr, sbitmap visited)
{
  tree def_stmt, op1, op2, res;

  switch (TREE_CODE (expr))
    {
    case SSA_NAME:
      /* Case of loop, we don't know to represent this expression.  */
      if (TEST_BIT (visited, SSA_NAME_VERSION (expr)))
	return NULL_TREE;

      SET_BIT (visited, SSA_NAME_VERSION (expr));
      def_stmt = SSA_NAME_DEF_STMT (expr);
      res = can_calculate_expr_before_stmt (def_stmt, visited);
      RESET_BIT (visited, SSA_NAME_VERSION (expr));
      return res;
    case VAR_DECL:
    case PARM_DECL:
    case INTEGER_CST:
      return expr;
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
    case MULT_EXPR:
      op1 = TREE_OPERAND (expr, 0);
      op2 = TREE_OPERAND (expr, 1);

      op1 = can_calculate_expr_before_stmt (op1, visited);
      if (!op1)
	return NULL_TREE;
      op2 = can_calculate_expr_before_stmt (op2, visited);
      if (op2)
	return fold_build2 (TREE_CODE (expr), TREE_TYPE (expr), op1, op2);
      return NULL_TREE;
    case GIMPLE_MODIFY_STMT:
      return can_calculate_expr_before_stmt (GIMPLE_STMT_OPERAND (expr, 1),
					     visited);
    case PHI_NODE:
      {
	int j;

	res = NULL_TREE;
	/* Make sure all the arguments represent the same value.  */
	for (j = 0; j < PHI_NUM_ARGS (expr); j++)
	  {
	    tree new_res;
	    tree def = PHI_ARG_DEF (expr, j);

	    new_res = can_calculate_expr_before_stmt (def, visited);
	    if (res == NULL_TREE)
	      res = new_res;
	    else if (!new_res || !expressions_equal_p (res, new_res))
	      return NULL_TREE;
	  }
	return res;
      }
    CASE_CONVERT:
      res = can_calculate_expr_before_stmt (TREE_OPERAND (expr, 0), visited);
      if (res != NULL_TREE)
	return build1 (TREE_CODE (expr), TREE_TYPE (expr), res);
      else
	return NULL_TREE;

    default:
      return NULL_TREE;
    }
}

/* There should be only one allocation function for the dimensions
   that don't escape.  Here we check the allocation sites in this
   function.  We must make sure that all the dimensions are allocated
   using malloc and that the malloc size parameter expression could be
   pre-calculated before the call to the malloc of dimension 0.

   Given a candidate matrix for flattening -- MI -- check if it's
   appropriate for flattening -- we analyze the allocation
   sites that we recorded in the previous analysis.  The result of the
   analysis is a level of indirection (matrix dimension) in which the
   flattening is safe.  We check the following conditions:
   1. There is only one allocation site for each dimension.
   2. The allocation sites of all the dimensions are in the same
      function.
   (The above two are being taken care of during the analysis when
   we check the allocation site.)
   3. All the dimensions that we flatten are allocated at once; thus
      the total size must be known before the allocation of the
      dimension 0 (top level) -- we must make sure we represent the
      size of the allocation as an expression of global parameters or
      constants, and that those don't change over the function.  */

static int
check_allocation_function (void **slot, void *data ATTRIBUTE_UNUSED)
{
  int level;
  block_stmt_iterator bsi;
  basic_block bb_level_0;
  struct matrix_info *mi = (struct matrix_info *) *slot;
  sbitmap visited;

  if (!mi->malloc_for_level)
    return 1;

  visited = sbitmap_alloc (num_ssa_names);

  /* Do nothing if the current function is not the allocation
     function of MI.  */
  if (mi->allocation_function_decl != current_function_decl
      /* We aren't in the main allocation function yet.  */
      || !mi->malloc_for_level[0])
    return 1;

  for (level = 1; level < mi->max_malloced_level; level++)
    if (!mi->malloc_for_level[level])
      break;

  mark_min_matrix_escape_level (mi, level, NULL_TREE);

  bsi = bsi_for_stmt (mi->malloc_for_level[0]);
  bb_level_0 = bsi.bb;

  /* Check if the expression of the size passed to malloc could be
     pre-calculated before the malloc of level 0.  */
  for (level = 1; level < mi->min_indirect_level_escape; level++)
    {
      tree call_stmt, size;
      struct malloc_call_data mcd;

      call_stmt = mi->malloc_for_level[level];

      /* Find the correct malloc information.  */
      collect_data_for_malloc_call (call_stmt, &mcd);

      /* No need to check anticipation for constants.  */
      if (TREE_CODE (mcd.size_var) == INTEGER_CST)
	{
	  if (!mi->dimension_size)
	    {
	      mi->dimension_size =
		(tree *) xcalloc (mi->min_indirect_level_escape,
				  sizeof (tree));
	      mi->dimension_size_orig =
		(tree *) xcalloc (mi->min_indirect_level_escape,
				  sizeof (tree));
	    }
	  mi->dimension_size[level] = mcd.size_var;
	  mi->dimension_size_orig[level] = mcd.size_var;
	  continue;
	}
      /* ??? Here we should also add the way to calculate the size
	 expression, not only know that it is anticipated.  */
      sbitmap_zero (visited);
      size = can_calculate_expr_before_stmt (mcd.size_var, visited);
      if (size == NULL_TREE)
	{
	  mark_min_matrix_escape_level (mi, level, call_stmt);
	  if (dump_file)
	    fprintf (dump_file,
		     "Matrix %s: Cannot calculate the size of allocation, escaping at level %d\n",
		     get_name (mi->decl), level);
	  break;
	}
      if (!mi->dimension_size)
	{
	  mi->dimension_size =
	    (tree *) xcalloc (mi->min_indirect_level_escape, sizeof (tree));
	  mi->dimension_size_orig =
	    (tree *) xcalloc (mi->min_indirect_level_escape, sizeof (tree));
	}
      mi->dimension_size[level] = size;
      mi->dimension_size_orig[level] = size;
    }

  /* We don't need those anymore.  */
  for (level = mi->min_indirect_level_escape;
       level < mi->max_malloced_level; level++)
    mi->malloc_for_level[level] = NULL;
  return 1;
}

/* Track all access and allocation sites.  */
static void
find_sites_in_func (bool record)
{
  sbitmap visited_stmts_1;

  block_stmt_iterator bsi;
  tree stmt;
  basic_block bb;
  struct matrix_info tmpmi, *mi;

  visited_stmts_1 = sbitmap_alloc (num_ssa_names);

  FOR_EACH_BB (bb)
    {
      for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
	{
	  stmt = bsi_stmt (bsi);
	  if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT
	      && TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) == VAR_DECL)
	    {
	      tmpmi.decl = GIMPLE_STMT_OPERAND (stmt, 0);
	      if ((mi = (struct matrix_info *) htab_find (matrices_to_reorg,
							  &tmpmi)))
		{
		  sbitmap_zero (visited_stmts_1);
		  analyze_matrix_allocation_site (mi, stmt, 0,
						  visited_stmts_1);
		}
	    }
	  if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT
	      && TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) == SSA_NAME
	      && TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 1)) == VAR_DECL)
	    {
	      tmpmi.decl = GIMPLE_STMT_OPERAND (stmt, 1);
	      if ((mi = (struct matrix_info *) htab_find (matrices_to_reorg,
							  &tmpmi)))
		{
		  sbitmap_zero (visited_stmts_1);
		  analyze_matrix_accesses (mi,
					   GIMPLE_STMT_OPERAND (stmt, 0), 0,
					   false, visited_stmts_1, record);
		}
	    }
	}
    }
  sbitmap_free (visited_stmts_1);
}

/* Traverse the use-def chains to see if there are matrices that
   are passed through pointers and we cannot know how they are accessed.
   For each SSA-name defined by a global variable of our interest,
   we traverse the use-def chains of the SSA and follow the indirections,
   and record in what level of indirection the use of the variable
   escapes.  A use of a pointer escapes when it is passed to a function,
   stored into memory or assigned (except in malloc and free calls).  */

static void
record_all_accesses_in_func (void)
{
  unsigned i;
  sbitmap visited_stmts_1;

  visited_stmts_1 = sbitmap_alloc (num_ssa_names);

  for (i = 0; i < num_ssa_names; i++)
    {
      struct matrix_info tmpmi, *mi;
      tree ssa_var = ssa_name (i);
      tree rhs, lhs;

      if (!ssa_var
	  || TREE_CODE (SSA_NAME_DEF_STMT (ssa_var)) != GIMPLE_MODIFY_STMT)
	continue;
      rhs = GIMPLE_STMT_OPERAND (SSA_NAME_DEF_STMT (ssa_var), 1);
      lhs = GIMPLE_STMT_OPERAND (SSA_NAME_DEF_STMT (ssa_var), 0);
      if (TREE_CODE (rhs) != VAR_DECL && TREE_CODE (lhs) != VAR_DECL)
	continue;

      /* If the RHS is a matrix that we want to analyze, follow the def-use
	 chain for this SSA_VAR and check for escapes or apply the
	 flattening.  */
      tmpmi.decl = rhs;
      if ((mi = (struct matrix_info *) htab_find (matrices_to_reorg, &tmpmi)))
	{
	  /* This variable will track the visited PHI nodes, so we can limit
	     its size to the maximum number of SSA names.  */
	  sbitmap_zero (visited_stmts_1);
	  analyze_matrix_accesses (mi, ssa_var,
				   0, false, visited_stmts_1, true);
	}
    }
  sbitmap_free (visited_stmts_1);
}

/* Used when we want to convert the expression:
   RESULT = something * ORIG  to  RESULT = something * NEW.
   If ORIG and NEW are powers of 2, shift operations can be done,
   else division and multiplication.  */
static tree
compute_offset (HOST_WIDE_INT orig, HOST_WIDE_INT new, tree result)
{
  int x, y;
  tree result1, ratio, log, orig_tree, new_tree;

  x = exact_log2 (orig);
  y = exact_log2 (new);

  if (x != -1 && y != -1)
    {
      if (x == y)
	return result;
      else if (x > y)
	{
	  log = build_int_cst (TREE_TYPE (result), x - y);
	  result1 =
	    fold_build2 (LSHIFT_EXPR, TREE_TYPE (result), result, log);
	  return result1;
	}
      log = build_int_cst (TREE_TYPE (result), y - x);
      result1 = fold_build2 (RSHIFT_EXPR, TREE_TYPE (result), result, log);

      return result1;
    }
  orig_tree = build_int_cst (TREE_TYPE (result), orig);
  new_tree = build_int_cst (TREE_TYPE (result), new);
  ratio = fold_build2 (TRUNC_DIV_EXPR, TREE_TYPE (result), result, orig_tree);
  result1 = fold_build2 (MULT_EXPR, TREE_TYPE (result), ratio, new_tree);

  return result1;
}

/* We know that we are allowed to perform matrix flattening (according to the
   escape analysis), so we traverse the use-def chains of the SSA vars
   defined by the global variables pointing to the matrices of our interest.
   In each use of the SSA we calculate the offset from the base address
   according to the following equation:

     a[I1][I2]...[Ik], where D1..Dk is the length of each dimension and the
     escaping level is m <= k, and a' is the new allocated matrix,
     will be translated to:

       b[I(m+1)]...[Ik]

     where
       b = a' + I1*D2...*Dm + I2*D3...*Dm + ... + Im
  */
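
/* (Illustration, not part of the original comment: for a fully flattened
   two-dimensional matrix, i.e. m == k == 2, the equation above reduces
   to b = a' + I1*D2 + I2, matching the a[i * dim2 + j] rewrite shown at
   the top of this file.)  */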
1719 static int
1720 transform_access_sites (void **slot, void *data ATTRIBUTE_UNUSED)
1722 block_stmt_iterator bsi;
1723 struct matrix_info *mi = (struct matrix_info *) *slot;
1724 int min_escape_l = mi->min_indirect_level_escape;
1725 struct access_site_info *acc_info;
1726 int i;
1728 if (min_escape_l < 2 || !mi->access_l)
1729 return 1;
1730 for (i = 0; VEC_iterate (access_site_info_p, mi->access_l, i, acc_info);
1731 i++)
1733 tree orig, type;
1735 /* This is possible because we collect the access sites before
1736 we determine the final minimum indirection level. */
1737 if (acc_info->level >= min_escape_l)
1739 free (acc_info);
1740 continue;
1742 if (acc_info->is_alloc)
1744 if (acc_info->level >= 0 && bb_for_stmt (acc_info->stmt))
1746 ssa_op_iter iter;
1747 tree def;
1748 tree stmt = acc_info->stmt;
1750 FOR_EACH_SSA_TREE_OPERAND (def, stmt, iter, SSA_OP_DEF)
1751 mark_sym_for_renaming (SSA_NAME_VAR (def));
1752 bsi = bsi_for_stmt (stmt);
1753 gcc_assert (TREE_CODE (acc_info->stmt) == GIMPLE_MODIFY_STMT);
1754 if (TREE_CODE (GIMPLE_STMT_OPERAND (acc_info->stmt, 0)) ==
1755 SSA_NAME && acc_info->level < min_escape_l - 1)
1757 imm_use_iterator imm_iter;
1758 use_operand_p use_p;
1759 tree use_stmt;
1761 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter,
1762 GIMPLE_STMT_OPERAND (acc_info->stmt,
1764 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1766 tree conv, tmp, stmts;
1768 /* Emit convert statement to convert to type of use. */
1769 conv =
1770 fold_build1 (CONVERT_EXPR,
1771 TREE_TYPE (GIMPLE_STMT_OPERAND
1772 (acc_info->stmt, 0)),
1773 TREE_OPERAND (GIMPLE_STMT_OPERAND
1774 (acc_info->stmt, 1), 0));
1775 tmp =
1776 create_tmp_var (TREE_TYPE
1777 (GIMPLE_STMT_OPERAND
1778 (acc_info->stmt, 0)), "new");
1779 add_referenced_var (tmp);
1780 stmts =
1781 fold_build2 (GIMPLE_MODIFY_STMT,
1782 TREE_TYPE (GIMPLE_STMT_OPERAND
1783 (acc_info->stmt, 0)), tmp,
1784 conv);
1785 tmp = make_ssa_name (tmp, stmts);
1786 GIMPLE_STMT_OPERAND (stmts, 0) = tmp;
1787 bsi = bsi_for_stmt (acc_info->stmt);
1788 bsi_insert_after (&bsi, stmts, BSI_SAME_STMT);
1789 SET_USE (use_p, tmp);
1792 if (acc_info->level < min_escape_l - 1)
1793 bsi_remove (&bsi, true);
1795 free (acc_info);
1796 continue;
1798 orig = GIMPLE_STMT_OPERAND (acc_info->stmt, 1);
1799 type = TREE_TYPE (orig);
1800 if (TREE_CODE (orig) == INDIRECT_REF
1801 && acc_info->level < min_escape_l - 1)
1803 /* Replace the INDIRECT_REF with NOP (cast) usually we are casting
1804 from "pointer to type" to "type". */
1805 orig =
1806 build1 (NOP_EXPR, TREE_TYPE (orig),
1807 GIMPLE_STMT_OPERAND (orig, 0));
1808 GIMPLE_STMT_OPERAND (acc_info->stmt, 1) = orig;
1810 else if (TREE_CODE (orig) == POINTER_PLUS_EXPR
1811 && acc_info->level < (min_escape_l))
1813 imm_use_iterator imm_iter;
1814 use_operand_p use_p;
1816 tree offset;
1817 int k = acc_info->level;
1818 tree num_elements, total_elements;
1819 tree tmp1;
1820 tree d_size = mi->dimension_size[k];
1822 /* We already make sure in the analysis that the first operand
1823 is the base and the second is the offset. */
1824 offset = acc_info->offset;
1825 if (mi->dim_map[k] == min_escape_l - 1)
1827 if (!check_transpose_p || mi->is_transposed_p == false)
1828 tmp1 = offset;
1829 else
1831 tree new_offset;
1832 tree d_type_size, d_type_size_k;
                  d_type_size =
                    size_int (mi->dimension_type_size[min_escape_l]);
                  d_type_size_k =
                    size_int (mi->dimension_type_size[k + 1]);

                  new_offset =
                    compute_offset (mi->dimension_type_size[min_escape_l],
                                    mi->dimension_type_size[k + 1], offset);

                  total_elements = new_offset;
                  if (new_offset != offset)
                    {
                      bsi = bsi_for_stmt (acc_info->stmt);
                      tmp1 = force_gimple_operand_bsi (&bsi, total_elements,
                                                       true, NULL,
                                                       true, BSI_SAME_STMT);
                    }
                  else
                    tmp1 = offset;
                }
            }
          else
            {
              d_size = mi->dimension_size[mi->dim_map[k] + 1];
              num_elements =
                fold_build2 (MULT_EXPR, sizetype,
                             fold_convert (sizetype, acc_info->index),
                             fold_convert (sizetype, d_size));
              add_referenced_var (d_size);
              bsi = bsi_for_stmt (acc_info->stmt);
              tmp1 = force_gimple_operand_bsi (&bsi, num_elements, true,
                                               NULL, true, BSI_SAME_STMT);
            }

          /* Replace the offset if needed.  */
          if (tmp1 != offset)
            {
              if (TREE_CODE (offset) == SSA_NAME)
                {
                  tree use_stmt;

                  FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, offset)
                    FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
                      if (use_stmt == acc_info->stmt)
                        SET_USE (use_p, tmp1);
                }
              else
                {
                  gcc_assert (TREE_CODE (offset) == INTEGER_CST);
                  TREE_OPERAND (orig, 1) = tmp1;
                }
            }
        }
      /* ??? Meanwhile this happens because we record the same access
         site more than once; we should be using a hash table to
         avoid this and insert the STMT of the access site only
         once.
         else
           gcc_unreachable (); */
      free (acc_info);
    }
  VEC_free (access_site_info_p, heap, mi->access_l);

  update_ssa (TODO_update_ssa);
#ifdef ENABLE_CHECKING
  verify_ssa (true);
#endif
  return 1;
}

/* Sort the array A of counts.  Arrange DIM_MAP to reflect the new
   order.  */
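/* For example, the counts {50, 10, 30} with DIM_MAP {0, 1, 2} come
   out as {10, 30, 50} with DIM_MAP {1, 2, 0}.  */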

static void
sort_dim_hot_level (gcov_type *a, int *dim_map, int n)
{
  int i, j, tmp1;
  gcov_type tmp;

  for (i = 0; i < n - 1; i++)
    for (j = 0; j < n - 1 - i; j++)
      if (a[j + 1] < a[j])
        {
          /* Swap a[j] and a[j + 1], and keep DIM_MAP in step.  */
          tmp = a[j];
          a[j] = a[j + 1];
          a[j + 1] = tmp;
          tmp1 = dim_map[j];
          dim_map[j] = dim_map[j + 1];
          dim_map[j + 1] = tmp1;
        }
}

/* Replace the multiple mallocs (one per dimension) with one malloc
   of size DIM1 * DIM2 * ... * DIMN * size_of_element.
   Make sure that we hold the size used at the malloc site in a new
   global variable; this way we ensure that the size doesn't change
   and that it is accessible from all the other functions that use
   the matrix.  Also, the original calls to free are deleted and
   replaced by a new call that frees the flattened matrix.  */

static int
transform_allocation_sites (void **slot, void *data ATTRIBUTE_UNUSED)
{
  int i;
  struct matrix_info *mi;
  tree type, call_stmt_0, malloc_stmt, oldfn, prev_dim_size, use_stmt;
  struct cgraph_node *c_node;
  struct cgraph_edge *e;
  block_stmt_iterator bsi;
  struct malloc_call_data mcd;
  HOST_WIDE_INT element_size;

  imm_use_iterator imm_iter;
  use_operand_p use_p;
  tree old_size_0, tmp;
  int min_escape_l;
  int id;

  mi = (struct matrix_info *) *slot;

  min_escape_l = mi->min_indirect_level_escape;

  if (!mi->malloc_for_level)
    mi->min_indirect_level_escape = 0;
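
  /* With fewer than two non-escaping levels there is nothing to
     flatten.  */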
  if (mi->min_indirect_level_escape < 2)
    return 1;

  mi->dim_map = (int *) xcalloc (mi->min_indirect_level_escape,
                                 sizeof (int));
  for (i = 0; i < mi->min_indirect_level_escape; i++)
    mi->dim_map[i] = i;
  if (check_transpose_p)
    {
      int i;

      if (dump_file)
        {
          fprintf (dump_file, "Matrix %s:\n", get_name (mi->decl));
          for (i = 0; i < min_escape_l; i++)
            {
              fprintf (dump_file, "dim %d before sort ", i);
              if (mi->dim_hot_level)
                fprintf (dump_file,
                         "count is " HOST_WIDEST_INT_PRINT_DEC "\n",
                         mi->dim_hot_level[i]);
            }
        }
      sort_dim_hot_level (mi->dim_hot_level, mi->dim_map,
                          mi->min_indirect_level_escape);
      if (dump_file)
        for (i = 0; i < min_escape_l; i++)
          {
            fprintf (dump_file, "dim %d after sort\n", i);
            if (mi->dim_hot_level)
              fprintf (dump_file,
                       "count is " HOST_WIDEST_INT_PRINT_DEC "\n",
                       mi->dim_hot_level[i]);
          }
      for (i = 0; i < mi->min_indirect_level_escape; i++)
        {
          if (dump_file)
            fprintf (dump_file, "dim_map[%d] after sort %d\n", i,
                     mi->dim_map[i]);
          if (mi->dim_map[i] != i)
            {
              if (dump_file)
                fprintf (dump_file,
                         "Transposed dimensions: dim %d is now dim %d\n",
                         mi->dim_map[i], i);
              mi->is_transposed_p = true;
            }
        }
    }
  else
    {
      for (i = 0; i < mi->min_indirect_level_escape; i++)
        mi->dim_map[i] = i;
    }

  /* Call statement of the allocation site of level 0.  */
  call_stmt_0 = mi->malloc_for_level[0];

  /* Find the correct malloc information.  */
  collect_data_for_malloc_call (call_stmt_0, &mcd);

  mi->dimension_size[0] = mcd.size_var;
  mi->dimension_size_orig[0] = mcd.size_var;
  /* Make sure that the variables in the size expression for
     all the dimensions (above level 0) aren't modified in
     the allocation function.  */
  for (i = 1; i < mi->min_indirect_level_escape; i++)
    {
      tree t;

      /* mi->dimension_size must contain the expression of the size
         calculated in check_allocation_function.  */
      gcc_assert (mi->dimension_size[i]);

      t = walk_tree_without_duplicates (&(mi->dimension_size[i]),
                                        check_var_notmodified_p,
                                        mi->allocation_function_decl);
      if (t != NULL_TREE)
        {
          mark_min_matrix_escape_level (mi, i, t);
          break;
        }
    }

  if (mi->min_indirect_level_escape < 2)
    return 1;

  /* The size expressions must be available right before the call to
     malloc of level 0, so all insertions below happen at that
     point.  */
  bsi = bsi_for_stmt (call_stmt_0);

  /* Find out the size of each dimension by looking at the malloc
     sites, and create a global variable to hold each size.
     We add the assignments to the globals before the malloc of
     level 0.  */

  /* To be able to produce gimple temporaries.  */
  oldfn = current_function_decl;
  current_function_decl = mi->allocation_function_decl;
  push_cfun (DECL_STRUCT_FUNCTION (mi->allocation_function_decl));

  /* Set the dimension sizes as follows:
     DIM_SIZE[i] = DIM_SIZE[n] * ... * DIM_SIZE[i]
     where n is the maximum non-escaping level.  */
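  /* The loop below walks from the innermost non-escaping level
     outward, so each global picks up the accumulated product of the
     inner dimensions; once it finishes, DIM_SIZE[0] holds the total
     size of the flattened matrix in bytes and becomes the argument
     of the single remaining call to malloc.  */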
  element_size = mi->dimension_type_size[mi->min_indirect_level_escape];
  prev_dim_size = NULL_TREE;

  for (i = mi->min_indirect_level_escape - 1; i >= 0; i--)
    {
      tree dim_size, dim_var, tmp;
      tree d_type_size;

      /* Now put the size expression in a global variable and
         initialize it to the size expression before the malloc of
         level 0.  */
      dim_var =
        add_new_static_var (TREE_TYPE
                            (mi->dimension_size_orig[mi->dim_map[i]]));
      type = TREE_TYPE (mi->dimension_size_orig[mi->dim_map[i]]);

      /* DIM_SIZE = MALLOC_SIZE_PARAM / TYPE_SIZE.  */
      /* Find which dim ID becomes dim I.  */
      for (id = 0; id < mi->min_indirect_level_escape; id++)
        if (mi->dim_map[id] == i)
          break;
      d_type_size =
        build_int_cst (type, mi->dimension_type_size[id + 1]);
      if (!prev_dim_size)
        prev_dim_size = build_int_cst (type, element_size);
      if (!check_transpose_p && i == mi->min_indirect_level_escape - 1)
        dim_size = mi->dimension_size_orig[id];
      else
        dim_size =
          fold_build2 (TRUNC_DIV_EXPR, type, mi->dimension_size_orig[id],
                       d_type_size);

      dim_size = fold_build2 (MULT_EXPR, type, dim_size, prev_dim_size);
      dim_size = force_gimple_operand_bsi (&bsi, dim_size, true, NULL,
                                           true, BSI_SAME_STMT);
      /* GLOBAL_HOLDING_THE_SIZE = DIM_SIZE.  */
      tmp = fold_build2 (GIMPLE_MODIFY_STMT, type, dim_var, dim_size);
      GIMPLE_STMT_OPERAND (tmp, 0) = dim_var;
      mark_symbols_for_renaming (tmp);
      bsi_insert_before (&bsi, tmp, BSI_SAME_STMT);

      prev_dim_size = mi->dimension_size[i] = dim_var;
    }
  update_ssa (TODO_update_ssa);
  /* Replace the malloc size argument in the malloc of level 0 with
     the total size of all the dimensions.  */
  malloc_stmt = GIMPLE_STMT_OPERAND (call_stmt_0, 1);
  c_node = cgraph_node (mi->allocation_function_decl);
  old_size_0 = CALL_EXPR_ARG (malloc_stmt, 0);
  tmp = force_gimple_operand_bsi (&bsi, mi->dimension_size[0], true,
                                  NULL, true, BSI_SAME_STMT);
  if (TREE_CODE (old_size_0) == SSA_NAME)
    {
      FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, old_size_0)
        FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
          if (use_stmt == call_stmt_0)
            SET_USE (use_p, tmp);
    }
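
  /* At this point the level 0 call allocates the whole flattened
     matrix in one chunk; the mallocs of the inner levels are dead
     and are removed below together with their call-graph edges.  */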
  /* When deleting the calls to malloc we also need to remove the
     corresponding edges from the call graph to keep it consistent.
     Notice that cgraph_edge may create a new node in the call graph
     if there is no node for the given declaration; this shouldn't be
     the case but currently there is no way to check this outside of
     "cgraph.c".  */
  for (i = 1; i < mi->min_indirect_level_escape; i++)
    {
      block_stmt_iterator bsi;
      tree use_stmt1 = NULL;
      tree call;

      tree call_stmt = mi->malloc_for_level[i];
      call = GIMPLE_STMT_OPERAND (call_stmt, 1);
      gcc_assert (TREE_CODE (call) == CALL_EXPR);
      e = cgraph_edge (c_node, call_stmt);
      gcc_assert (e);
      cgraph_remove_edge (e);
      bsi = bsi_for_stmt (call_stmt);
      /* Remove the call stmt.  */
      bsi_remove (&bsi, true);
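
      /* The allocation is expected to appear as the chain
             T.1 = malloc (SIZE);
             T.2 = (TYPE *) T.1;
             A[I] = T.2;
         so after removing the call itself we walk the immediate uses
         to delete the cast and the assignment of the allocated area
         as well (T.1, T.2 and A[I] are illustrative names only).  */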
      /* Remove the type cast stmt.  */
      FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter,
                             GIMPLE_STMT_OPERAND (call_stmt, 0))
        {
          use_stmt1 = use_stmt;
          bsi = bsi_for_stmt (use_stmt);
          bsi_remove (&bsi, true);
        }
      /* Remove the assignment of the allocated area.  */
      FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter,
                             GIMPLE_STMT_OPERAND (use_stmt1, 0))
        {
          bsi = bsi_for_stmt (use_stmt);
          bsi_remove (&bsi, true);
        }
    }
  update_ssa (TODO_update_ssa);
#ifdef ENABLE_CHECKING
  verify_ssa (true);
#endif
  /* Delete the calls to free.  */
  for (i = 1; i < mi->min_indirect_level_escape; i++)
    {
      block_stmt_iterator bsi;
      tree call;

      /* ??? Wonder why this case is possible, but we failed on it
         once.  */
      if (!mi->free_stmts[i].stmt)
        continue;

      call = TREE_OPERAND (mi->free_stmts[i].stmt, 1);
      c_node = cgraph_node (mi->free_stmts[i].func);

      gcc_assert (TREE_CODE (mi->free_stmts[i].stmt) == CALL_EXPR);
      e = cgraph_edge (c_node, mi->free_stmts[i].stmt);
      gcc_assert (e);
      cgraph_remove_edge (e);
      current_function_decl = mi->free_stmts[i].func;
      set_cfun (DECL_STRUCT_FUNCTION (mi->free_stmts[i].func));
      bsi = bsi_for_stmt (mi->free_stmts[i].stmt);
      bsi_remove (&bsi, true);
    }

  /* Return to the previous situation.  */
  current_function_decl = oldfn;
  pop_cfun ();
  return 1;
}

/* Print out the results of the escape analysis.  */

static int
dump_matrix_reorg_analysis (void **slot, void *data ATTRIBUTE_UNUSED)
{
  struct matrix_info *mi = (struct matrix_info *) *slot;

  if (!dump_file)
    return 1;
  fprintf (dump_file, "Matrix \"%s\"; Escaping Level: %d, Num Dims: %d,",
           get_name (mi->decl), mi->min_indirect_level_escape, mi->num_dims);
  fprintf (dump_file, " Malloc Dims: %d, ", mi->max_malloced_level);
  fprintf (dump_file, "\n");
  if (mi->min_indirect_level_escape >= 2)
    fprintf (dump_file, "Flattened %d dimensions\n",
             mi->min_indirect_level_escape);
  return 1;
}

/* Perform matrix flattening.  */

static unsigned int
matrix_reorg (void)
{
  struct cgraph_node *node;

  if (profile_info)
    check_transpose_p = true;
  else
    check_transpose_p = false;
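  /* Transposing decisions are driven by per-dimension access counts,
     so transposing is only considered when profile information is
     available.  */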
  /* If there are hand-written vectors, we skip this optimization.  */
  for (node = cgraph_nodes; node; node = node->next)
    if (!may_flatten_matrices (node))
      return 0;
  matrices_to_reorg = htab_create (37, mtt_info_hash, mtt_info_eq, mat_free);
  /* Find and record all potential matrices in the program.  */
  find_matrices_decl ();
  /* Analyze the accesses of the matrices (escaping analysis).  */
  for (node = cgraph_nodes; node; node = node->next)
    if (node->analyzed)
      {
        tree temp_fn;

        temp_fn = current_function_decl;
        current_function_decl = node->decl;
        push_cfun (DECL_STRUCT_FUNCTION (node->decl));
        bitmap_obstack_initialize (NULL);
        tree_register_cfg_hooks ();

        if (!gimple_in_ssa_p (cfun))
          {
            free_dominance_info (CDI_DOMINATORS);
            free_dominance_info (CDI_POST_DOMINATORS);
            pop_cfun ();
            current_function_decl = temp_fn;
            bitmap_obstack_release (NULL);

            return 0;
          }

#ifdef ENABLE_CHECKING
        verify_flow_info ();
#endif

        if (!matrices_to_reorg)
          {
            free_dominance_info (CDI_DOMINATORS);
            free_dominance_info (CDI_POST_DOMINATORS);
            pop_cfun ();
            current_function_decl = temp_fn;
            bitmap_obstack_release (NULL);

            return 0;
          }

        /* Create the htab for phi nodes.  */
        htab_mat_acc_phi_nodes = htab_create (37, mat_acc_phi_hash,
                                              mat_acc_phi_eq, free);
        if (!check_transpose_p)
          find_sites_in_func (false);
        else
          {
            find_sites_in_func (true);
            loop_optimizer_init (LOOPS_NORMAL);
            if (current_loops)
              scev_initialize ();
            htab_traverse (matrices_to_reorg, analyze_transpose, NULL);
            if (current_loops)
              {
                scev_finalize ();
                loop_optimizer_finalize ();
                current_loops = NULL;
              }
          }
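
        /* With profile data, each matrix now carries per-dimension
           hotness counts that transform_allocation_sites later uses
           to decide whether to transpose.  */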
        /* If the current function is the allocation function for any
           of the matrices we check its allocation and the escaping
           level.  */
        htab_traverse (matrices_to_reorg, check_allocation_function, NULL);
        free_dominance_info (CDI_DOMINATORS);
        free_dominance_info (CDI_POST_DOMINATORS);
        pop_cfun ();
        current_function_decl = temp_fn;
        bitmap_obstack_release (NULL);
      }

  htab_traverse (matrices_to_reorg, transform_allocation_sites, NULL);
  /* Now transform the accesses.  */
  for (node = cgraph_nodes; node; node = node->next)
    if (node->analyzed)
      {
        /* Remember that allocation sites have been handled.  */
        tree temp_fn;

        temp_fn = current_function_decl;
        current_function_decl = node->decl;
        push_cfun (DECL_STRUCT_FUNCTION (node->decl));
        bitmap_obstack_initialize (NULL);
        tree_register_cfg_hooks ();
        record_all_accesses_in_func ();
        htab_traverse (matrices_to_reorg, transform_access_sites, NULL);
        free_dominance_info (CDI_DOMINATORS);
        free_dominance_info (CDI_POST_DOMINATORS);
        pop_cfun ();
        current_function_decl = temp_fn;
        bitmap_obstack_release (NULL);
      }
  htab_traverse (matrices_to_reorg, dump_matrix_reorg_analysis, NULL);

  current_function_decl = NULL;
  set_cfun (NULL);
  matrices_to_reorg = NULL;
  return 0;
}

/* The condition for matrix flattening to be performed.  */

static bool
gate_matrix_reorg (void)
{
  return flag_ipa_matrix_reorg && flag_whole_program;
}
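
/* Flattening changes the layout of a global matrix everywhere it is
   used, which is only safe when all uses are visible; hence the gate
   requires -fwhole-program in addition to -fipa-matrix-reorg.  */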

struct simple_ipa_opt_pass pass_ipa_matrix_reorg =
{
 {
  SIMPLE_IPA_PASS,
  "matrix-reorg",               /* name */
  gate_matrix_reorg,            /* gate */
  matrix_reorg,                 /* execute */
  NULL,                         /* sub */
  NULL,                         /* next */
  0,                            /* static_pass_number */
  0,                            /* tv_id */
  0,                            /* properties_required */
  PROP_trees,                   /* properties_provided */
  0,                            /* properties_destroyed */
  0,                            /* todo_flags_start */
  TODO_dump_cgraph | TODO_dump_func /* todo_flags_finish */
 }
};