Merge from trunk: 215733-215743
[official-gcc.git] / gcc-4_6_3-mobile / gcc / matrix-reorg.c
blobc3031430e83086a2b645545e356af9be9d61bcaf
1 /* Matrix layout transformations.
2 Copyright (C) 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
3 Contributed by Razya Ladelsky <razya@il.ibm.com>
4 Originally written by Revital Eres and Mustafa Hagog.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
23 Matrix flattening optimization tries to replace a N-dimensional
24 matrix with its equivalent M-dimensional matrix, where M < N.
25 This first implementation focuses on global matrices defined dynamically.
27 When N==1, we actually flatten the whole matrix.
28 For instance consider a two-dimensional array a [dim1] [dim2].
29 The code for allocating space for it usually looks like:
31 a = (int **) malloc(dim1 * sizeof(int *));
32 for (i=0; i<dim1; i++)
33 a[i] = (int *) malloc (dim2 * sizeof(int));
35 If the array "a" is found suitable for this optimization,
36 its allocation is replaced by:
38 a = (int *) malloc (dim1 * dim2 *sizeof(int));
40 and all the references to a[i][j] are replaced by a[i * dim2 + j].
42 The two main phases of the optimization are the analysis
43 and transformation.
44 The driver of the optimization is matrix_reorg ().
48 Analysis phase:
49 ===============
51 We'll number the dimensions outside-in, meaning the most external
52 is 0, then 1, and so on.
53 The analysis part of the optimization determines K, the escape
54 level of a N-dimensional matrix (K <= N), that allows flattening of
55 the external dimensions 0,1,..., K-1. Escape level 0 means that the
56 whole matrix escapes and no flattening is possible.
58 The analysis part is implemented in analyze_matrix_allocation_site()
59 and analyze_matrix_accesses().
61 Transformation phase:
62 =====================
63 In this phase we define the new flattened matrices that replace the
64 original matrices in the code.
65 Implemented in transform_allocation_sites(),
66 transform_access_sites().
68 Matrix Transposing
69 ==================
70 The idea of Matrix Transposing is organizing the matrix in a different
71 layout such that the dimensions are reordered.
72 This could produce better cache behavior in some cases.
74 For example, lets look at the matrix accesses in the following loop:
76 for (i=0; i<N; i++)
77 for (j=0; j<M; j++)
78 access to a[i][j]
80 This loop can produce good cache behavior because the elements of
81 the inner dimension are accessed sequentially.
83 However, if the accesses of the matrix were of the following form:
85 for (i=0; i<N; i++)
86 for (j=0; j<M; j++)
87 access to a[j][i]
89 In this loop we iterate the columns and not the rows.
90 Therefore, replacing the rows and columns
91 would have had an organization with better (cache) locality.
92 Replacing the dimensions of the matrix is called matrix transposing.
94 This example, of course, could be enhanced to multiple dimensions matrices
95 as well.
97 Since a program could include all kind of accesses, there is a decision
98 mechanism, implemented in analyze_transpose(), which implements a
99 heuristic that tries to determine whether to transpose the matrix or not,
100 according to the form of the more dominant accesses.
101 This decision is transferred to the flattening mechanism, and whether
102 the matrix was transposed or not, the matrix is flattened (if possible).
104 This decision making is based on profiling information and loop information.
105 If profiling information is available, decision making mechanism will be
106 operated, otherwise the matrix will only be flattened (if possible).
108 Both optimizations are described in the paper "Matrix flattening and
109 transposing in GCC" which was presented in GCC summit 2006.
110 http://www.gccsummit.org/2006/2006-GCC-Summit-Proceedings.pdf. */
112 #include "config.h"
113 #include "system.h"
114 #include "coretypes.h"
115 #include "tm.h"
116 #include "tree.h"
117 #include "rtl.h"
118 #include "tree-inline.h"
119 #include "tree-flow.h"
120 #include "tree-flow-inline.h"
121 #include "langhooks.h"
122 #include "hashtab.h"
123 #include "flags.h"
124 #include "ggc.h"
125 #include "debug.h"
126 #include "target.h"
127 #include "cgraph.h"
128 #include "diagnostic-core.h"
129 #include "timevar.h"
130 #include "params.h"
131 #include "fibheap.h"
132 #include "intl.h"
133 #include "function.h"
134 #include "basic-block.h"
135 #include "cfgloop.h"
136 #include "tree-iterator.h"
137 #include "tree-pass.h"
138 #include "opts.h"
139 #include "tree-data-ref.h"
140 #include "tree-chrec.h"
141 #include "tree-scalar-evolution.h"
142 #include "tree-ssa-sccvn.h"
144 /* We need to collect a lot of data from the original malloc,
145 particularly as the gimplifier has converted:
147 orig_var = (struct_type *) malloc (x * sizeof (struct_type *));
149 into
151 T3 = <constant> ; ** <constant> is amount to malloc; precomputed **
152 T4 = malloc (T3);
153 T5 = (struct_type *) T4;
154 orig_var = T5;
156 The following struct fields allow us to collect all the necessary data from
157 the gimplified program. The comments in the struct below are all based
158 on the gimple example above. */
160 struct malloc_call_data
162 gimple call_stmt; /* Tree for "T4 = malloc (T3);" */
163 tree size_var; /* Var decl for T3. */
164 tree malloc_size; /* Tree for "<constant>", the rhs assigned to T3. */
167 static tree can_calculate_expr_before_stmt (tree, sbitmap);
168 static tree can_calculate_stmt_before_stmt (gimple, sbitmap);
170 /* The front end of the compiler, when parsing statements of the form:
172 var = (type_cast) malloc (sizeof (type));
174 always converts this single statement into the following statements
175 (GIMPLE form):
177 T.1 = sizeof (type);
178 T.2 = malloc (T.1);
179 T.3 = (type_cast) T.2;
180 var = T.3;
182 Since we need to create new malloc statements and modify the original
183 statements somewhat, we need to find all four of the above statements.
184 Currently record_call_1 (called for building cgraph edges) finds and
185 records the statements containing the actual call to malloc, but we
186 need to find the rest of the variables/statements on our own. That
187 is what the following function does. */
188 static void
189 collect_data_for_malloc_call (gimple stmt, struct malloc_call_data *m_data)
191 tree size_var = NULL;
192 tree malloc_fn_decl;
193 tree arg1;
195 gcc_assert (is_gimple_call (stmt));
197 malloc_fn_decl = gimple_call_fndecl (stmt);
198 if (malloc_fn_decl == NULL
199 || DECL_FUNCTION_CODE (malloc_fn_decl) != BUILT_IN_MALLOC)
200 return;
202 arg1 = gimple_call_arg (stmt, 0);
203 size_var = arg1;
205 m_data->call_stmt = stmt;
206 m_data->size_var = size_var;
207 if (TREE_CODE (size_var) != VAR_DECL)
208 m_data->malloc_size = size_var;
209 else
210 m_data->malloc_size = NULL_TREE;
213 /* Information about matrix access site.
214 For example: if an access site of matrix arr is arr[i][j]
215 the ACCESS_SITE_INFO structure will have the address
216 of arr as its stmt. The INDEX_INFO will hold information about the
217 initial address and index of each dimension. */
218 struct access_site_info
220 /* The statement (MEM_REF or POINTER_PLUS_EXPR). */
221 gimple stmt;
223 /* In case of POINTER_PLUS_EXPR, what is the offset. */
224 tree offset;
226 /* The index which created the offset. */
227 tree index;
229 /* The indirection level of this statement. */
230 int level;
232 /* TRUE for allocation site FALSE for access site. */
233 bool is_alloc;
235 /* The function containing the access site. */
236 tree function_decl;
238 /* This access is iterated in the inner most loop */
239 bool iterated_by_inner_most_loop_p;
242 typedef struct access_site_info *access_site_info_p;
243 DEF_VEC_P (access_site_info_p);
244 DEF_VEC_ALLOC_P (access_site_info_p, heap);
246 /* Calls to free when flattening a matrix. */
248 struct free_info
250 gimple stmt;
251 tree func;
254 /* Information about matrix to flatten. */
255 struct matrix_info
257 /* Decl tree of this matrix. */
258 tree decl;
259 /* Number of dimensions; number
260 of "*" in the type declaration. */
261 int num_dims;
263 /* Minimum indirection level that escapes, 0 means that
264 the whole matrix escapes, k means that dimensions
265 0 to ACTUAL_DIM - k escapes. */
266 int min_indirect_level_escape;
268 gimple min_indirect_level_escape_stmt;
270 /* Hold the allocation site for each level (dimension).
271 We can use NUM_DIMS as the upper bound and allocate the array
272 once with this number of elements and no need to use realloc and
273 MAX_MALLOCED_LEVEL. */
274 gimple *malloc_for_level;
276 int max_malloced_level;
278 /* Is the matrix transposed. */
279 bool is_transposed_p;
281 /* The location of the allocation sites (they must be in one
282 function). */
283 tree allocation_function_decl;
285 /* The calls to free for each level of indirection. */
286 struct free_info *free_stmts;
288 /* An array which holds for each dimension its size. where
289 dimension 0 is the outer most (one that contains all the others).
291 tree *dimension_size;
293 /* An array which holds for each dimension it's original size
294 (before transposing and flattening take place). */
295 tree *dimension_size_orig;
297 /* An array which holds for each dimension the size of the type of
298 of elements accessed in that level (in bytes). */
299 HOST_WIDE_INT *dimension_type_size;
301 int dimension_type_size_len;
303 /* An array collecting the count of accesses for each dimension. */
304 gcov_type *dim_hot_level;
306 /* An array of the accesses to be flattened.
307 elements are of type "struct access_site_info *". */
308 VEC (access_site_info_p, heap) * access_l;
310 /* A map of how the dimensions will be organized at the end of
311 the analyses. */
312 int *dim_map;
315 /* In each phi node we want to record the indirection level we have when we
316 get to the phi node. Usually we will have phi nodes with more than two
317 arguments, then we must assure that all of them get to the phi node with
318 the same indirection level, otherwise it's not safe to do the flattening.
319 So we record the information regarding the indirection level each time we
320 get to the phi node in this hash table. */
322 struct matrix_access_phi_node
324 gimple phi;
325 int indirection_level;
328 /* We use this structure to find if the SSA variable is accessed inside the
329 tree and record the tree containing it. */
331 struct ssa_acc_in_tree
333 /* The variable whose accesses in the tree we are looking for. */
334 tree ssa_var;
335 /* The tree and code inside it the ssa_var is accessed, currently
336 it could be an MEM_REF or CALL_EXPR. */
337 enum tree_code t_code;
338 tree t_tree;
339 /* The place in the containing tree. */
340 tree *tp;
341 tree second_op;
342 bool var_found;
345 static void analyze_matrix_accesses (struct matrix_info *, tree, int, bool,
346 sbitmap, bool);
347 static int transform_allocation_sites (void **, void *);
348 static int transform_access_sites (void **, void *);
349 static int analyze_transpose (void **, void *);
350 static int dump_matrix_reorg_analysis (void **, void *);
352 static bool check_transpose_p;
354 /* Hash function used for the phi nodes. */
356 static hashval_t
357 mat_acc_phi_hash (const void *p)
359 const struct matrix_access_phi_node *const ma_phi =
360 (const struct matrix_access_phi_node *) p;
362 return htab_hash_pointer (ma_phi->phi);
365 /* Equality means phi node pointers are the same. */
367 static int
368 mat_acc_phi_eq (const void *p1, const void *p2)
370 const struct matrix_access_phi_node *const phi1 =
371 (const struct matrix_access_phi_node *) p1;
372 const struct matrix_access_phi_node *const phi2 =
373 (const struct matrix_access_phi_node *) p2;
375 if (phi1->phi == phi2->phi)
376 return 1;
378 return 0;
381 /* Hold the PHI nodes we visit during the traversal for escaping
382 analysis. */
383 static htab_t htab_mat_acc_phi_nodes = NULL;
385 /* This hash-table holds the information about the matrices we are
386 going to handle. */
387 static htab_t matrices_to_reorg = NULL;
389 /* Return a hash for MTT, which is really a "matrix_info *". */
390 static hashval_t
391 mtt_info_hash (const void *mtt)
393 return htab_hash_pointer (((const struct matrix_info *) mtt)->decl);
396 /* Return true if MTT1 and MTT2 (which are really both of type
397 "matrix_info *") refer to the same decl. */
398 static int
399 mtt_info_eq (const void *mtt1, const void *mtt2)
401 const struct matrix_info *const i1 = (const struct matrix_info *) mtt1;
402 const struct matrix_info *const i2 = (const struct matrix_info *) mtt2;
404 if (i1->decl == i2->decl)
405 return true;
407 return false;
410 /* Return false if STMT may contain a vector expression.
411 In this situation, all matrices should not be flattened. */
412 static bool
413 may_flatten_matrices_1 (gimple stmt)
415 switch (gimple_code (stmt))
417 case GIMPLE_ASSIGN:
418 case GIMPLE_CALL:
419 if (!gimple_has_lhs (stmt))
420 return true;
421 if (TREE_CODE (TREE_TYPE (gimple_get_lhs (stmt))) == VECTOR_TYPE)
423 if (dump_file)
424 fprintf (dump_file,
425 "Found vector type, don't flatten matrix\n");
426 return false;
428 break;
429 case GIMPLE_ASM:
430 /* Asm code could contain vector operations. */
431 return false;
432 break;
433 default:
434 break;
436 return true;
439 /* Return false if there are hand-written vectors in the program.
440 We disable the flattening in such a case. */
441 static bool
442 may_flatten_matrices (struct cgraph_node *node)
444 tree decl;
445 struct function *func;
446 basic_block bb;
447 gimple_stmt_iterator gsi;
449 decl = node->decl;
450 if (node->analyzed)
452 func = DECL_STRUCT_FUNCTION (decl);
453 FOR_EACH_BB_FN (bb, func)
454 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
455 if (!may_flatten_matrices_1 (gsi_stmt (gsi)))
456 return false;
458 return true;
461 /* Given a VAR_DECL, check its type to determine whether it is
462 a definition of a dynamic allocated matrix and therefore is
463 a suitable candidate for the matrix flattening optimization.
464 Return NULL if VAR_DECL is not such decl. Otherwise, allocate
465 a MATRIX_INFO structure, fill it with the relevant information
466 and return a pointer to it.
467 TODO: handle also statically defined arrays. */
468 static struct matrix_info *
469 analyze_matrix_decl (tree var_decl)
471 struct matrix_info *m_node, tmpmi, *mi;
472 tree var_type;
473 int dim_num = 0;
475 gcc_assert (matrices_to_reorg);
477 if (TREE_CODE (var_decl) == PARM_DECL)
478 var_type = DECL_ARG_TYPE (var_decl);
479 else if (TREE_CODE (var_decl) == VAR_DECL)
480 var_type = TREE_TYPE (var_decl);
481 else
482 return NULL;
484 if (!POINTER_TYPE_P (var_type))
485 return NULL;
487 while (POINTER_TYPE_P (var_type))
489 var_type = TREE_TYPE (var_type);
490 dim_num++;
493 if (dim_num <= 1)
494 return NULL;
496 if (!COMPLETE_TYPE_P (var_type)
497 || TREE_CODE (TYPE_SIZE_UNIT (var_type)) != INTEGER_CST)
498 return NULL;
500 /* Check to see if this pointer is already in there. */
501 tmpmi.decl = var_decl;
502 mi = (struct matrix_info *) htab_find (matrices_to_reorg, &tmpmi);
504 if (mi)
505 return NULL;
507 /* Record the matrix. */
509 m_node = (struct matrix_info *) xcalloc (1, sizeof (struct matrix_info));
510 m_node->decl = var_decl;
511 m_node->num_dims = dim_num;
512 m_node->free_stmts
513 = (struct free_info *) xcalloc (dim_num, sizeof (struct free_info));
515 /* Init min_indirect_level_escape to -1 to indicate that no escape
516 analysis has been done yet. */
517 m_node->min_indirect_level_escape = -1;
518 m_node->is_transposed_p = false;
520 return m_node;
523 /* Free matrix E. */
524 static void
525 mat_free (void *e)
527 struct matrix_info *mat = (struct matrix_info *) e;
529 if (!mat)
530 return;
532 if (mat->free_stmts)
533 free (mat->free_stmts);
534 if (mat->dim_hot_level)
535 free (mat->dim_hot_level);
536 if (mat->malloc_for_level)
537 free (mat->malloc_for_level);
540 /* Find all potential matrices.
541 TODO: currently we handle only multidimensional
542 dynamically allocated arrays. */
543 static void
544 find_matrices_decl (void)
546 struct matrix_info *tmp;
547 PTR *slot;
548 struct varpool_node *vnode;
550 gcc_assert (matrices_to_reorg);
552 /* For every global variable in the program:
553 Check to see if it's of a candidate type and record it. */
554 for (vnode = varpool_nodes_queue; vnode; vnode = vnode->next_needed)
556 tree var_decl = vnode->decl;
558 if (!var_decl || TREE_CODE (var_decl) != VAR_DECL)
559 continue;
561 if (matrices_to_reorg)
562 if ((tmp = analyze_matrix_decl (var_decl)))
564 if (!TREE_ADDRESSABLE (var_decl))
566 slot = htab_find_slot (matrices_to_reorg, tmp, INSERT);
567 *slot = tmp;
571 return;
574 /* Mark that the matrix MI escapes at level L. */
575 static void
576 mark_min_matrix_escape_level (struct matrix_info *mi, int l, gimple s)
578 if (mi->min_indirect_level_escape == -1
579 || (mi->min_indirect_level_escape > l))
581 mi->min_indirect_level_escape = l;
582 mi->min_indirect_level_escape_stmt = s;
586 /* Find if the SSA variable is accessed inside the
587 tree and record the tree containing it.
588 The only relevant uses are the case of SSA_NAME, or SSA inside
589 MEM_REF, PLUS_EXPR, POINTER_PLUS_EXPR, MULT_EXPR. */
590 static void
591 ssa_accessed_in_tree (tree t, struct ssa_acc_in_tree *a)
593 a->t_code = TREE_CODE (t);
594 switch (a->t_code)
596 case SSA_NAME:
597 if (t == a->ssa_var)
598 a->var_found = true;
599 break;
600 case MEM_REF:
601 if (SSA_VAR_P (TREE_OPERAND (t, 0))
602 && TREE_OPERAND (t, 0) == a->ssa_var)
603 a->var_found = true;
604 break;
605 default:
606 break;
610 /* Find if the SSA variable is accessed on the right hand side of
611 gimple call STMT. */
613 static void
614 ssa_accessed_in_call_rhs (gimple stmt, struct ssa_acc_in_tree *a)
616 tree decl;
617 tree arg;
618 size_t i;
620 a->t_code = CALL_EXPR;
621 for (i = 0; i < gimple_call_num_args (stmt); i++)
623 arg = gimple_call_arg (stmt, i);
624 if (arg == a->ssa_var)
626 a->var_found = true;
627 decl = gimple_call_fndecl (stmt);
628 a->t_tree = decl;
629 break;
634 /* Find if the SSA variable is accessed on the right hand side of
635 gimple assign STMT. */
637 static void
638 ssa_accessed_in_assign_rhs (gimple stmt, struct ssa_acc_in_tree *a)
641 a->t_code = gimple_assign_rhs_code (stmt);
642 switch (a->t_code)
644 tree op1, op2;
646 case SSA_NAME:
647 case MEM_REF:
648 CASE_CONVERT:
649 case VIEW_CONVERT_EXPR:
650 ssa_accessed_in_tree (gimple_assign_rhs1 (stmt), a);
651 break;
652 case POINTER_PLUS_EXPR:
653 case PLUS_EXPR:
654 case MULT_EXPR:
655 op1 = gimple_assign_rhs1 (stmt);
656 op2 = gimple_assign_rhs2 (stmt);
658 if (op1 == a->ssa_var)
660 a->var_found = true;
661 a->second_op = op2;
663 else if (op2 == a->ssa_var)
665 a->var_found = true;
666 a->second_op = op1;
668 break;
669 default:
670 break;
674 /* Record the access/allocation site information for matrix MI so we can
675 handle it later in transformation. */
676 static void
677 record_access_alloc_site_info (struct matrix_info *mi, gimple stmt, tree offset,
678 tree index, int level, bool is_alloc)
680 struct access_site_info *acc_info;
682 if (!mi->access_l)
683 mi->access_l = VEC_alloc (access_site_info_p, heap, 100);
685 acc_info
686 = (struct access_site_info *)
687 xcalloc (1, sizeof (struct access_site_info));
688 acc_info->stmt = stmt;
689 acc_info->offset = offset;
690 acc_info->index = index;
691 acc_info->function_decl = current_function_decl;
692 acc_info->level = level;
693 acc_info->is_alloc = is_alloc;
695 VEC_safe_push (access_site_info_p, heap, mi->access_l, acc_info);
699 /* Record the malloc as the allocation site of the given LEVEL. But
700 first we Make sure that all the size parameters passed to malloc in
701 all the allocation sites could be pre-calculated before the call to
702 the malloc of level 0 (the main malloc call). */
703 static void
704 add_allocation_site (struct matrix_info *mi, gimple stmt, int level)
706 struct malloc_call_data mcd;
708 /* Make sure that the allocation sites are in the same function. */
709 if (!mi->allocation_function_decl)
710 mi->allocation_function_decl = current_function_decl;
711 else if (mi->allocation_function_decl != current_function_decl)
713 int min_malloc_level;
715 gcc_assert (mi->malloc_for_level);
717 /* Find the minimum malloc level that already has been seen;
718 we known its allocation function must be
719 MI->allocation_function_decl since it's different than
720 CURRENT_FUNCTION_DECL then the escaping level should be
721 MIN (LEVEL, MIN_MALLOC_LEVEL) - 1 , and the allocation function
722 must be set accordingly. */
723 for (min_malloc_level = 0;
724 min_malloc_level < mi->max_malloced_level
725 && mi->malloc_for_level[min_malloc_level]; min_malloc_level++);
726 if (level < min_malloc_level)
728 mi->allocation_function_decl = current_function_decl;
729 mark_min_matrix_escape_level (mi, min_malloc_level, stmt);
731 else
733 mark_min_matrix_escape_level (mi, level, stmt);
734 /* cannot be that (level == min_malloc_level)
735 we would have returned earlier. */
736 return;
740 /* Find the correct malloc information. */
741 collect_data_for_malloc_call (stmt, &mcd);
743 /* We accept only calls to malloc function; we do not accept
744 calls like calloc and realloc. */
745 if (!mi->malloc_for_level)
747 mi->malloc_for_level = XCNEWVEC (gimple, level + 1);
748 mi->max_malloced_level = level + 1;
750 else if (mi->max_malloced_level <= level)
752 mi->malloc_for_level
753 = XRESIZEVEC (gimple, mi->malloc_for_level, level + 1);
755 /* Zero the newly allocated items. */
756 memset (&(mi->malloc_for_level[mi->max_malloced_level + 1]),
757 0, (level - mi->max_malloced_level) * sizeof (tree));
759 mi->max_malloced_level = level + 1;
761 mi->malloc_for_level[level] = stmt;
764 /* Given an assignment statement STMT that we know that its
765 left-hand-side is the matrix MI variable, we traverse the immediate
766 uses backwards until we get to a malloc site. We make sure that
767 there is one and only one malloc site that sets this variable. When
768 we are performing the flattening we generate a new variable that
769 will hold the size for each dimension; each malloc that allocates a
770 dimension has the size parameter; we use that parameter to
771 initialize the dimension size variable so we can use it later in
772 the address calculations. LEVEL is the dimension we're inspecting.
773 Return if STMT is related to an allocation site. */
775 static void
776 analyze_matrix_allocation_site (struct matrix_info *mi, gimple stmt,
777 int level, sbitmap visited)
779 if (gimple_assign_copy_p (stmt) || gimple_assign_cast_p (stmt))
781 tree rhs = gimple_assign_rhs1 (stmt);
783 if (TREE_CODE (rhs) == SSA_NAME)
785 gimple def = SSA_NAME_DEF_STMT (rhs);
787 analyze_matrix_allocation_site (mi, def, level, visited);
788 return;
790 /* If we are back to the original matrix variable then we
791 are sure that this is analyzed as an access site. */
792 else if (rhs == mi->decl)
793 return;
795 /* A result of call to malloc. */
796 else if (is_gimple_call (stmt))
798 int call_flags = gimple_call_flags (stmt);
800 if (!(call_flags & ECF_MALLOC))
802 mark_min_matrix_escape_level (mi, level, stmt);
803 return;
805 else
807 tree malloc_fn_decl;
809 malloc_fn_decl = gimple_call_fndecl (stmt);
810 if (malloc_fn_decl == NULL_TREE)
812 mark_min_matrix_escape_level (mi, level, stmt);
813 return;
815 if (DECL_FUNCTION_CODE (malloc_fn_decl) != BUILT_IN_MALLOC)
817 if (dump_file)
818 fprintf (dump_file,
819 "Matrix %s is an argument to function %s\n",
820 get_name (mi->decl), get_name (malloc_fn_decl));
821 mark_min_matrix_escape_level (mi, level, stmt);
822 return;
825 /* This is a call to malloc of level 'level'.
826 mi->max_malloced_level-1 == level means that we've
827 seen a malloc statement of level 'level' before.
828 If the statement is not the same one that we've
829 seen before, then there's another malloc statement
830 for the same level, which means that we need to mark
831 it escaping. */
832 if (mi->malloc_for_level
833 && mi->max_malloced_level-1 == level
834 && mi->malloc_for_level[level] != stmt)
836 mark_min_matrix_escape_level (mi, level, stmt);
837 return;
839 else
840 add_allocation_site (mi, stmt, level);
841 return;
843 /* Looks like we don't know what is happening in this
844 statement so be in the safe side and mark it as escaping. */
845 mark_min_matrix_escape_level (mi, level, stmt);
848 /* The transposing decision making.
849 In order to to calculate the profitability of transposing, we collect two
850 types of information regarding the accesses:
851 1. profiling information used to express the hotness of an access, that
852 is how often the matrix is accessed by this access site (count of the
853 access site).
854 2. which dimension in the access site is iterated by the inner
855 most loop containing this access.
857 The matrix will have a calculated value of weighted hotness for each
858 dimension.
859 Intuitively the hotness level of a dimension is a function of how
860 many times it was the most frequently accessed dimension in the
861 highly executed access sites of this matrix.
863 As computed by following equation:
865 __ __
866 \ \ dim_hot_level[i] +=
867 /_ /_
869 acc[j]->dim[i]->iter_by_inner_loop * count(j)
871 Where n is the number of dims and m is the number of the matrix
872 access sites. acc[j]->dim[i]->iter_by_inner_loop is 1 if acc[j]
873 iterates over dim[i] in innermost loop, and is 0 otherwise.
875 The organization of the new matrix should be according to the
876 hotness of each dimension. The hotness of the dimension implies
877 the locality of the elements.*/
878 static int
879 analyze_transpose (void **slot, void *data ATTRIBUTE_UNUSED)
881 struct matrix_info *mi = (struct matrix_info *) *slot;
882 int min_escape_l = mi->min_indirect_level_escape;
883 struct loop *loop;
884 affine_iv iv;
885 struct access_site_info *acc_info;
886 int i;
888 if (min_escape_l < 2 || !mi->access_l)
890 if (mi->access_l)
892 FOR_EACH_VEC_ELT (access_site_info_p, mi->access_l, i, acc_info)
893 free (acc_info);
894 VEC_free (access_site_info_p, heap, mi->access_l);
897 return 1;
899 if (!mi->dim_hot_level)
900 mi->dim_hot_level =
901 (gcov_type *) xcalloc (min_escape_l, sizeof (gcov_type));
904 for (i = 0; VEC_iterate (access_site_info_p, mi->access_l, i, acc_info);
905 i++)
907 if (gimple_assign_rhs_code (acc_info->stmt) == POINTER_PLUS_EXPR
908 && acc_info->level < min_escape_l)
910 loop = loop_containing_stmt (acc_info->stmt);
911 if (!loop || loop->inner)
913 free (acc_info);
914 continue;
916 if (simple_iv (loop, loop, acc_info->offset, &iv, true))
918 if (iv.step != NULL)
920 HOST_WIDE_INT istep;
922 istep = int_cst_value (iv.step);
923 if (istep != 0)
925 acc_info->iterated_by_inner_most_loop_p = 1;
926 mi->dim_hot_level[acc_info->level] +=
927 gimple_bb (acc_info->stmt)->count;
933 free (acc_info);
935 VEC_free (access_site_info_p, heap, mi->access_l);
937 return 1;
940 /* Find the index which defines the OFFSET from base.
941 We walk from use to def until we find how the offset was defined. */
942 static tree
943 get_index_from_offset (tree offset, gimple def_stmt)
945 tree op1, op2, index;
947 if (gimple_code (def_stmt) == GIMPLE_PHI)
948 return NULL;
949 if ((gimple_assign_copy_p (def_stmt) || gimple_assign_cast_p (def_stmt))
950 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME)
951 return get_index_from_offset (offset,
952 SSA_NAME_DEF_STMT (gimple_assign_rhs1 (def_stmt)));
953 else if (is_gimple_assign (def_stmt)
954 && gimple_assign_rhs_code (def_stmt) == MULT_EXPR)
956 op1 = gimple_assign_rhs1 (def_stmt);
957 op2 = gimple_assign_rhs2 (def_stmt);
958 if (TREE_CODE (op1) != INTEGER_CST && TREE_CODE (op2) != INTEGER_CST)
959 return NULL;
960 index = (TREE_CODE (op1) == INTEGER_CST) ? op2 : op1;
961 return index;
963 else
964 return NULL_TREE;
967 /* update MI->dimension_type_size[CURRENT_INDIRECT_LEVEL] with the size
968 of the type related to the SSA_VAR, or the type related to the
969 lhs of STMT, in the case that it is an MEM_REF. */
970 static void
971 update_type_size (struct matrix_info *mi, gimple stmt, tree ssa_var,
972 int current_indirect_level)
974 tree lhs;
975 HOST_WIDE_INT type_size;
977 /* Update type according to the type of the MEM_REF expr. */
978 if (is_gimple_assign (stmt)
979 && TREE_CODE (gimple_assign_lhs (stmt)) == MEM_REF)
981 lhs = gimple_assign_lhs (stmt);
982 gcc_assert (POINTER_TYPE_P
983 (TREE_TYPE (SSA_NAME_VAR (TREE_OPERAND (lhs, 0)))));
984 type_size =
985 int_size_in_bytes (TREE_TYPE
986 (TREE_TYPE
987 (SSA_NAME_VAR (TREE_OPERAND (lhs, 0)))));
989 else
990 type_size = int_size_in_bytes (TREE_TYPE (ssa_var));
992 /* Record the size of elements accessed (as a whole)
993 in the current indirection level (dimension). If the size of
994 elements is not known at compile time, mark it as escaping. */
995 if (type_size <= 0)
996 mark_min_matrix_escape_level (mi, current_indirect_level, stmt);
997 else
999 int l = current_indirect_level;
1001 if (!mi->dimension_type_size)
1003 mi->dimension_type_size
1004 = (HOST_WIDE_INT *) xcalloc (l + 1, sizeof (HOST_WIDE_INT));
1005 mi->dimension_type_size_len = l + 1;
1007 else if (mi->dimension_type_size_len < l + 1)
1009 mi->dimension_type_size
1010 = (HOST_WIDE_INT *) xrealloc (mi->dimension_type_size,
1011 (l + 1) * sizeof (HOST_WIDE_INT));
1012 memset (&mi->dimension_type_size[mi->dimension_type_size_len],
1013 0, (l + 1 - mi->dimension_type_size_len)
1014 * sizeof (HOST_WIDE_INT));
1015 mi->dimension_type_size_len = l + 1;
1017 /* Make sure all the accesses in the same level have the same size
1018 of the type. */
1019 if (!mi->dimension_type_size[l])
1020 mi->dimension_type_size[l] = type_size;
1021 else if (mi->dimension_type_size[l] != type_size)
1022 mark_min_matrix_escape_level (mi, l, stmt);
1026 /* USE_STMT represents a GIMPLE_CALL, where one of the arguments is the
1027 ssa var that we want to check because it came from some use of matrix
1028 MI. CURRENT_INDIRECT_LEVEL is the indirection level we reached so
1029 far. */
1031 static int
1032 analyze_accesses_for_call_stmt (struct matrix_info *mi, tree ssa_var,
1033 gimple use_stmt, int current_indirect_level)
1035 tree fndecl = gimple_call_fndecl (use_stmt);
1037 if (gimple_call_lhs (use_stmt))
1039 tree lhs = gimple_call_lhs (use_stmt);
1040 struct ssa_acc_in_tree lhs_acc, rhs_acc;
1042 memset (&lhs_acc, 0, sizeof (lhs_acc));
1043 memset (&rhs_acc, 0, sizeof (rhs_acc));
1045 lhs_acc.ssa_var = ssa_var;
1046 lhs_acc.t_code = ERROR_MARK;
1047 ssa_accessed_in_tree (lhs, &lhs_acc);
1048 rhs_acc.ssa_var = ssa_var;
1049 rhs_acc.t_code = ERROR_MARK;
1050 ssa_accessed_in_call_rhs (use_stmt, &rhs_acc);
1052 /* The SSA must be either in the left side or in the right side,
1053 to understand what is happening.
1054 In case the SSA_NAME is found in both sides we should be escaping
1055 at this level because in this case we cannot calculate the
1056 address correctly. */
1057 if ((lhs_acc.var_found && rhs_acc.var_found
1058 && lhs_acc.t_code == MEM_REF)
1059 || (!rhs_acc.var_found && !lhs_acc.var_found))
1061 mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
1062 return current_indirect_level;
1064 gcc_assert (!rhs_acc.var_found || !lhs_acc.var_found);
1066 /* If we are storing to the matrix at some level, then mark it as
1067 escaping at that level. */
1068 if (lhs_acc.var_found)
1070 int l = current_indirect_level + 1;
1072 gcc_assert (lhs_acc.t_code == MEM_REF);
1073 mark_min_matrix_escape_level (mi, l, use_stmt);
1074 return current_indirect_level;
1078 if (fndecl)
1080 if (DECL_FUNCTION_CODE (fndecl) != BUILT_IN_FREE)
1082 if (dump_file)
1083 fprintf (dump_file,
1084 "Matrix %s: Function call %s, level %d escapes.\n",
1085 get_name (mi->decl), get_name (fndecl),
1086 current_indirect_level);
1087 mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
1089 else if (mi->free_stmts[current_indirect_level].stmt != NULL
1090 && mi->free_stmts[current_indirect_level].stmt != use_stmt)
1091 mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
1092 else
1094 /*Record the free statements so we can delete them
1095 later. */
1096 int l = current_indirect_level;
1098 mi->free_stmts[l].stmt = use_stmt;
1099 mi->free_stmts[l].func = current_function_decl;
1102 return current_indirect_level;
1105 /* USE_STMT represents a phi node of the ssa var that we want to
1106 check because it came from some use of matrix
1108 We check all the escaping levels that get to the PHI node
1109 and make sure they are all the same escaping;
1110 if not (which is rare) we let the escaping level be the
1111 minimum level that gets into that PHI because starting from
1112 that level we cannot expect the behavior of the indirections.
1113 CURRENT_INDIRECT_LEVEL is the indirection level we reached so far. */
1115 static void
1116 analyze_accesses_for_phi_node (struct matrix_info *mi, gimple use_stmt,
1117 int current_indirect_level, sbitmap visited,
1118 bool record_accesses)
1121 struct matrix_access_phi_node tmp_maphi, *maphi, **pmaphi;
1123 tmp_maphi.phi = use_stmt;
1124 if ((maphi = (struct matrix_access_phi_node *)
1125 htab_find (htab_mat_acc_phi_nodes, &tmp_maphi)))
1127 if (maphi->indirection_level == current_indirect_level)
1128 return;
1129 else
1131 int level = MIN (maphi->indirection_level,
1132 current_indirect_level);
1133 size_t j;
1134 gimple stmt = NULL;
1136 maphi->indirection_level = level;
1137 for (j = 0; j < gimple_phi_num_args (use_stmt); j++)
1139 tree def = PHI_ARG_DEF (use_stmt, j);
1141 if (gimple_code (SSA_NAME_DEF_STMT (def)) != GIMPLE_PHI)
1142 stmt = SSA_NAME_DEF_STMT (def);
1144 mark_min_matrix_escape_level (mi, level, stmt);
1146 return;
1148 maphi = (struct matrix_access_phi_node *)
1149 xcalloc (1, sizeof (struct matrix_access_phi_node));
1150 maphi->phi = use_stmt;
1151 maphi->indirection_level = current_indirect_level;
1153 /* Insert to hash table. */
1154 pmaphi = (struct matrix_access_phi_node **)
1155 htab_find_slot (htab_mat_acc_phi_nodes, maphi, INSERT);
1156 gcc_assert (pmaphi);
1157 *pmaphi = maphi;
1159 if (!TEST_BIT (visited, SSA_NAME_VERSION (PHI_RESULT (use_stmt))))
1161 SET_BIT (visited, SSA_NAME_VERSION (PHI_RESULT (use_stmt)));
1162 analyze_matrix_accesses (mi, PHI_RESULT (use_stmt),
1163 current_indirect_level, false, visited,
1164 record_accesses);
1165 RESET_BIT (visited, SSA_NAME_VERSION (PHI_RESULT (use_stmt)));
1169 /* USE_STMT represents an assign statement (the rhs or lhs include
1170 the ssa var that we want to check because it came from some use of matrix
1171 MI. CURRENT_INDIRECT_LEVEL is the indirection level we reached so far. */
1173 static int
1174 analyze_accesses_for_assign_stmt (struct matrix_info *mi, tree ssa_var,
1175 gimple use_stmt, int current_indirect_level,
1176 bool last_op, sbitmap visited,
1177 bool record_accesses)
1179 tree lhs = gimple_get_lhs (use_stmt);
1180 struct ssa_acc_in_tree lhs_acc, rhs_acc;
1182 memset (&lhs_acc, 0, sizeof (lhs_acc));
1183 memset (&rhs_acc, 0, sizeof (rhs_acc));
1185 lhs_acc.ssa_var = ssa_var;
1186 lhs_acc.t_code = ERROR_MARK;
1187 ssa_accessed_in_tree (lhs, &lhs_acc);
1188 rhs_acc.ssa_var = ssa_var;
1189 rhs_acc.t_code = ERROR_MARK;
1190 ssa_accessed_in_assign_rhs (use_stmt, &rhs_acc);
1192 /* The SSA must be either in the left side or in the right side,
1193 to understand what is happening.
1194 In case the SSA_NAME is found in both sides we should be escaping
1195 at this level because in this case we cannot calculate the
1196 address correctly. */
1197 if ((lhs_acc.var_found && rhs_acc.var_found
1198 && lhs_acc.t_code == MEM_REF)
1199 || (!rhs_acc.var_found && !lhs_acc.var_found))
1201 mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
1202 return current_indirect_level;
1204 gcc_assert (!rhs_acc.var_found || !lhs_acc.var_found);
1206 /* If we are storing to the matrix at some level, then mark it as
1207 escaping at that level. */
1208 if (lhs_acc.var_found)
1210 int l = current_indirect_level + 1;
1212 gcc_assert (lhs_acc.t_code == MEM_REF);
1214 if (!(gimple_assign_copy_p (use_stmt)
1215 || gimple_assign_cast_p (use_stmt))
1216 || (TREE_CODE (gimple_assign_rhs1 (use_stmt)) != SSA_NAME))
1217 mark_min_matrix_escape_level (mi, l, use_stmt);
1218 else
1220 gimple def_stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (use_stmt));
1221 analyze_matrix_allocation_site (mi, def_stmt, l, visited);
1222 if (record_accesses)
1223 record_access_alloc_site_info (mi, use_stmt, NULL_TREE,
1224 NULL_TREE, l, true);
1225 update_type_size (mi, use_stmt, NULL, l);
1227 return current_indirect_level;
1229 /* Now, check the right-hand-side, to see how the SSA variable
1230 is used. */
1231 if (rhs_acc.var_found)
1233 if (rhs_acc.t_code != MEM_REF
1234 && rhs_acc.t_code != POINTER_PLUS_EXPR && rhs_acc.t_code != SSA_NAME)
1236 mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
1237 return current_indirect_level;
1239 /* If the access in the RHS has an indirection increase the
1240 indirection level. */
1241 if (rhs_acc.t_code == MEM_REF)
1243 if (record_accesses)
1244 record_access_alloc_site_info (mi, use_stmt, NULL_TREE,
1245 NULL_TREE,
1246 current_indirect_level, true);
1247 current_indirect_level += 1;
1249 else if (rhs_acc.t_code == POINTER_PLUS_EXPR)
1251 gcc_assert (rhs_acc.second_op);
1252 if (last_op)
1253 /* Currently we support only one PLUS expression on the
1254 SSA_NAME that holds the base address of the current
1255 indirection level; to support more general case there
1256 is a need to hold a stack of expressions and regenerate
1257 the calculation later. */
1258 mark_min_matrix_escape_level (mi, current_indirect_level,
1259 use_stmt);
1260 else
1262 tree index;
1263 tree op1, op2;
1265 op1 = gimple_assign_rhs1 (use_stmt);
1266 op2 = gimple_assign_rhs2 (use_stmt);
1268 op2 = (op1 == ssa_var) ? op2 : op1;
1269 if (TREE_CODE (op2) == INTEGER_CST)
1270 index =
1271 build_int_cst (TREE_TYPE (op1),
1272 TREE_INT_CST_LOW (op2) /
1273 int_size_in_bytes (TREE_TYPE (op1)));
1274 else
1276 index =
1277 get_index_from_offset (op2, SSA_NAME_DEF_STMT (op2));
1278 if (index == NULL_TREE)
1280 mark_min_matrix_escape_level (mi,
1281 current_indirect_level,
1282 use_stmt);
1283 return current_indirect_level;
1286 if (record_accesses)
1287 record_access_alloc_site_info (mi, use_stmt, op2,
1288 index,
1289 current_indirect_level, false);
1292 /* If we are storing this level of indirection mark it as
1293 escaping. */
1294 if (lhs_acc.t_code == MEM_REF || TREE_CODE (lhs) != SSA_NAME)
1296 int l = current_indirect_level;
1298 /* One exception is when we are storing to the matrix
1299 variable itself; this is the case of malloc, we must make
1300 sure that it's the one and only one call to malloc so
1301 we call analyze_matrix_allocation_site to check
1302 this out. */
1303 if (TREE_CODE (lhs) != VAR_DECL || lhs != mi->decl)
1304 mark_min_matrix_escape_level (mi, current_indirect_level,
1305 use_stmt);
1306 else
1308 /* Also update the escaping level. */
1309 analyze_matrix_allocation_site (mi, use_stmt, l, visited);
1310 if (record_accesses)
1311 record_access_alloc_site_info (mi, use_stmt, NULL_TREE,
1312 NULL_TREE, l, true);
1315 else
1317 /* We are placing it in an SSA, follow that SSA. */
1318 analyze_matrix_accesses (mi, lhs,
1319 current_indirect_level,
1320 rhs_acc.t_code == POINTER_PLUS_EXPR,
1321 visited, record_accesses);
1324 return current_indirect_level;
1327 /* Given a SSA_VAR (coming from a use statement of the matrix MI),
1328 follow its uses and level of indirection and find out the minimum
1329 indirection level it escapes in (the highest dimension) and the maximum
1330 level it is accessed in (this will be the actual dimension of the
1331 matrix). The information is accumulated in MI.
1332 We look at the immediate uses, if one escapes we finish; if not,
1333 we make a recursive call for each one of the immediate uses of the
1334 resulting SSA name. */
1335 static void
1336 analyze_matrix_accesses (struct matrix_info *mi, tree ssa_var,
1337 int current_indirect_level, bool last_op,
1338 sbitmap visited, bool record_accesses)
1340 imm_use_iterator imm_iter;
1341 use_operand_p use_p;
1343 update_type_size (mi, SSA_NAME_DEF_STMT (ssa_var), ssa_var,
1344 current_indirect_level);
1346 /* We don't go beyond the escaping level when we are performing the
1347 flattening. NOTE: we keep the last indirection level that doesn't
1348 escape. */
1349 if (mi->min_indirect_level_escape > -1
1350 && mi->min_indirect_level_escape <= current_indirect_level)
1351 return;
1353 /* Now go over the uses of the SSA_NAME and check how it is used in
1354 each one of them. We are mainly looking for the pattern MEM_REF,
1355 then a POINTER_PLUS_EXPR, then MEM_REF etc. while in between there could
1356 be any number of copies and casts. */
1357 gcc_assert (TREE_CODE (ssa_var) == SSA_NAME);
1359 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, ssa_var)
1361 gimple use_stmt = USE_STMT (use_p);
1362 if (gimple_code (use_stmt) == GIMPLE_PHI)
1363 /* We check all the escaping levels that get to the PHI node
1364 and make sure they are all the same escaping;
1365 if not (which is rare) we let the escaping level be the
1366 minimum level that gets into that PHI because starting from
1367 that level we cannot expect the behavior of the indirections. */
1369 analyze_accesses_for_phi_node (mi, use_stmt, current_indirect_level,
1370 visited, record_accesses);
1372 else if (is_gimple_call (use_stmt))
1373 analyze_accesses_for_call_stmt (mi, ssa_var, use_stmt,
1374 current_indirect_level);
1375 else if (is_gimple_assign (use_stmt))
1376 current_indirect_level =
1377 analyze_accesses_for_assign_stmt (mi, ssa_var, use_stmt,
1378 current_indirect_level, last_op,
1379 visited, record_accesses);
1383 typedef struct
1385 tree fn;
1386 gimple stmt;
1387 } check_var_data;
1389 /* A walk_tree function to go over the VAR_DECL, PARM_DECL nodes of
1390 the malloc size expression and check that those aren't changed
1391 over the function. */
1392 static tree
1393 check_var_notmodified_p (tree * tp, int *walk_subtrees, void *data)
1395 basic_block bb;
1396 tree t = *tp;
1397 check_var_data *callback_data = (check_var_data*) data;
1398 tree fn = callback_data->fn;
1399 gimple_stmt_iterator gsi;
1400 gimple stmt;
1402 if (TREE_CODE (t) != VAR_DECL && TREE_CODE (t) != PARM_DECL)
1403 return NULL_TREE;
1405 FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (fn))
1407 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1409 stmt = gsi_stmt (gsi);
1410 if (!is_gimple_assign (stmt) && !is_gimple_call (stmt))
1411 continue;
1412 if (gimple_get_lhs (stmt) == t)
1414 callback_data->stmt = stmt;
1415 return t;
1419 *walk_subtrees = 1;
1420 return NULL_TREE;
1423 /* Go backwards in the use-def chains and find out the expression
1424 represented by the possible SSA name in STMT, until it is composed
1425 of only VAR_DECL, PARM_DECL and INT_CST. In case of phi nodes
1426 we make sure that all the arguments represent the same subexpression,
1427 otherwise we fail. */
1429 static tree
1430 can_calculate_stmt_before_stmt (gimple stmt, sbitmap visited)
1432 tree op1, op2, res;
1433 enum tree_code code;
1435 switch (gimple_code (stmt))
1437 case GIMPLE_ASSIGN:
1438 code = gimple_assign_rhs_code (stmt);
1439 op1 = gimple_assign_rhs1 (stmt);
1441 switch (code)
1443 case POINTER_PLUS_EXPR:
1444 case PLUS_EXPR:
1445 case MINUS_EXPR:
1446 case MULT_EXPR:
1448 op2 = gimple_assign_rhs2 (stmt);
1449 op1 = can_calculate_expr_before_stmt (op1, visited);
1450 if (!op1)
1451 return NULL_TREE;
1452 op2 = can_calculate_expr_before_stmt (op2, visited);
1453 if (op2)
1454 return fold_build2 (code, gimple_expr_type (stmt), op1, op2);
1455 return NULL_TREE;
1457 CASE_CONVERT:
1458 res = can_calculate_expr_before_stmt (op1, visited);
1459 if (res != NULL_TREE)
1460 return build1 (code, gimple_expr_type (stmt), res);
1461 else
1462 return NULL_TREE;
1464 default:
1465 if (gimple_assign_single_p (stmt))
1466 return can_calculate_expr_before_stmt (op1, visited);
1467 else
1468 return NULL_TREE;
1471 case GIMPLE_PHI:
1473 size_t j;
1475 res = NULL_TREE;
1476 /* Make sure all the arguments represent the same value. */
1477 for (j = 0; j < gimple_phi_num_args (stmt); j++)
1479 tree new_res;
1480 tree def = PHI_ARG_DEF (stmt, j);
1482 new_res = can_calculate_expr_before_stmt (def, visited);
1483 if (res == NULL_TREE)
1484 res = new_res;
1485 else if (!new_res || !expressions_equal_p (res, new_res))
1486 return NULL_TREE;
1488 return res;
1491 default:
1492 return NULL_TREE;
1496 /* Go backwards in the use-def chains and find out the expression
1497 represented by the possible SSA name in EXPR, until it is composed
1498 of only VAR_DECL, PARM_DECL and INT_CST. In case of phi nodes
1499 we make sure that all the arguments represent the same subexpression,
1500 otherwise we fail. */
1501 static tree
1502 can_calculate_expr_before_stmt (tree expr, sbitmap visited)
1504 gimple def_stmt;
1505 tree res;
1507 switch (TREE_CODE (expr))
1509 case SSA_NAME:
1510 /* Case of loop, we don't know to represent this expression. */
1511 if (TEST_BIT (visited, SSA_NAME_VERSION (expr)))
1512 return NULL_TREE;
1514 SET_BIT (visited, SSA_NAME_VERSION (expr));
1515 def_stmt = SSA_NAME_DEF_STMT (expr);
1516 res = can_calculate_stmt_before_stmt (def_stmt, visited);
1517 RESET_BIT (visited, SSA_NAME_VERSION (expr));
1518 return res;
1519 case VAR_DECL:
1520 case PARM_DECL:
1521 case INTEGER_CST:
1522 return expr;
1524 default:
1525 return NULL_TREE;
1529 /* There should be only one allocation function for the dimensions
1530 that don't escape. Here we check the allocation sites in this
1531 function. We must make sure that all the dimensions are allocated
1532 using malloc and that the malloc size parameter expression could be
1533 pre-calculated before the call to the malloc of dimension 0.
1535 Given a candidate matrix for flattening -- MI -- check if it's
1536 appropriate for flattening -- we analyze the allocation
1537 sites that we recorded in the previous analysis. The result of the
1538 analysis is a level of indirection (matrix dimension) in which the
1539 flattening is safe. We check the following conditions:
1540 1. There is only one allocation site for each dimension.
1541 2. The allocation sites of all the dimensions are in the same
1542 function.
1543 (The above two are being taken care of during the analysis when
1544 we check the allocation site).
1545 3. All the dimensions that we flatten are allocated at once; thus
1546 the total size must be known before the allocation of the
1547 dimension 0 (top level) -- we must make sure we represent the
1548 size of the allocation as an expression of global parameters or
1549 constants and that those doesn't change over the function. */
1551 static int
1552 check_allocation_function (void **slot, void *data ATTRIBUTE_UNUSED)
1554 int level;
1555 struct matrix_info *mi = (struct matrix_info *) *slot;
1556 sbitmap visited;
1558 if (!mi->malloc_for_level)
1559 return 1;
1561 visited = sbitmap_alloc (num_ssa_names);
1563 /* Do nothing if the current function is not the allocation
1564 function of MI. */
1565 if (mi->allocation_function_decl != current_function_decl
1566 /* We aren't in the main allocation function yet. */
1567 || !mi->malloc_for_level[0])
1568 return 1;
1570 for (level = 1; level < mi->max_malloced_level; level++)
1571 if (!mi->malloc_for_level[level])
1572 break;
1574 mark_min_matrix_escape_level (mi, level, NULL);
1576 /* Check if the expression of the size passed to malloc could be
1577 pre-calculated before the malloc of level 0. */
1578 for (level = 1; level < mi->min_indirect_level_escape; level++)
1580 gimple call_stmt;
1581 tree size;
1582 struct malloc_call_data mcd = {NULL, NULL_TREE, NULL_TREE};
1584 call_stmt = mi->malloc_for_level[level];
1586 /* Find the correct malloc information. */
1587 collect_data_for_malloc_call (call_stmt, &mcd);
1589 /* No need to check anticipation for constants. */
1590 if (TREE_CODE (mcd.size_var) == INTEGER_CST)
1592 if (!mi->dimension_size)
1594 mi->dimension_size =
1595 (tree *) xcalloc (mi->min_indirect_level_escape,
1596 sizeof (tree));
1597 mi->dimension_size_orig =
1598 (tree *) xcalloc (mi->min_indirect_level_escape,
1599 sizeof (tree));
1601 mi->dimension_size[level] = mcd.size_var;
1602 mi->dimension_size_orig[level] = mcd.size_var;
1603 continue;
1605 /* ??? Here we should also add the way to calculate the size
1606 expression not only know that it is anticipated. */
1607 sbitmap_zero (visited);
1608 size = can_calculate_expr_before_stmt (mcd.size_var, visited);
1609 if (size == NULL_TREE)
1611 mark_min_matrix_escape_level (mi, level, call_stmt);
1612 if (dump_file)
1613 fprintf (dump_file,
1614 "Matrix %s: Cannot calculate the size of allocation, escaping at level %d\n",
1615 get_name (mi->decl), level);
1616 break;
1618 if (!mi->dimension_size)
1620 mi->dimension_size =
1621 (tree *) xcalloc (mi->min_indirect_level_escape, sizeof (tree));
1622 mi->dimension_size_orig =
1623 (tree *) xcalloc (mi->min_indirect_level_escape, sizeof (tree));
1625 mi->dimension_size[level] = size;
1626 mi->dimension_size_orig[level] = size;
1629 /* We don't need those anymore. */
1630 for (level = mi->min_indirect_level_escape;
1631 level < mi->max_malloced_level; level++)
1632 mi->malloc_for_level[level] = NULL;
1633 return 1;
1636 /* Track all access and allocation sites. */
1637 static void
1638 find_sites_in_func (bool record)
1640 sbitmap visited_stmts_1;
1642 gimple_stmt_iterator gsi;
1643 gimple stmt;
1644 basic_block bb;
1645 struct matrix_info tmpmi, *mi;
1647 visited_stmts_1 = sbitmap_alloc (num_ssa_names);
1649 FOR_EACH_BB (bb)
1651 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1653 tree lhs;
1655 stmt = gsi_stmt (gsi);
1656 lhs = gimple_get_lhs (stmt);
1657 if (lhs != NULL_TREE
1658 && TREE_CODE (lhs) == VAR_DECL)
1660 tmpmi.decl = lhs;
1661 if ((mi = (struct matrix_info *) htab_find (matrices_to_reorg,
1662 &tmpmi)))
1664 sbitmap_zero (visited_stmts_1);
1665 analyze_matrix_allocation_site (mi, stmt, 0, visited_stmts_1);
1668 if (is_gimple_assign (stmt)
1669 && gimple_assign_single_p (stmt)
1670 && TREE_CODE (lhs) == SSA_NAME
1671 && TREE_CODE (gimple_assign_rhs1 (stmt)) == VAR_DECL)
1673 tmpmi.decl = gimple_assign_rhs1 (stmt);
1674 if ((mi = (struct matrix_info *) htab_find (matrices_to_reorg,
1675 &tmpmi)))
1677 sbitmap_zero (visited_stmts_1);
1678 analyze_matrix_accesses (mi, lhs, 0,
1679 false, visited_stmts_1, record);
1684 sbitmap_free (visited_stmts_1);
1687 /* Traverse the use-def chains to see if there are matrices that
1688 are passed through pointers and we cannot know how they are accessed.
1689 For each SSA-name defined by a global variable of our interest,
1690 we traverse the use-def chains of the SSA and follow the indirections,
1691 and record in what level of indirection the use of the variable
1692 escapes. A use of a pointer escapes when it is passed to a function,
1693 stored into memory or assigned (except in malloc and free calls). */
1695 static void
1696 record_all_accesses_in_func (void)
1698 unsigned i;
1699 sbitmap visited_stmts_1;
1701 visited_stmts_1 = sbitmap_alloc (num_ssa_names);
1703 for (i = 0; i < num_ssa_names; i++)
1705 struct matrix_info tmpmi, *mi;
1706 tree ssa_var = ssa_name (i);
1707 tree rhs, lhs;
1709 if (!ssa_var
1710 || !is_gimple_assign (SSA_NAME_DEF_STMT (ssa_var))
1711 || !gimple_assign_single_p (SSA_NAME_DEF_STMT (ssa_var)))
1712 continue;
1713 rhs = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (ssa_var));
1714 lhs = gimple_assign_lhs (SSA_NAME_DEF_STMT (ssa_var));
1715 if (TREE_CODE (rhs) != VAR_DECL && TREE_CODE (lhs) != VAR_DECL)
1716 continue;
1718 /* If the RHS is a matrix that we want to analyze, follow the def-use
1719 chain for this SSA_VAR and check for escapes or apply the
1720 flattening. */
1721 tmpmi.decl = rhs;
1722 if ((mi = (struct matrix_info *) htab_find (matrices_to_reorg, &tmpmi)))
1724 /* This variable will track the visited PHI nodes, so we can limit
1725 its size to the maximum number of SSA names. */
1726 sbitmap_zero (visited_stmts_1);
1727 analyze_matrix_accesses (mi, ssa_var,
1728 0, false, visited_stmts_1, true);
1732 sbitmap_free (visited_stmts_1);
1735 /* Used when we want to convert the expression: RESULT = something *
1736 ORIG to RESULT = something * NEW_VAL. If ORIG and NEW_VAL are power
1737 of 2, shift operations can be done, else division and
1738 multiplication. */
1740 static tree
1741 compute_offset (HOST_WIDE_INT orig, HOST_WIDE_INT new_val, tree result)
1744 int x, y;
1745 tree result1, ratio, log, orig_tree, new_tree;
1747 x = exact_log2 (orig);
1748 y = exact_log2 (new_val);
1750 if (x != -1 && y != -1)
1752 if (x == y)
1753 return result;
1754 else if (x > y)
1756 log = build_int_cst (TREE_TYPE (result), x - y);
1757 result1 =
1758 fold_build2 (LSHIFT_EXPR, TREE_TYPE (result), result, log);
1759 return result1;
1761 log = build_int_cst (TREE_TYPE (result), y - x);
1762 result1 = fold_build2 (RSHIFT_EXPR, TREE_TYPE (result), result, log);
1764 return result1;
1766 orig_tree = build_int_cst (TREE_TYPE (result), orig);
1767 new_tree = build_int_cst (TREE_TYPE (result), new_val);
1768 ratio = fold_build2 (TRUNC_DIV_EXPR, TREE_TYPE (result), result, orig_tree);
1769 result1 = fold_build2 (MULT_EXPR, TREE_TYPE (result), ratio, new_tree);
1771 return result1;
1775 /* We know that we are allowed to perform matrix flattening (according to the
1776 escape analysis), so we traverse the use-def chains of the SSA vars
1777 defined by the global variables pointing to the matrices of our interest.
1778 in each use of the SSA we calculate the offset from the base address
1779 according to the following equation:
1781 a[I1][I2]...[Ik] , where D1..Dk is the length of each dimension and the
1782 escaping level is m <= k, and a' is the new allocated matrix,
1783 will be translated to :
1785 b[I(m+1)]...[Ik]
1787 where
1788 b = a' + I1*D2...*Dm + I2*D3...Dm + ... + Im
1791 static int
1792 transform_access_sites (void **slot, void *data ATTRIBUTE_UNUSED)
1794 gimple_stmt_iterator gsi;
1795 struct matrix_info *mi = (struct matrix_info *) *slot;
1796 int min_escape_l = mi->min_indirect_level_escape;
1797 struct access_site_info *acc_info;
1798 enum tree_code code;
1799 int i;
1801 if (min_escape_l < 2 || !mi->access_l)
1802 return 1;
1803 for (i = 0; VEC_iterate (access_site_info_p, mi->access_l, i, acc_info);
1804 i++)
1806 /* This is possible because we collect the access sites before
1807 we determine the final minimum indirection level. */
1808 if (acc_info->level >= min_escape_l)
1810 free (acc_info);
1811 continue;
1813 if (acc_info->is_alloc)
1815 if (acc_info->level >= 0 && gimple_bb (acc_info->stmt))
1817 ssa_op_iter iter;
1818 tree def;
1819 gimple stmt = acc_info->stmt;
1820 tree lhs;
1822 FOR_EACH_SSA_TREE_OPERAND (def, stmt, iter, SSA_OP_DEF)
1823 mark_sym_for_renaming (SSA_NAME_VAR (def));
1824 gsi = gsi_for_stmt (stmt);
1825 gcc_assert (is_gimple_assign (acc_info->stmt));
1826 lhs = gimple_assign_lhs (acc_info->stmt);
1827 if (TREE_CODE (lhs) == SSA_NAME
1828 && acc_info->level < min_escape_l - 1)
1830 imm_use_iterator imm_iter;
1831 use_operand_p use_p;
1832 gimple use_stmt;
1834 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, lhs)
1835 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1837 tree rhs, tmp;
1838 gimple new_stmt;
1840 gcc_assert (gimple_assign_rhs_code (acc_info->stmt)
1841 == MEM_REF);
1842 /* Emit convert statement to convert to type of use. */
1843 tmp = create_tmp_var (TREE_TYPE (lhs), "new");
1844 add_referenced_var (tmp);
1845 rhs = gimple_assign_rhs1 (acc_info->stmt);
1846 rhs = fold_convert (TREE_TYPE (tmp),
1847 TREE_OPERAND (rhs, 0));
1848 new_stmt = gimple_build_assign (tmp, rhs);
1849 tmp = make_ssa_name (tmp, new_stmt);
1850 gimple_assign_set_lhs (new_stmt, tmp);
1851 gsi = gsi_for_stmt (acc_info->stmt);
1852 gsi_insert_after (&gsi, new_stmt, GSI_SAME_STMT);
1853 SET_USE (use_p, tmp);
1856 if (acc_info->level < min_escape_l - 1)
1857 gsi_remove (&gsi, true);
1859 free (acc_info);
1860 continue;
1862 code = gimple_assign_rhs_code (acc_info->stmt);
1863 if (code == MEM_REF
1864 && acc_info->level < min_escape_l - 1)
1866 /* Replace the MEM_REF with NOP (cast) usually we are casting
1867 from "pointer to type" to "type". */
1868 tree t =
1869 build1 (NOP_EXPR, TREE_TYPE (gimple_assign_rhs1 (acc_info->stmt)),
1870 TREE_OPERAND (gimple_assign_rhs1 (acc_info->stmt), 0));
1871 gimple_assign_set_rhs_code (acc_info->stmt, NOP_EXPR);
1872 gimple_assign_set_rhs1 (acc_info->stmt, t);
1874 else if (code == POINTER_PLUS_EXPR
1875 && acc_info->level < (min_escape_l))
1877 imm_use_iterator imm_iter;
1878 use_operand_p use_p;
1880 tree offset;
1881 int k = acc_info->level;
1882 tree num_elements, total_elements;
1883 tree tmp1;
1884 tree d_size = mi->dimension_size[k];
1886 /* We already make sure in the analysis that the first operand
1887 is the base and the second is the offset. */
1888 offset = acc_info->offset;
1889 if (mi->dim_map[k] == min_escape_l - 1)
1891 if (!check_transpose_p || mi->is_transposed_p == false)
1892 tmp1 = offset;
1893 else
1895 tree new_offset;
1897 new_offset =
1898 compute_offset (mi->dimension_type_size[min_escape_l],
1899 mi->dimension_type_size[k + 1], offset);
1901 total_elements = new_offset;
1902 if (new_offset != offset)
1904 gsi = gsi_for_stmt (acc_info->stmt);
1905 tmp1 = force_gimple_operand_gsi (&gsi, total_elements,
1906 true, NULL,
1907 true, GSI_SAME_STMT);
1909 else
1910 tmp1 = offset;
1913 else
1915 d_size = mi->dimension_size[mi->dim_map[k] + 1];
1916 num_elements =
1917 fold_build2 (MULT_EXPR, sizetype, fold_convert (sizetype, acc_info->index),
1918 fold_convert (sizetype, d_size));
1919 add_referenced_var (d_size);
1920 gsi = gsi_for_stmt (acc_info->stmt);
1921 tmp1 = force_gimple_operand_gsi (&gsi, num_elements, true,
1922 NULL, true, GSI_SAME_STMT);
1924 /* Replace the offset if needed. */
1925 if (tmp1 != offset)
1927 if (TREE_CODE (offset) == SSA_NAME)
1929 gimple use_stmt;
1931 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, offset)
1932 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1933 if (use_stmt == acc_info->stmt)
1934 SET_USE (use_p, tmp1);
1936 else
1938 gcc_assert (TREE_CODE (offset) == INTEGER_CST);
1939 gimple_assign_set_rhs2 (acc_info->stmt, tmp1);
1940 update_stmt (acc_info->stmt);
1944 /* ??? meanwhile this happens because we record the same access
1945 site more than once; we should be using a hash table to
1946 avoid this and insert the STMT of the access site only
1947 once.
1948 else
1949 gcc_unreachable (); */
1950 free (acc_info);
1952 VEC_free (access_site_info_p, heap, mi->access_l);
1954 update_ssa (TODO_update_ssa);
1955 #ifdef ENABLE_CHECKING
1956 verify_ssa (true);
1957 #endif
1958 return 1;
1961 /* Sort A array of counts. Arrange DIM_MAP to reflect the new order. */
1963 static void
1964 sort_dim_hot_level (gcov_type * a, int *dim_map, int n)
1966 int i, j, tmp1;
1967 gcov_type tmp;
1969 for (i = 0; i < n - 1; i++)
1971 for (j = 0; j < n - 1 - i; j++)
1973 if (a[j + 1] < a[j])
1975 tmp = a[j]; /* swap a[j] and a[j+1] */
1976 a[j] = a[j + 1];
1977 a[j + 1] = tmp;
1978 tmp1 = dim_map[j];
1979 dim_map[j] = dim_map[j + 1];
1980 dim_map[j + 1] = tmp1;
1986 /* Replace multiple mallocs (one for each dimension) to one malloc
1987 with the size of DIM1*DIM2*...*DIMN*size_of_element
1988 Make sure that we hold the size in the malloc site inside a
1989 new global variable; this way we ensure that the size doesn't
1990 change and it is accessible from all the other functions that
1991 uses the matrix. Also, the original calls to free are deleted,
1992 and replaced by a new call to free the flattened matrix. */
1994 static int
1995 transform_allocation_sites (void **slot, void *data ATTRIBUTE_UNUSED)
1997 int i;
1998 struct matrix_info *mi;
1999 tree type, oldfn, prev_dim_size;
2000 gimple call_stmt_0, use_stmt;
2001 struct cgraph_node *c_node;
2002 struct cgraph_edge *e;
2003 gimple_stmt_iterator gsi;
2004 struct malloc_call_data mcd = {NULL, NULL_TREE, NULL_TREE};
2005 HOST_WIDE_INT element_size;
2007 imm_use_iterator imm_iter;
2008 use_operand_p use_p;
2009 tree old_size_0, tmp;
2010 int min_escape_l;
2011 int id;
2013 mi = (struct matrix_info *) *slot;
2015 min_escape_l = mi->min_indirect_level_escape;
2017 if (!mi->malloc_for_level)
2018 mi->min_indirect_level_escape = 0;
2020 if (mi->min_indirect_level_escape < 2)
2021 return 1;
2023 mi->dim_map = (int *) xcalloc (mi->min_indirect_level_escape, sizeof (int));
2024 for (i = 0; i < mi->min_indirect_level_escape; i++)
2025 mi->dim_map[i] = i;
2026 if (check_transpose_p)
2028 int i;
2030 if (dump_file)
2032 fprintf (dump_file, "Matrix %s:\n", get_name (mi->decl));
2033 for (i = 0; i < min_escape_l; i++)
2035 fprintf (dump_file, "dim %d before sort ", i);
2036 if (mi->dim_hot_level)
2037 fprintf (dump_file,
2038 "count is " HOST_WIDEST_INT_PRINT_DEC " \n",
2039 mi->dim_hot_level[i]);
2042 sort_dim_hot_level (mi->dim_hot_level, mi->dim_map,
2043 mi->min_indirect_level_escape);
2044 if (dump_file)
2045 for (i = 0; i < min_escape_l; i++)
2047 fprintf (dump_file, "dim %d after sort\n", i);
2048 if (mi->dim_hot_level)
2049 fprintf (dump_file, "count is " HOST_WIDE_INT_PRINT_DEC
2050 " \n", (HOST_WIDE_INT) mi->dim_hot_level[i]);
2052 for (i = 0; i < mi->min_indirect_level_escape; i++)
2054 if (dump_file)
2055 fprintf (dump_file, "dim_map[%d] after sort %d\n", i,
2056 mi->dim_map[i]);
2057 if (mi->dim_map[i] != i)
2059 if (dump_file)
2060 fprintf (dump_file,
2061 "Transposed dimensions: dim %d is now dim %d\n",
2062 mi->dim_map[i], i);
2063 mi->is_transposed_p = true;
2067 else
2069 for (i = 0; i < mi->min_indirect_level_escape; i++)
2070 mi->dim_map[i] = i;
2072 /* Call statement of allocation site of level 0. */
2073 call_stmt_0 = mi->malloc_for_level[0];
2075 /* Finds the correct malloc information. */
2076 collect_data_for_malloc_call (call_stmt_0, &mcd);
2078 mi->dimension_size[0] = mcd.size_var;
2079 mi->dimension_size_orig[0] = mcd.size_var;
2080 /* Make sure that the variables in the size expression for
2081 all the dimensions (above level 0) aren't modified in
2082 the allocation function. */
2083 for (i = 1; i < mi->min_indirect_level_escape; i++)
2085 tree t;
2086 check_var_data data;
2088 /* mi->dimension_size must contain the expression of the size calculated
2089 in check_allocation_function. */
2090 gcc_assert (mi->dimension_size[i]);
2092 data.fn = mi->allocation_function_decl;
2093 data.stmt = NULL;
2094 t = walk_tree_without_duplicates (&(mi->dimension_size[i]),
2095 check_var_notmodified_p,
2096 &data);
2097 if (t != NULL_TREE)
2099 mark_min_matrix_escape_level (mi, i, data.stmt);
2100 break;
2104 if (mi->min_indirect_level_escape < 2)
2105 return 1;
2107 /* Since we should make sure that the size expression is available
2108 before the call to malloc of level 0. */
2109 gsi = gsi_for_stmt (call_stmt_0);
2111 /* Find out the size of each dimension by looking at the malloc
2112 sites and create a global variable to hold it.
2113 We add the assignment to the global before the malloc of level 0. */
2115 /* To be able to produce gimple temporaries. */
2116 oldfn = current_function_decl;
2117 current_function_decl = mi->allocation_function_decl;
2118 push_cfun (DECL_STRUCT_FUNCTION (mi->allocation_function_decl));
2120 /* Set the dimension sizes as follows:
2121 DIM_SIZE[i] = DIM_SIZE[n] * ... * DIM_SIZE[i]
2122 where n is the maximum non escaping level. */
2123 element_size = mi->dimension_type_size[mi->min_indirect_level_escape];
2124 prev_dim_size = NULL_TREE;
2126 for (i = mi->min_indirect_level_escape - 1; i >= 0; i--)
2128 tree dim_size, dim_var;
2129 gimple stmt;
2130 tree d_type_size;
2132 /* Now put the size expression in a global variable and initialize it to
2133 the size expression before the malloc of level 0. */
2134 dim_var =
2135 add_new_static_var (TREE_TYPE
2136 (mi->dimension_size_orig[mi->dim_map[i]]));
2137 type = TREE_TYPE (mi->dimension_size_orig[mi->dim_map[i]]);
2139 /* DIM_SIZE = MALLOC_SIZE_PARAM / TYPE_SIZE. */
2140 /* Find which dim ID becomes dim I. */
2141 for (id = 0; id < mi->min_indirect_level_escape; id++)
2142 if (mi->dim_map[id] == i)
2143 break;
2144 d_type_size =
2145 build_int_cst (type, mi->dimension_type_size[id + 1]);
2146 if (!prev_dim_size)
2147 prev_dim_size = build_int_cst (type, element_size);
2148 if (!check_transpose_p && i == mi->min_indirect_level_escape - 1)
2150 dim_size = mi->dimension_size_orig[id];
2152 else
2154 dim_size =
2155 fold_build2 (TRUNC_DIV_EXPR, type, mi->dimension_size_orig[id],
2156 d_type_size);
2158 dim_size = fold_build2 (MULT_EXPR, type, dim_size, prev_dim_size);
2160 dim_size = force_gimple_operand_gsi (&gsi, dim_size, true, NULL,
2161 true, GSI_SAME_STMT);
2162 /* GLOBAL_HOLDING_THE_SIZE = DIM_SIZE. */
2163 stmt = gimple_build_assign (dim_var, dim_size);
2164 mark_symbols_for_renaming (stmt);
2165 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
2167 prev_dim_size = mi->dimension_size[i] = dim_var;
2169 update_ssa (TODO_update_ssa);
2170 /* Replace the malloc size argument in the malloc of level 0 to be
2171 the size of all the dimensions. */
2172 c_node = cgraph_node (mi->allocation_function_decl);
2173 old_size_0 = gimple_call_arg (call_stmt_0, 0);
2174 tmp = force_gimple_operand_gsi (&gsi, mi->dimension_size[0], true,
2175 NULL, true, GSI_SAME_STMT);
2176 if (TREE_CODE (old_size_0) == SSA_NAME)
2178 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, old_size_0)
2179 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
2180 if (use_stmt == call_stmt_0)
2181 SET_USE (use_p, tmp);
2183 /* When deleting the calls to malloc we need also to remove the edge from
2184 the call graph to keep it consistent. Notice that cgraph_edge may
2185 create a new node in the call graph if there is no node for the given
2186 declaration; this shouldn't be the case but currently there is no way to
2187 check this outside of "cgraph.c". */
2188 for (i = 1; i < mi->min_indirect_level_escape; i++)
2190 gimple_stmt_iterator gsi;
2192 gimple call_stmt = mi->malloc_for_level[i];
2193 gcc_assert (is_gimple_call (call_stmt));
2194 e = cgraph_edge (c_node, call_stmt);
2195 gcc_assert (e);
2196 cgraph_remove_edge (e);
2197 gsi = gsi_for_stmt (call_stmt);
2198 /* Remove the call stmt. */
2199 gsi_remove (&gsi, true);
2200 /* Remove the assignment of the allocated area. */
2201 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter,
2202 gimple_call_lhs (call_stmt))
2204 gsi = gsi_for_stmt (use_stmt);
2205 gsi_remove (&gsi, true);
2208 update_ssa (TODO_update_ssa);
2209 #ifdef ENABLE_CHECKING
2210 verify_ssa (true);
2211 #endif
2212 /* Delete the calls to free. */
2213 for (i = 1; i < mi->min_indirect_level_escape; i++)
2215 gimple_stmt_iterator gsi;
2217 /* ??? wonder why this case is possible but we failed on it once. */
2218 if (!mi->free_stmts[i].stmt)
2219 continue;
2221 c_node = cgraph_node (mi->free_stmts[i].func);
2222 gcc_assert (is_gimple_call (mi->free_stmts[i].stmt));
2223 e = cgraph_edge (c_node, mi->free_stmts[i].stmt);
2224 gcc_assert (e);
2225 cgraph_remove_edge (e);
2226 current_function_decl = mi->free_stmts[i].func;
2227 set_cfun (DECL_STRUCT_FUNCTION (mi->free_stmts[i].func));
2228 gsi = gsi_for_stmt (mi->free_stmts[i].stmt);
2229 gsi_remove (&gsi, true);
2231 /* Return to the previous situation. */
2232 current_function_decl = oldfn;
2233 pop_cfun ();
2234 return 1;
2239 /* Print out the results of the escape analysis. */
2240 static int
2241 dump_matrix_reorg_analysis (void **slot, void *data ATTRIBUTE_UNUSED)
2243 struct matrix_info *mi = (struct matrix_info *) *slot;
2245 if (!dump_file)
2246 return 1;
2247 fprintf (dump_file, "Matrix \"%s\"; Escaping Level: %d, Num Dims: %d,",
2248 get_name (mi->decl), mi->min_indirect_level_escape, mi->num_dims);
2249 fprintf (dump_file, " Malloc Dims: %d, ", mi->max_malloced_level);
2250 fprintf (dump_file, "\n");
2251 if (mi->min_indirect_level_escape >= 2)
2252 fprintf (dump_file, "Flattened %d dimensions \n",
2253 mi->min_indirect_level_escape);
2254 return 1;
2257 /* Perform matrix flattening. */
2259 static unsigned int
2260 matrix_reorg (void)
2262 struct cgraph_node *node;
2264 if (profile_info)
2265 check_transpose_p = true;
2266 else
2267 check_transpose_p = false;
2268 /* If there are hand written vectors, we skip this optimization. */
2269 for (node = cgraph_nodes; node; node = node->next)
2270 if (!may_flatten_matrices (node))
2271 return 0;
2272 matrices_to_reorg = htab_create (37, mtt_info_hash, mtt_info_eq, mat_free);
2273 /* Find and record all potential matrices in the program. */
2274 find_matrices_decl ();
2275 /* Analyze the accesses of the matrices (escaping analysis). */
2276 for (node = cgraph_nodes; node; node = node->next)
2277 if (node->analyzed)
2279 tree temp_fn;
2281 temp_fn = current_function_decl;
2282 current_function_decl = node->decl;
2283 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
2284 bitmap_obstack_initialize (NULL);
2285 gimple_register_cfg_hooks ();
2287 if (!gimple_in_ssa_p (cfun))
2289 free_dominance_info (CDI_DOMINATORS);
2290 free_dominance_info (CDI_POST_DOMINATORS);
2291 pop_cfun ();
2292 current_function_decl = temp_fn;
2293 bitmap_obstack_release (NULL);
2295 return 0;
2298 #ifdef ENABLE_CHECKING
2299 verify_flow_info ();
2300 #endif
2302 if (!matrices_to_reorg)
2304 free_dominance_info (CDI_DOMINATORS);
2305 free_dominance_info (CDI_POST_DOMINATORS);
2306 pop_cfun ();
2307 current_function_decl = temp_fn;
2308 bitmap_obstack_release (NULL);
2310 return 0;
2313 /* Create htap for phi nodes. */
2314 htab_mat_acc_phi_nodes = htab_create (37, mat_acc_phi_hash,
2315 mat_acc_phi_eq, free);
2316 if (!check_transpose_p)
2317 find_sites_in_func (false);
2318 else
2320 find_sites_in_func (true);
2321 loop_optimizer_init (LOOPS_NORMAL);
2322 if (current_loops)
2323 scev_initialize ();
2324 htab_traverse (matrices_to_reorg, analyze_transpose, NULL);
2325 if (current_loops)
2327 scev_finalize ();
2328 loop_optimizer_finalize ();
2329 current_loops = NULL;
2332 /* If the current function is the allocation function for any of
2333 the matrices we check its allocation and the escaping level. */
2334 htab_traverse (matrices_to_reorg, check_allocation_function, NULL);
2335 free_dominance_info (CDI_DOMINATORS);
2336 free_dominance_info (CDI_POST_DOMINATORS);
2337 pop_cfun ();
2338 current_function_decl = temp_fn;
2339 bitmap_obstack_release (NULL);
2341 htab_traverse (matrices_to_reorg, transform_allocation_sites, NULL);
2342 /* Now transform the accesses. */
2343 for (node = cgraph_nodes; node; node = node->next)
2344 if (node->analyzed)
2346 /* Remember that allocation sites have been handled. */
2347 tree temp_fn;
2349 temp_fn = current_function_decl;
2350 current_function_decl = node->decl;
2351 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
2352 bitmap_obstack_initialize (NULL);
2353 gimple_register_cfg_hooks ();
2354 record_all_accesses_in_func ();
2355 htab_traverse (matrices_to_reorg, transform_access_sites, NULL);
2356 cgraph_rebuild_references ();
2357 free_dominance_info (CDI_DOMINATORS);
2358 free_dominance_info (CDI_POST_DOMINATORS);
2359 pop_cfun ();
2360 current_function_decl = temp_fn;
2361 bitmap_obstack_release (NULL);
2363 htab_traverse (matrices_to_reorg, dump_matrix_reorg_analysis, NULL);
2365 current_function_decl = NULL;
2366 set_cfun (NULL);
2367 matrices_to_reorg = NULL;
2368 return 0;
2372 /* The condition for matrix flattening to be performed. */
2373 static bool
2374 gate_matrix_reorg (void)
2376 return flag_ipa_matrix_reorg && flag_whole_program;
2379 struct simple_ipa_opt_pass pass_ipa_matrix_reorg =
2382 SIMPLE_IPA_PASS,
2383 "matrix-reorg", /* name */
2384 gate_matrix_reorg, /* gate */
2385 matrix_reorg, /* execute */
2386 NULL, /* sub */
2387 NULL, /* next */
2388 0, /* static_pass_number */
2389 TV_NONE, /* tv_id */
2390 0, /* properties_required */
2391 0, /* properties_provided */
2392 0, /* properties_destroyed */
2393 0, /* todo_flags_start */
2394 TODO_dump_cgraph | TODO_dump_func /* todo_flags_finish */