/* Matrix layout transformations.
   Copyright (C) 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
   Contributed by Razya Ladelsky <razya@il.ibm.com>
   Originally written by Revital Eres and Mustafa Hagog.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/*
   Matrix flattening optimization tries to replace an N-dimensional
   matrix with its equivalent M-dimensional matrix, where M < N.
   This first implementation focuses on global matrices defined
   dynamically.

   When N == 1, we actually flatten the whole matrix.
   For instance, consider a two-dimensional array a[dim1][dim2].
   The code for allocating space for it usually looks like:

     a = (int **) malloc (dim1 * sizeof (int *));
     for (i = 0; i < dim1; i++)
       a[i] = (int *) malloc (dim2 * sizeof (int));

   If the array "a" is found suitable for this optimization,
   its allocation is replaced by:

     a = (int *) malloc (dim1 * dim2 * sizeof (int));

   and all the references to a[i][j] are replaced by a[i * dim2 + j].
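
   As an illustrative sketch (not taken from these sources), a loop
   that previously summed the matrix as

     for (i = 0; i < dim1; i++)
       for (j = 0; j < dim2; j++)
         sum += a[i][j];

   would, after the transformation, read:

     for (i = 0; i < dim1; i++)
       for (j = 0; j < dim2; j++)
         sum += a[i * dim2 + j];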
   The two main phases of the optimization are the analysis
   and the transformation.
   The driver of the optimization is matrix_reorg ().

   Analysis phase:
   ===============

   We'll number the dimensions outside-in, meaning the most external
   is 0, then 1, and so on.
   The analysis part of the optimization determines K, the escape
   level of an N-dimensional matrix (K <= N), that allows flattening of
   the external dimensions 0, 1, ..., K-1.  Escape level 0 means that the
   whole matrix escapes and no flattening is possible.

   The analysis part is implemented in analyze_matrix_allocation_site ()
   and analyze_matrix_accesses ().

   Transformation phase:
   =====================
   In this phase we define the new flattened matrices that replace the
   original matrices in the code.
   Implemented in transform_allocation_sites (),
   transform_access_sites ().

   Matrix Transposing
   ==================
   The idea of matrix transposing is organizing the matrix in a different
   layout such that the dimensions are reordered.
   This could produce better cache behavior in some cases.

   For example, let us look at the matrix accesses in the following loop:

     for (i = 0; i < N; i++)
       for (j = 0; j < M; j++)
         access to a[i][j]

   This loop can produce good cache behavior because the elements of
   the inner dimension are accessed sequentially.

   However, if the accesses of the matrix were of the following form:

     for (i = 0; i < N; i++)
       for (j = 0; j < M; j++)
         access to a[j][i]

   this loop would iterate over the columns and not the rows.
   Therefore, swapping the rows and columns would yield a layout with
   better (cache) locality.  Swapping the dimensions of the matrix is
   called matrix transposing.

   This example, of course, extends to matrices with more than two
   dimensions as well.
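
   As an illustrative sketch (not taken from these sources): if the
   matrix above has M rows and N columns, transposing stores element
   a[j][i] at position a'[i][j] of an N-by-M matrix a', so that after
   flattening the access becomes

     for (i = 0; i < N; i++)
       for (j = 0; j < M; j++)
         access to a'[i * M + j]

   which walks memory sequentially in the inner loop.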
   Since a program could include all kinds of accesses, there is a
   decision mechanism, implemented in analyze_transpose (), which
   implements a heuristic that tries to determine whether to transpose
   the matrix or not, according to the form of the dominant accesses.
   This decision is passed on to the flattening mechanism, and whether
   or not the matrix was transposed, the matrix is flattened (if
   possible).

   This decision making is based on profiling information and loop
   information.  If profiling information is available, the decision
   making mechanism is run; otherwise the matrix is only flattened
   (if possible).

   Both optimizations are described in the paper "Matrix flattening and
   transposing in GCC" which was presented at the GCC Summit 2006.
   http://www.gccsummit.org/2006/2006-GCC-Summit-Proceedings.pdf.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "rtl.h"
#include "tree-inline.h"
#include "tree-flow.h"
#include "tree-flow-inline.h"
#include "langhooks.h"
#include "hashtab.h"
#include "flags.h"
#include "ggc.h"
#include "debug.h"
#include "target.h"
#include "cgraph.h"
#include "diagnostic-core.h"
#include "params.h"
#include "intl.h"
#include "function.h"
#include "basic-block.h"
#include "cfgloop.h"
#include "tree-iterator.h"
#include "tree-pass.h"
#include "opts.h"
#include "tree-data-ref.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "tree-ssa-sccvn.h"
/* We need to collect a lot of data from the original malloc,
   particularly as the gimplifier has converted:

     orig_var = (struct_type *) malloc (x * sizeof (struct_type *));

   into

     T3 = <constant> ;  ** <constant> is amount to malloc; precomputed **
     T4 = malloc (T3);
     T5 = (struct_type *) T4;
     orig_var = T5;

   The following struct fields allow us to collect all the necessary data from
   the gimplified program.  The comments in the struct below are all based
   on the gimple example above.  */

struct malloc_call_data
{
  gimple call_stmt;		/* Tree for "T4 = malloc (T3);".  */
  tree size_var;		/* Var decl for T3.  */
  tree malloc_size;		/* Tree for "<constant>", the rhs assigned to T3.  */
};
static tree can_calculate_expr_before_stmt (tree, sbitmap);
static tree can_calculate_stmt_before_stmt (gimple, sbitmap);
/* The front end of the compiler, when parsing statements of the form:

     var = (type_cast) malloc (sizeof (type));

   always converts this single statement into the following statements
   (GIMPLE form):

     T.1 = sizeof (type);
     T.2 = malloc (T.1);
     T.3 = (type_cast) T.2;
     var = T.3;

   Since we need to create new malloc statements and modify the original
   statements somewhat, we need to find all four of the above statements.
   Currently record_call_1 (called for building cgraph edges) finds and
   records the statements containing the actual call to malloc, but we
   need to find the rest of the variables/statements on our own.  That
   is what the following function does.  */
static void
collect_data_for_malloc_call (gimple stmt, struct malloc_call_data *m_data)
{
  tree size_var = NULL;
  tree malloc_fn_decl;
  tree arg1;

  gcc_assert (is_gimple_call (stmt));

  malloc_fn_decl = gimple_call_fndecl (stmt);
  if (malloc_fn_decl == NULL
      || DECL_FUNCTION_CODE (malloc_fn_decl) != BUILT_IN_MALLOC)
    return;

  arg1 = gimple_call_arg (stmt, 0);
  size_var = arg1;

  m_data->call_stmt = stmt;
  m_data->size_var = size_var;
  if (TREE_CODE (size_var) != VAR_DECL)
    m_data->malloc_size = size_var;
  else
    m_data->malloc_size = NULL_TREE;
}
/* Information about a matrix access site.
   For example: if an access site of matrix arr is arr[i][j]
   the ACCESS_SITE_INFO structure will have the address
   of arr as its stmt.  The INDEX_INFO will hold information about the
   initial address and index of each dimension.  */
struct access_site_info
{
  /* The statement (MEM_REF or POINTER_PLUS_EXPR).  */
  gimple stmt;

  /* In case of POINTER_PLUS_EXPR, what is the offset.  */
  tree offset;

  /* The index which created the offset.  */
  tree index;

  /* The indirection level of this statement.  */
  int level;

  /* TRUE for an allocation site, FALSE for an access site.  */
  bool is_alloc;

  /* The function containing the access site.  */
  tree function_decl;

  /* This access is iterated in the innermost loop.  */
  bool iterated_by_inner_most_loop_p;
};
typedef struct access_site_info *access_site_info_p;
DEF_VEC_P (access_site_info_p);
DEF_VEC_ALLOC_P (access_site_info_p, heap);
/* Calls to free when flattening a matrix.  */

struct free_info
{
  gimple stmt;
  tree func;
};
/* Information about a matrix to flatten.  */
struct matrix_info
{
  /* Decl tree of this matrix.  */
  tree decl;
  /* Number of dimensions; number
     of "*" in the type declaration.  */
  int num_dims;

  /* Minimum indirection level that escapes, 0 means that
     the whole matrix escapes, k means that dimensions
     0 to ACTUAL_DIM - k escapes.  */
  int min_indirect_level_escape;

  gimple min_indirect_level_escape_stmt;

  /* Hold the allocation site for each level (dimension).
     We can use NUM_DIMS as the upper bound and allocate the array
     once with this number of elements and no need to use realloc and
     MAX_MALLOCED_LEVEL.  */
  gimple *malloc_for_level;

  int max_malloced_level;

  /* Is the matrix transposed.  */
  bool is_transposed_p;

  /* The location of the allocation sites (they must be in one
     function).  */
  tree allocation_function_decl;

  /* The calls to free for each level of indirection.  */
  struct free_info *free_stmts;

  /* An array which holds for each dimension its size, where
     dimension 0 is the outermost (the one that contains all the
     others).  */
  tree *dimension_size;

  /* An array which holds for each dimension its original size
     (before transposing and flattening take place).  */
  tree *dimension_size_orig;

  /* An array which holds for each dimension the size of the type of
     the elements accessed in that level (in bytes).  */
  HOST_WIDE_INT *dimension_type_size;

  int dimension_type_size_len;

  /* An array collecting the count of accesses for each dimension.  */
  gcov_type *dim_hot_level;

  /* An array of the accesses to be flattened.
     Elements are of type "struct access_site_info *".  */
  VEC (access_site_info_p, heap) * access_l;

  /* A map of how the dimensions will be organized at the end of
     the analyses.  */
  int *dim_map;
};
/* In each phi node we want to record the indirection level we have when we
   get to the phi node.  Usually we will have phi nodes with more than two
   arguments, then we must assure that all of them get to the phi node with
   the same indirection level; otherwise it's not safe to do the flattening.
   So we record the information regarding the indirection level each time we
   get to the phi node in this hash table.  */

struct matrix_access_phi_node
{
  gimple phi;
  int indirection_level;
};
/* We use this structure to find if the SSA variable is accessed inside the
   tree and record the tree containing it.  */

struct ssa_acc_in_tree
{
  /* The variable whose accesses in the tree we are looking for.  */
  tree ssa_var;
  /* The tree and code inside which the ssa_var is accessed; currently
     it could be a MEM_REF or CALL_EXPR.  */
  enum tree_code t_code;
  tree t_tree;
  /* The place in the containing tree.  */
  tree *tp;
  tree second_op;
  bool var_found;
};
static void analyze_matrix_accesses (struct matrix_info *, tree, int, bool,
				     sbitmap, bool);
static int transform_allocation_sites (void **, void *);
static int transform_access_sites (void **, void *);
static int analyze_transpose (void **, void *);
static int dump_matrix_reorg_analysis (void **, void *);

static bool check_transpose_p;
/* Hash function used for the phi nodes.  */

static hashval_t
mat_acc_phi_hash (const void *p)
{
  const struct matrix_access_phi_node *const ma_phi =
    (const struct matrix_access_phi_node *) p;

  return htab_hash_pointer (ma_phi->phi);
}

/* Equality means phi node pointers are the same.  */

static int
mat_acc_phi_eq (const void *p1, const void *p2)
{
  const struct matrix_access_phi_node *const phi1 =
    (const struct matrix_access_phi_node *) p1;
  const struct matrix_access_phi_node *const phi2 =
    (const struct matrix_access_phi_node *) p2;

  if (phi1->phi == phi2->phi)
    return 1;

  return 0;
}
/* Hold the PHI nodes we visit during the traversal for escaping
   analysis.  */
static htab_t htab_mat_acc_phi_nodes = NULL;

/* This hash-table holds the information about the matrices we are
   going to handle.  */
static htab_t matrices_to_reorg = NULL;
/* Return a hash for MTT, which is really a "matrix_info *".  */
static hashval_t
mtt_info_hash (const void *mtt)
{
  return htab_hash_pointer (((const struct matrix_info *) mtt)->decl);
}

/* Return true if MTT1 and MTT2 (which are really both of type
   "matrix_info *") refer to the same decl.  */
static int
mtt_info_eq (const void *mtt1, const void *mtt2)
{
  const struct matrix_info *const i1 = (const struct matrix_info *) mtt1;
  const struct matrix_info *const i2 = (const struct matrix_info *) mtt2;

  if (i1->decl == i2->decl)
    return true;

  return false;
}
/* Return false if STMT may contain a vector expression.
   In this situation, none of the matrices should be flattened.  */
static bool
may_flatten_matrices_1 (gimple stmt)
{
  switch (gimple_code (stmt))
    {
    case GIMPLE_ASSIGN:
    case GIMPLE_CALL:
      if (!gimple_has_lhs (stmt))
	return true;
      if (TREE_CODE (TREE_TYPE (gimple_get_lhs (stmt))) == VECTOR_TYPE)
	{
	  if (dump_file)
	    fprintf (dump_file,
		     "Found vector type, don't flatten matrix\n");
	  return false;
	}
      break;
    case GIMPLE_ASM:
      /* Asm code could contain vector operations.  */
      return false;
      break;
    default:
      break;
    }
  return true;
}
/* Return false if there are hand-written vectors in the program.
   We disable the flattening in such a case.  */
static bool
may_flatten_matrices (struct cgraph_node *node)
{
  tree decl;
  struct function *func;
  basic_block bb;
  gimple_stmt_iterator gsi;

  decl = node->symbol.decl;
  if (node->analyzed)
    {
      func = DECL_STRUCT_FUNCTION (decl);
      FOR_EACH_BB_FN (bb, func)
	for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	  if (!may_flatten_matrices_1 (gsi_stmt (gsi)))
	    return false;
    }
  return true;
}
/* Given a VAR_DECL, check its type to determine whether it is
   a definition of a dynamically allocated matrix and therefore is
   a suitable candidate for the matrix flattening optimization.
   Return NULL if VAR_DECL is not such a decl.  Otherwise, allocate
   a MATRIX_INFO structure, fill it with the relevant information
   and return a pointer to it.
   TODO: handle also statically defined arrays.  */
static struct matrix_info *
analyze_matrix_decl (tree var_decl)
{
  struct matrix_info *m_node, tmpmi, *mi;
  tree var_type;
  int dim_num = 0;

  gcc_assert (matrices_to_reorg);

  if (TREE_CODE (var_decl) == PARM_DECL)
    var_type = DECL_ARG_TYPE (var_decl);
  else if (TREE_CODE (var_decl) == VAR_DECL)
    var_type = TREE_TYPE (var_decl);
  else
    return NULL;

  if (!POINTER_TYPE_P (var_type))
    return NULL;

  while (POINTER_TYPE_P (var_type))
    {
      var_type = TREE_TYPE (var_type);
      dim_num++;
    }

  if (dim_num <= 1)
    return NULL;

  if (!COMPLETE_TYPE_P (var_type)
      || TREE_CODE (TYPE_SIZE_UNIT (var_type)) != INTEGER_CST)
    return NULL;

  /* Check to see if this pointer is already in there.  */
  tmpmi.decl = var_decl;
  mi = (struct matrix_info *) htab_find (matrices_to_reorg, &tmpmi);

  if (mi)
    return NULL;

  /* Record the matrix.  */

  m_node = (struct matrix_info *) xcalloc (1, sizeof (struct matrix_info));
  m_node->decl = var_decl;
  m_node->num_dims = dim_num;
  m_node->free_stmts
    = (struct free_info *) xcalloc (dim_num, sizeof (struct free_info));

  /* Init min_indirect_level_escape to -1 to indicate that no escape
     analysis has been done yet.  */
  m_node->min_indirect_level_escape = -1;
  m_node->is_transposed_p = false;

  return m_node;
}
/* Free matrix E.  */
static void
mat_free (void *e)
{
  struct matrix_info *mat = (struct matrix_info *) e;

  if (!mat)
    return;

  free (mat->free_stmts);
  free (mat->dim_hot_level);
  free (mat->malloc_for_level);
}
/* Find all potential matrices.
   TODO: currently we handle only multidimensional
   dynamically allocated arrays.  */
static void
find_matrices_decl (void)
{
  struct matrix_info *tmp;
  PTR *slot;
  struct varpool_node *vnode;

  gcc_assert (matrices_to_reorg);

  /* For every global variable in the program:
     Check to see if it's of a candidate type and record it.  */
  FOR_EACH_DEFINED_VARIABLE (vnode)
    {
      tree var_decl = vnode->symbol.decl;

      if (!var_decl || TREE_CODE (var_decl) != VAR_DECL)
	continue;

      if (matrices_to_reorg)
	if ((tmp = analyze_matrix_decl (var_decl)))
	  {
	    if (!TREE_ADDRESSABLE (var_decl))
	      {
		slot = htab_find_slot (matrices_to_reorg, tmp, INSERT);
		*slot = tmp;
	      }
	  }
    }
  return;
}
/* Mark that the matrix MI escapes at level L.  */
static void
mark_min_matrix_escape_level (struct matrix_info *mi, int l, gimple s)
{
  if (mi->min_indirect_level_escape == -1
      || (mi->min_indirect_level_escape > l))
    {
      mi->min_indirect_level_escape = l;
      mi->min_indirect_level_escape_stmt = s;
    }
}
/* Find if the SSA variable is accessed inside the
   tree and record the tree containing it.
   The only relevant uses are the case of SSA_NAME, or SSA inside
   MEM_REF, PLUS_EXPR, POINTER_PLUS_EXPR, MULT_EXPR.  */
static void
ssa_accessed_in_tree (tree t, struct ssa_acc_in_tree *a)
{
  a->t_code = TREE_CODE (t);
  switch (a->t_code)
    {
    case SSA_NAME:
      if (t == a->ssa_var)
	a->var_found = true;
      break;
    case MEM_REF:
      if (SSA_VAR_P (TREE_OPERAND (t, 0))
	  && TREE_OPERAND (t, 0) == a->ssa_var)
	a->var_found = true;
      break;
    default:
      break;
    }
}
/* Find if the SSA variable is accessed on the right hand side of
   gimple call STMT.  */

static void
ssa_accessed_in_call_rhs (gimple stmt, struct ssa_acc_in_tree *a)
{
  tree decl;
  tree arg;
  size_t i;

  a->t_code = CALL_EXPR;
  for (i = 0; i < gimple_call_num_args (stmt); i++)
    {
      arg = gimple_call_arg (stmt, i);
      if (arg == a->ssa_var)
	{
	  a->var_found = true;
	  decl = gimple_call_fndecl (stmt);
	  a->t_tree = decl;
	  break;
	}
    }
}
/* Find if the SSA variable is accessed on the right hand side of
   gimple assign STMT.  */

static void
ssa_accessed_in_assign_rhs (gimple stmt, struct ssa_acc_in_tree *a)
{
  a->t_code = gimple_assign_rhs_code (stmt);
  switch (a->t_code)
    {
      tree op1, op2;

    case SSA_NAME:
    case MEM_REF:
    CASE_CONVERT:
    case VIEW_CONVERT_EXPR:
      ssa_accessed_in_tree (gimple_assign_rhs1 (stmt), a);
      break;
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MULT_EXPR:
      op1 = gimple_assign_rhs1 (stmt);
      op2 = gimple_assign_rhs2 (stmt);

      if (op1 == a->ssa_var)
	{
	  a->var_found = true;
	  a->second_op = op2;
	}
      else if (op2 == a->ssa_var)
	{
	  a->var_found = true;
	  a->second_op = op1;
	}
      break;
    default:
      break;
    }
}
/* Record the access/allocation site information for matrix MI so we can
   handle it later in the transformation.  */
static void
record_access_alloc_site_info (struct matrix_info *mi, gimple stmt, tree offset,
			       tree index, int level, bool is_alloc)
{
  struct access_site_info *acc_info;

  if (!mi->access_l)
    mi->access_l = VEC_alloc (access_site_info_p, heap, 100);

  acc_info
    = (struct access_site_info *)
    xcalloc (1, sizeof (struct access_site_info));
  acc_info->stmt = stmt;
  acc_info->offset = offset;
  acc_info->index = index;
  acc_info->function_decl = current_function_decl;
  acc_info->level = level;
  acc_info->is_alloc = is_alloc;

  VEC_safe_push (access_site_info_p, heap, mi->access_l, acc_info);
}
/* Record the malloc as the allocation site of the given LEVEL.  But
   first make sure that all the size parameters passed to malloc in
   all the allocation sites could be pre-calculated before the call to
   the malloc of level 0 (the main malloc call).  */
static void
add_allocation_site (struct matrix_info *mi, gimple stmt, int level)
{
  struct malloc_call_data mcd;

  /* Make sure that the allocation sites are in the same function.  */
  if (!mi->allocation_function_decl)
    mi->allocation_function_decl = current_function_decl;
  else if (mi->allocation_function_decl != current_function_decl)
    {
      int min_malloc_level;

      gcc_assert (mi->malloc_for_level);

      /* Find the minimum malloc level that has already been seen;
	 we know its allocation function must be
	 MI->allocation_function_decl since it's different from
	 CURRENT_FUNCTION_DECL; then the escaping level should be
	 MIN (LEVEL, MIN_MALLOC_LEVEL) - 1, and the allocation function
	 must be set accordingly.  */
      for (min_malloc_level = 0;
	   min_malloc_level < mi->max_malloced_level
	   && mi->malloc_for_level[min_malloc_level]; min_malloc_level++)
	;
      if (level < min_malloc_level)
	{
	  mi->allocation_function_decl = current_function_decl;
	  mark_min_matrix_escape_level (mi, min_malloc_level, stmt);
	}
      else
	{
	  mark_min_matrix_escape_level (mi, level, stmt);
	  /* It cannot be that level == min_malloc_level;
	     we would have returned earlier.  */
	  return;
	}
    }

  /* Find the correct malloc information.  */
  collect_data_for_malloc_call (stmt, &mcd);

  /* We accept only calls to the malloc function; we do not accept
     calls like calloc and realloc.  */
  if (!mi->malloc_for_level)
    {
      mi->malloc_for_level = XCNEWVEC (gimple, level + 1);
      mi->max_malloced_level = level + 1;
    }
  else if (mi->max_malloced_level <= level)
    {
      mi->malloc_for_level
	= XRESIZEVEC (gimple, mi->malloc_for_level, level + 1);

      /* Zero the newly allocated items.  */
      memset (&(mi->malloc_for_level[mi->max_malloced_level + 1]),
	      0, (level - mi->max_malloced_level) * sizeof (tree));

      mi->max_malloced_level = level + 1;
    }
  mi->malloc_for_level[level] = stmt;
}
/* Given an assignment statement STMT whose left-hand side we know to
   be the matrix MI variable, we traverse the immediate
   uses backwards until we get to a malloc site.  We make sure that
   there is one and only one malloc site that sets this variable.  When
   we are performing the flattening we generate a new variable that
   will hold the size for each dimension; each malloc that allocates a
   dimension has the size parameter; we use that parameter to
   initialize the dimension size variable so we can use it later in
   the address calculations.  LEVEL is the dimension we're inspecting.
   Return if STMT is related to an allocation site.  */

static void
analyze_matrix_allocation_site (struct matrix_info *mi, gimple stmt,
				int level, sbitmap visited)
{
  if (gimple_assign_copy_p (stmt) || gimple_assign_cast_p (stmt))
    {
      tree rhs = gimple_assign_rhs1 (stmt);

      if (TREE_CODE (rhs) == SSA_NAME)
	{
	  gimple def = SSA_NAME_DEF_STMT (rhs);

	  analyze_matrix_allocation_site (mi, def, level, visited);
	  return;
	}
      /* If we are back to the original matrix variable then we
         are sure that this is analyzed as an access site.  */
      else if (rhs == mi->decl)
	return;
    }
  /* A result of call to malloc.  */
  else if (is_gimple_call (stmt))
    {
      int call_flags = gimple_call_flags (stmt);

      if (!(call_flags & ECF_MALLOC))
	{
	  mark_min_matrix_escape_level (mi, level, stmt);
	  return;
	}
      else
	{
	  tree malloc_fn_decl;

	  malloc_fn_decl = gimple_call_fndecl (stmt);
	  if (malloc_fn_decl == NULL_TREE)
	    {
	      mark_min_matrix_escape_level (mi, level, stmt);
	      return;
	    }
	  if (DECL_FUNCTION_CODE (malloc_fn_decl) != BUILT_IN_MALLOC)
	    {
	      if (dump_file)
		fprintf (dump_file,
			 "Matrix %s is an argument to function %s\n",
			 get_name (mi->decl), get_name (malloc_fn_decl));
	      mark_min_matrix_escape_level (mi, level, stmt);
	      return;
	    }
	}
      /* This is a call to malloc of level 'level'.
	 mi->max_malloced_level - 1 == level means that we've
	 seen a malloc statement of level 'level' before.
	 If the statement is not the same one that we've
	 seen before, then there's another malloc statement
	 for the same level, which means that we need to mark
	 it escaping.  */
      if (mi->malloc_for_level
	  && mi->max_malloced_level - 1 == level
	  && mi->malloc_for_level[level] != stmt)
	{
	  mark_min_matrix_escape_level (mi, level, stmt);
	  return;
	}
      else
	add_allocation_site (mi, stmt, level);
      return;
    }
  /* Looks like we don't know what is happening in this
     statement, so be on the safe side and mark it as escaping.  */
  mark_min_matrix_escape_level (mi, level, stmt);
}
/* The transposing decision making.
   In order to calculate the profitability of transposing, we collect two
   types of information regarding the accesses:
   1. profiling information used to express the hotness of an access, that
      is how often the matrix is accessed by this access site (count of the
      access site).
   2. which dimension in the access site is iterated by the inner
      most loop containing this access.

   The matrix will have a calculated value of weighted hotness for each
   dimension.
   Intuitively the hotness level of a dimension is a function of how
   many times it was the most frequently accessed dimension in the
   highly executed access sites of this matrix.
   This is computed by the following equation, where n is the number of
   dimensions and m is the number of matrix access sites:

     dim_hot_level[i] += acc[j]->dim[i]->iter_by_inner_loop * count (j),
       summed over all access sites j = 1..m, for each dimension i = 1..n

   acc[j]->dim[i]->iter_by_inner_loop is 1 if acc[j] iterates over
   dim[i] in the innermost loop, and is 0 otherwise.
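
   As an illustrative worked example (numbers invented for the sake of
   the illustration): with two access sites of a two-dimensional
   matrix, where site 0 has count 1000 and iterates over dimension 1
   in its innermost loop, and site 1 has count 10 and iterates over
   dimension 0, the equation gives dim_hot_level[0] = 10 and
   dim_hot_level[1] = 1000, so dimension 1 is the hottest.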
   The organization of the new matrix should be according to the
   hotness of each dimension.  The hotness of a dimension implies
   the locality of its elements.  */
static int
analyze_transpose (void **slot, void *data ATTRIBUTE_UNUSED)
{
  struct matrix_info *mi = (struct matrix_info *) *slot;
  int min_escape_l = mi->min_indirect_level_escape;
  struct loop *loop;
  affine_iv iv;
  struct access_site_info *acc_info;
  int i;

  if (min_escape_l < 2 || !mi->access_l)
    {
      if (mi->access_l)
	{
	  FOR_EACH_VEC_ELT (access_site_info_p, mi->access_l, i, acc_info)
	    free (acc_info);
	  VEC_free (access_site_info_p, heap, mi->access_l);
	}
      return 1;
    }
  if (!mi->dim_hot_level)
    mi->dim_hot_level =
      (gcov_type *) xcalloc (min_escape_l, sizeof (gcov_type));

  for (i = 0; VEC_iterate (access_site_info_p, mi->access_l, i, acc_info);
       i++)
    {
      if (gimple_assign_rhs_code (acc_info->stmt) == POINTER_PLUS_EXPR
	  && acc_info->level < min_escape_l)
	{
	  loop = loop_containing_stmt (acc_info->stmt);
	  if (!loop || loop->inner)
	    {
	      free (acc_info);
	      continue;
	    }
	  if (simple_iv (loop, loop, acc_info->offset, &iv, true))
	    {
	      if (iv.step != NULL)
		{
		  HOST_WIDE_INT istep;

		  istep = int_cst_value (iv.step);
		  if (istep != 0)
		    {
		      acc_info->iterated_by_inner_most_loop_p = 1;
		      mi->dim_hot_level[acc_info->level] +=
			gimple_bb (acc_info->stmt)->count;
		    }
		}
	    }
	}
      free (acc_info);
    }
  VEC_free (access_site_info_p, heap, mi->access_l);

  return 1;
}
/* Find the index which defines the OFFSET from base.
   We walk from use to def until we find how the offset was defined.  */
static tree
get_index_from_offset (tree offset, gimple def_stmt)
{
  tree op1, op2, index;

  if (gimple_code (def_stmt) == GIMPLE_PHI)
    return NULL;
  if ((gimple_assign_copy_p (def_stmt) || gimple_assign_cast_p (def_stmt))
      && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME)
    return get_index_from_offset (offset,
				  SSA_NAME_DEF_STMT (gimple_assign_rhs1 (def_stmt)));
  else if (is_gimple_assign (def_stmt)
	   && gimple_assign_rhs_code (def_stmt) == MULT_EXPR)
    {
      op1 = gimple_assign_rhs1 (def_stmt);
      op2 = gimple_assign_rhs2 (def_stmt);
      if (TREE_CODE (op1) != INTEGER_CST && TREE_CODE (op2) != INTEGER_CST)
	return NULL;
      index = (TREE_CODE (op1) == INTEGER_CST) ? op2 : op1;
      return index;
    }
  else
    return NULL_TREE;
}
/* Update MI->dimension_type_size[CURRENT_INDIRECT_LEVEL] with the size
   of the type related to the SSA_VAR, or the type related to the
   lhs of STMT, in the case that it is a MEM_REF.  */
static void
update_type_size (struct matrix_info *mi, gimple stmt, tree ssa_var,
		  int current_indirect_level)
{
  tree lhs;
  HOST_WIDE_INT type_size;

  /* Update type according to the type of the MEM_REF expr.  */
  if (is_gimple_assign (stmt)
      && TREE_CODE (gimple_assign_lhs (stmt)) == MEM_REF)
    {
      lhs = gimple_assign_lhs (stmt);
      gcc_assert (POINTER_TYPE_P
		  (TREE_TYPE (SSA_NAME_VAR (TREE_OPERAND (lhs, 0)))));
      type_size =
	int_size_in_bytes (TREE_TYPE
			   (TREE_TYPE
			    (SSA_NAME_VAR (TREE_OPERAND (lhs, 0)))));
    }
  else
    type_size = int_size_in_bytes (TREE_TYPE (ssa_var));

  /* Record the size of elements accessed (as a whole)
     in the current indirection level (dimension).  If the size of
     elements is not known at compile time, mark it as escaping.  */
  if (type_size <= 0)
    mark_min_matrix_escape_level (mi, current_indirect_level, stmt);
  else
    {
      int l = current_indirect_level;

      if (!mi->dimension_type_size)
	{
	  mi->dimension_type_size
	    = (HOST_WIDE_INT *) xcalloc (l + 1, sizeof (HOST_WIDE_INT));
	  mi->dimension_type_size_len = l + 1;
	}
      else if (mi->dimension_type_size_len < l + 1)
	{
	  mi->dimension_type_size
	    = (HOST_WIDE_INT *) xrealloc (mi->dimension_type_size,
					  (l + 1) * sizeof (HOST_WIDE_INT));
	  memset (&mi->dimension_type_size[mi->dimension_type_size_len],
		  0, (l + 1 - mi->dimension_type_size_len)
		  * sizeof (HOST_WIDE_INT));
	  mi->dimension_type_size_len = l + 1;
	}
      /* Make sure all the accesses in the same level have the same size
         of the type.  */
      if (!mi->dimension_type_size[l])
	mi->dimension_type_size[l] = type_size;
      else if (mi->dimension_type_size[l] != type_size)
	mark_min_matrix_escape_level (mi, l, stmt);
    }
}
/* USE_STMT represents a GIMPLE_CALL, where one of the arguments is the
   ssa var that we want to check because it came from some use of matrix
   MI.  CURRENT_INDIRECT_LEVEL is the indirection level we reached so
   far.  */

static int
analyze_accesses_for_call_stmt (struct matrix_info *mi, tree ssa_var,
				gimple use_stmt, int current_indirect_level)
{
  tree fndecl = gimple_call_fndecl (use_stmt);

  if (gimple_call_lhs (use_stmt))
    {
      tree lhs = gimple_call_lhs (use_stmt);
      struct ssa_acc_in_tree lhs_acc, rhs_acc;

      memset (&lhs_acc, 0, sizeof (lhs_acc));
      memset (&rhs_acc, 0, sizeof (rhs_acc));

      lhs_acc.ssa_var = ssa_var;
      lhs_acc.t_code = ERROR_MARK;
      ssa_accessed_in_tree (lhs, &lhs_acc);
      rhs_acc.ssa_var = ssa_var;
      rhs_acc.t_code = ERROR_MARK;
      ssa_accessed_in_call_rhs (use_stmt, &rhs_acc);

      /* The SSA must be either in the left side or in the right side,
	 to understand what is happening.
	 In case the SSA_NAME is found in both sides we should be escaping
	 at this level because in this case we cannot calculate the
	 address correctly.  */
      if ((lhs_acc.var_found && rhs_acc.var_found
	   && lhs_acc.t_code == MEM_REF)
	  || (!rhs_acc.var_found && !lhs_acc.var_found))
	{
	  mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
	  return current_indirect_level;
	}
      gcc_assert (!rhs_acc.var_found || !lhs_acc.var_found);

      /* If we are storing to the matrix at some level, then mark it as
	 escaping at that level.  */
      if (lhs_acc.var_found)
	{
	  int l = current_indirect_level + 1;

	  gcc_assert (lhs_acc.t_code == MEM_REF);
	  mark_min_matrix_escape_level (mi, l, use_stmt);
	  return current_indirect_level;
	}
    }

  if (fndecl)
    {
      if (DECL_FUNCTION_CODE (fndecl) != BUILT_IN_FREE)
	{
	  if (dump_file)
	    fprintf (dump_file,
		     "Matrix %s: Function call %s, level %d escapes.\n",
		     get_name (mi->decl), get_name (fndecl),
		     current_indirect_level);
	  mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
	}
      else if (mi->free_stmts[current_indirect_level].stmt != NULL
	       && mi->free_stmts[current_indirect_level].stmt != use_stmt)
	mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
      else
	{
	  /* Record the free statements so we can delete them
	     later.  */
	  int l = current_indirect_level;

	  mi->free_stmts[l].stmt = use_stmt;
	  mi->free_stmts[l].func = current_function_decl;
	}
    }
  return current_indirect_level;
}
/* USE_STMT represents a phi node of the ssa var that we want to
   check because it came from some use of matrix MI.
   We check all the escaping levels that get to the PHI node
   and make sure they are all the same escaping;
   if not (which is rare) we let the escaping level be the
   minimum level that gets into that PHI because starting from
   that level we cannot expect the behavior of the indirections.
   CURRENT_INDIRECT_LEVEL is the indirection level we reached so far.  */

static void
analyze_accesses_for_phi_node (struct matrix_info *mi, gimple use_stmt,
			       int current_indirect_level, sbitmap visited,
			       bool record_accesses)
{
  struct matrix_access_phi_node tmp_maphi, *maphi, **pmaphi;

  tmp_maphi.phi = use_stmt;
  if ((maphi = (struct matrix_access_phi_node *)
       htab_find (htab_mat_acc_phi_nodes, &tmp_maphi)))
    {
      if (maphi->indirection_level == current_indirect_level)
	return;
      else
	{
	  int level = MIN (maphi->indirection_level,
			   current_indirect_level);
	  size_t j;
	  gimple stmt = NULL;

	  maphi->indirection_level = level;
	  for (j = 0; j < gimple_phi_num_args (use_stmt); j++)
	    {
	      tree def = PHI_ARG_DEF (use_stmt, j);

	      if (gimple_code (SSA_NAME_DEF_STMT (def)) != GIMPLE_PHI)
		stmt = SSA_NAME_DEF_STMT (def);
	    }
	  mark_min_matrix_escape_level (mi, level, stmt);
	}
      return;
    }
  maphi = (struct matrix_access_phi_node *)
    xcalloc (1, sizeof (struct matrix_access_phi_node));
  maphi->phi = use_stmt;
  maphi->indirection_level = current_indirect_level;

  /* Insert to hash table.  */
  pmaphi = (struct matrix_access_phi_node **)
    htab_find_slot (htab_mat_acc_phi_nodes, maphi, INSERT);
  gcc_assert (pmaphi);
  *pmaphi = maphi;

  if (!TEST_BIT (visited, SSA_NAME_VERSION (PHI_RESULT (use_stmt))))
    {
      SET_BIT (visited, SSA_NAME_VERSION (PHI_RESULT (use_stmt)));
      analyze_matrix_accesses (mi, PHI_RESULT (use_stmt),
			       current_indirect_level, false, visited,
			       record_accesses);
      RESET_BIT (visited, SSA_NAME_VERSION (PHI_RESULT (use_stmt)));
    }
}
/* USE_STMT represents an assign statement (the rhs or lhs include
   the ssa var that we want to check because it came from some use of
   matrix MI).  CURRENT_INDIRECT_LEVEL is the indirection level we
   reached so far.  */

static int
analyze_accesses_for_assign_stmt (struct matrix_info *mi, tree ssa_var,
				  gimple use_stmt, int current_indirect_level,
				  bool last_op, sbitmap visited,
				  bool record_accesses)
{
  tree lhs = gimple_get_lhs (use_stmt);
  struct ssa_acc_in_tree lhs_acc, rhs_acc;

  memset (&lhs_acc, 0, sizeof (lhs_acc));
  memset (&rhs_acc, 0, sizeof (rhs_acc));

  lhs_acc.ssa_var = ssa_var;
  lhs_acc.t_code = ERROR_MARK;
  ssa_accessed_in_tree (lhs, &lhs_acc);
  rhs_acc.ssa_var = ssa_var;
  rhs_acc.t_code = ERROR_MARK;
  ssa_accessed_in_assign_rhs (use_stmt, &rhs_acc);

  /* The SSA must be either in the left side or in the right side,
     to understand what is happening.
     In case the SSA_NAME is found in both sides we should be escaping
     at this level because in this case we cannot calculate the
     address correctly.  */
  if ((lhs_acc.var_found && rhs_acc.var_found
       && lhs_acc.t_code == MEM_REF)
      || (!rhs_acc.var_found && !lhs_acc.var_found))
    {
      mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
      return current_indirect_level;
    }
  gcc_assert (!rhs_acc.var_found || !lhs_acc.var_found);

  /* If we are storing to the matrix at some level, then mark it as
     escaping at that level.  */
  if (lhs_acc.var_found)
    {
      int l = current_indirect_level + 1;

      gcc_assert (lhs_acc.t_code == MEM_REF);

      if (!(gimple_assign_copy_p (use_stmt)
	    || gimple_assign_cast_p (use_stmt))
	  || (TREE_CODE (gimple_assign_rhs1 (use_stmt)) != SSA_NAME))
	mark_min_matrix_escape_level (mi, l, use_stmt);
      else
	{
	  gimple def_stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (use_stmt));
	  analyze_matrix_allocation_site (mi, def_stmt, l, visited);
	  if (record_accesses)
	    record_access_alloc_site_info (mi, use_stmt, NULL_TREE,
					   NULL_TREE, l, true);
	  update_type_size (mi, use_stmt, NULL, l);
	}
      return current_indirect_level;
    }
  /* Now, check the right-hand-side, to see how the SSA variable
     is used.  */
  if (rhs_acc.var_found)
    {
      if (rhs_acc.t_code != MEM_REF
	  && rhs_acc.t_code != POINTER_PLUS_EXPR && rhs_acc.t_code != SSA_NAME)
	{
	  mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
	  return current_indirect_level;
	}
      /* If the access in the RHS has an indirection increase the
         indirection level.  */
      if (rhs_acc.t_code == MEM_REF)
	{
	  if (record_accesses)
	    record_access_alloc_site_info (mi, use_stmt, NULL_TREE,
					   NULL_TREE,
					   current_indirect_level, true);
	  current_indirect_level += 1;
	}
      else if (rhs_acc.t_code == POINTER_PLUS_EXPR)
	{
	  gcc_assert (rhs_acc.second_op);
	  if (last_op)
	    /* Currently we support only one PLUS expression on the
	       SSA_NAME that holds the base address of the current
	       indirection level; to support the more general case there
	       is a need to hold a stack of expressions and regenerate
	       the calculation later.  */
	    mark_min_matrix_escape_level (mi, current_indirect_level,
					  use_stmt);
	  else
	    {
	      tree index;
	      tree op1, op2;

	      op1 = gimple_assign_rhs1 (use_stmt);
	      op2 = gimple_assign_rhs2 (use_stmt);

	      op2 = (op1 == ssa_var) ? op2 : op1;
	      if (TREE_CODE (op2) == INTEGER_CST)
		index =
		  build_int_cst (TREE_TYPE (op1),
				 TREE_INT_CST_LOW (op2) /
				 int_size_in_bytes (TREE_TYPE (op1)));
	      else
		{
		  index =
		    get_index_from_offset (op2, SSA_NAME_DEF_STMT (op2));
		  if (index == NULL_TREE)
		    {
		      mark_min_matrix_escape_level (mi,
						    current_indirect_level,
						    use_stmt);
		      return current_indirect_level;
		    }
		}
	      if (record_accesses)
		record_access_alloc_site_info (mi, use_stmt, op2,
					       index,
					       current_indirect_level, false);
	    }
	}
      /* If we are storing this level of indirection mark it as
         escaping.  */
      if (lhs_acc.t_code == MEM_REF || TREE_CODE (lhs) != SSA_NAME)
	{
	  int l = current_indirect_level;

	  /* One exception is when we are storing to the matrix
	     variable itself; this is the case of malloc, we must make
	     sure that it's the one and only one call to malloc so
	     we call analyze_matrix_allocation_site to check
	     this out.  */
	  if (TREE_CODE (lhs) != VAR_DECL || lhs != mi->decl)
	    mark_min_matrix_escape_level (mi, current_indirect_level,
					  use_stmt);
	  else
	    {
	      /* Also update the escaping level.  */
	      analyze_matrix_allocation_site (mi, use_stmt, l, visited);
	      if (record_accesses)
		record_access_alloc_site_info (mi, use_stmt, NULL_TREE,
					       NULL_TREE, l, true);
	    }
	}
      else
	{
	  /* We are placing it in an SSA, follow that SSA.  */
	  analyze_matrix_accesses (mi, lhs,
				   current_indirect_level,
				   rhs_acc.t_code == POINTER_PLUS_EXPR,
				   visited, record_accesses);
	}
    }
  return current_indirect_level;
}
/* Given a SSA_VAR (coming from a use statement of the matrix MI),
   follow its uses and level of indirection and find out the minimum
   indirection level it escapes in (the highest dimension) and the maximum
   level it is accessed in (this will be the actual dimension of the
   matrix).  The information is accumulated in MI.
   We look at the immediate uses; if one escapes we finish, if not,
   we make a recursive call for each one of the immediate uses of the
   resulting SSA name.  */
static void
analyze_matrix_accesses (struct matrix_info *mi, tree ssa_var,
			 int current_indirect_level, bool last_op,
			 sbitmap visited, bool record_accesses)
{
  imm_use_iterator imm_iter;
  use_operand_p use_p;

  update_type_size (mi, SSA_NAME_DEF_STMT (ssa_var), ssa_var,
		    current_indirect_level);

  /* We don't go beyond the escaping level when we are performing the
     flattening.  NOTE: we keep the last indirection level that doesn't
     escape.  */
  if (mi->min_indirect_level_escape > -1
      && mi->min_indirect_level_escape <= current_indirect_level)
    return;

  /* Now go over the uses of the SSA_NAME and check how it is used in
     each one of them.  We are mainly looking for the pattern MEM_REF,
     then a POINTER_PLUS_EXPR, then MEM_REF etc., while in between there
     could be any number of copies and casts.  */
  gcc_assert (TREE_CODE (ssa_var) == SSA_NAME);

  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, ssa_var)
    {
      gimple use_stmt = USE_STMT (use_p);
      if (gimple_code (use_stmt) == GIMPLE_PHI)
	/* We check all the escaping levels that get to the PHI node
	   and make sure they are all the same escaping;
	   if not (which is rare) we let the escaping level be the
	   minimum level that gets into that PHI because starting from
	   that level we cannot expect the behavior of the indirections.  */
	analyze_accesses_for_phi_node (mi, use_stmt, current_indirect_level,
				       visited, record_accesses);
      else if (is_gimple_call (use_stmt))
	analyze_accesses_for_call_stmt (mi, ssa_var, use_stmt,
					current_indirect_level);
      else if (is_gimple_assign (use_stmt))
	current_indirect_level =
	  analyze_accesses_for_assign_stmt (mi, ssa_var, use_stmt,
					    current_indirect_level, last_op,
					    visited, record_accesses);
    }
}
typedef struct
{
  tree fn;
  gimple stmt;
} check_var_data;
/* A walk_tree function to go over the VAR_DECL, PARM_DECL nodes of
   the malloc size expression and check that those aren't changed
   over the function.  */
static tree
check_var_notmodified_p (tree *tp, int *walk_subtrees, void *data)
{
  basic_block bb;
  tree t = *tp;
  check_var_data *callback_data = (check_var_data *) data;
  tree fn = callback_data->fn;
  gimple_stmt_iterator gsi;
  gimple stmt;

  if (TREE_CODE (t) != VAR_DECL && TREE_CODE (t) != PARM_DECL)
    return NULL_TREE;

  FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (fn))
    {
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  stmt = gsi_stmt (gsi);
	  if (!is_gimple_assign (stmt) && !is_gimple_call (stmt))
	    continue;
	  if (gimple_get_lhs (stmt) == t)
	    {
	      callback_data->stmt = stmt;
	      return t;
	    }
	}
    }
  *walk_subtrees = 1;
  return NULL_TREE;
}
/* Go backwards in the use-def chains and find out the expression
   represented by the possible SSA name in STMT, until it is composed
   of only VAR_DECL, PARM_DECL and INT_CST.  In case of phi nodes
   we make sure that all the arguments represent the same subexpression,
   otherwise we fail.  */

static tree
can_calculate_stmt_before_stmt (gimple stmt, sbitmap visited)
{
  tree op1, op2, res;
  enum tree_code code;

  switch (gimple_code (stmt))
    {
    case GIMPLE_ASSIGN:
      code = gimple_assign_rhs_code (stmt);
      op1 = gimple_assign_rhs1 (stmt);

      switch (code)
	{
	case POINTER_PLUS_EXPR:
	case PLUS_EXPR:
	case MINUS_EXPR:
	case MULT_EXPR:
	  op2 = gimple_assign_rhs2 (stmt);
	  op1 = can_calculate_expr_before_stmt (op1, visited);
	  if (!op1)
	    return NULL_TREE;
	  op2 = can_calculate_expr_before_stmt (op2, visited);
	  if (op2)
	    return fold_build2 (code, gimple_expr_type (stmt), op1, op2);
	  return NULL_TREE;

	CASE_CONVERT:
	  res = can_calculate_expr_before_stmt (op1, visited);
	  if (res != NULL_TREE)
	    return build1 (code, gimple_expr_type (stmt), res);
	  else
	    return NULL_TREE;

	default:
	  if (gimple_assign_single_p (stmt))
	    return can_calculate_expr_before_stmt (op1, visited);
	  else
	    return NULL_TREE;
	}

    case GIMPLE_PHI:
      {
	size_t j;

	res = NULL_TREE;
	/* Make sure all the arguments represent the same value.  */
	for (j = 0; j < gimple_phi_num_args (stmt); j++)
	  {
	    tree new_res;
	    tree def = PHI_ARG_DEF (stmt, j);

	    new_res = can_calculate_expr_before_stmt (def, visited);
	    if (res == NULL_TREE)
	      res = new_res;
	    else if (!new_res || !expressions_equal_p (res, new_res))
	      return NULL_TREE;
	  }
	return res;
      }

    default:
      return NULL_TREE;
    }
}
/* Go backwards in the use-def chains and find out the expression
   represented by the possible SSA name in EXPR, until it is composed
   of only VAR_DECL, PARM_DECL and INT_CST.  In case of phi nodes
   we make sure that all the arguments represent the same subexpression,
   otherwise we fail.  */
static tree
can_calculate_expr_before_stmt (tree expr, sbitmap visited)
{
  gimple def_stmt;
  tree res;

  switch (TREE_CODE (expr))
    {
    case SSA_NAME:
      /* Case of a loop: we don't know how to represent this expression.  */
      if (TEST_BIT (visited, SSA_NAME_VERSION (expr)))
	return NULL_TREE;

      SET_BIT (visited, SSA_NAME_VERSION (expr));
      def_stmt = SSA_NAME_DEF_STMT (expr);
      res = can_calculate_stmt_before_stmt (def_stmt, visited);
      RESET_BIT (visited, SSA_NAME_VERSION (expr));
      return res;
    case VAR_DECL:
    case PARM_DECL:
    case INTEGER_CST:
      return expr;

    default:
      return NULL_TREE;
    }
}
/* There should be only one allocation function for the dimensions
   that don't escape.  Here we check the allocation sites in this
   function.  We must make sure that all the dimensions are allocated
   using malloc and that the malloc size parameter expression could be
   pre-calculated before the call to the malloc of dimension 0.

   Given a candidate matrix for flattening -- MI -- check if it's
   appropriate for flattening -- we analyze the allocation
   sites that we recorded in the previous analysis.  The result of the
   analysis is a level of indirection (matrix dimension) in which the
   flattening is safe.  We check the following conditions:
   1. There is only one allocation site for each dimension.
   2. The allocation sites of all the dimensions are in the same
      function.
   (The above two are being taken care of during the analysis when
   we check the allocation site.)
   3. All the dimensions that we flatten are allocated at once; thus
      the total size must be known before the allocation of the
      dimension 0 (top level) -- we must make sure we represent the
      size of the allocation as an expression of global parameters or
      constants and that those don't change over the function.  */
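
/* As an illustrative sketch (not taken from these sources) of
   condition 3 above: for the level-1 allocation

     a[i] = (int *) malloc (dim2 * sizeof (int));

   the size expression dim2 * sizeof (int) is composed of the global
   variable dim2 and a constant, so it can be computed before the
   level-0 malloc, provided dim2 is not modified within the function
   (which check_var_notmodified_p verifies).  */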
static int
check_allocation_function (void **slot, void *data ATTRIBUTE_UNUSED)
{
  int level;
  struct matrix_info *mi = (struct matrix_info *) *slot;
  sbitmap visited;

  if (!mi->malloc_for_level)
    return 1;

  visited = sbitmap_alloc (num_ssa_names);

  /* Do nothing if the current function is not the allocation
     function of MI.  */
  if (mi->allocation_function_decl != current_function_decl
      /* We aren't in the main allocation function yet.  */
      || !mi->malloc_for_level[0])
    return 1;

  for (level = 1; level < mi->max_malloced_level; level++)
    if (!mi->malloc_for_level[level])
      break;

  mark_min_matrix_escape_level (mi, level, NULL);

  /* Check if the expression of the size passed to malloc could be
     pre-calculated before the malloc of level 0.  */
  for (level = 1; level < mi->min_indirect_level_escape; level++)
    {
      gimple call_stmt;
      tree size;
      struct malloc_call_data mcd = {NULL, NULL_TREE, NULL_TREE};

      call_stmt = mi->malloc_for_level[level];

      /* Find the correct malloc information.  */
      collect_data_for_malloc_call (call_stmt, &mcd);

      /* No need to check anticipation for constants.  */
      if (TREE_CODE (mcd.size_var) == INTEGER_CST)
	{
	  if (!mi->dimension_size)
	    {
	      mi->dimension_size =
		(tree *) xcalloc (mi->min_indirect_level_escape,
				  sizeof (tree));
	      mi->dimension_size_orig =
		(tree *) xcalloc (mi->min_indirect_level_escape,
				  sizeof (tree));
	    }
	  mi->dimension_size[level] = mcd.size_var;
	  mi->dimension_size_orig[level] = mcd.size_var;
	  continue;
	}
      /* ??? Here we should also add the way to calculate the size
	 expression, not only know that it is anticipated.  */
      sbitmap_zero (visited);
      size = can_calculate_expr_before_stmt (mcd.size_var, visited);
      if (size == NULL_TREE)
	{
	  mark_min_matrix_escape_level (mi, level, call_stmt);
	  if (dump_file)
	    fprintf (dump_file,
		     "Matrix %s: Cannot calculate the size of allocation, escaping at level %d\n",
		     get_name (mi->decl), level);
	  break;
	}
      if (!mi->dimension_size)
	{
	  mi->dimension_size =
	    (tree *) xcalloc (mi->min_indirect_level_escape, sizeof (tree));
	  mi->dimension_size_orig =
	    (tree *) xcalloc (mi->min_indirect_level_escape, sizeof (tree));
	}
      mi->dimension_size[level] = size;
      mi->dimension_size_orig[level] = size;
    }

  /* We don't need those anymore.  */
  for (level = mi->min_indirect_level_escape;
       level < mi->max_malloced_level; level++)
    mi->malloc_for_level[level] = NULL;
  return 1;
}
/* Track all access and allocation sites.  */
static void
find_sites_in_func (bool record)
{
  sbitmap visited_stmts_1;

  gimple_stmt_iterator gsi;
  gimple stmt;
  basic_block bb;
  struct matrix_info tmpmi, *mi;

  visited_stmts_1 = sbitmap_alloc (num_ssa_names);

  FOR_EACH_BB (bb)
    {
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  tree lhs;

	  stmt = gsi_stmt (gsi);
	  lhs = gimple_get_lhs (stmt);
	  if (lhs != NULL_TREE
	      && TREE_CODE (lhs) == VAR_DECL)
	    {
	      tmpmi.decl = lhs;
	      if ((mi = (struct matrix_info *) htab_find (matrices_to_reorg,
							  &tmpmi)))
		{
		  sbitmap_zero (visited_stmts_1);
		  analyze_matrix_allocation_site (mi, stmt, 0, visited_stmts_1);
		}
	    }
	  if (is_gimple_assign (stmt)
	      && gimple_assign_single_p (stmt)
	      && TREE_CODE (lhs) == SSA_NAME
	      && TREE_CODE (gimple_assign_rhs1 (stmt)) == VAR_DECL)
	    {
	      tmpmi.decl = gimple_assign_rhs1 (stmt);
	      if ((mi = (struct matrix_info *) htab_find (matrices_to_reorg,
							  &tmpmi)))
		{
		  sbitmap_zero (visited_stmts_1);
		  analyze_matrix_accesses (mi, lhs, 0,
					   false, visited_stmts_1, record);
		}
	    }
	}
    }
  sbitmap_free (visited_stmts_1);
}
/* Traverse the use-def chains to see if there are matrices that
   are passed through pointers and we cannot know how they are accessed.
   For each SSA-name defined by a global variable of our interest,
   we traverse the use-def chains of the SSA and follow the indirections,
   and record in what level of indirection the use of the variable
   escapes.  A use of a pointer escapes when it is passed to a function,
   stored into memory or assigned (except in malloc and free calls).  */

static void
record_all_accesses_in_func (void)
{
  unsigned i;
  sbitmap visited_stmts_1;

  visited_stmts_1 = sbitmap_alloc (num_ssa_names);

  for (i = 0; i < num_ssa_names; i++)
    {
      struct matrix_info tmpmi, *mi;
      tree ssa_var = ssa_name (i);
      tree rhs, lhs;

      if (!ssa_var
	  || !is_gimple_assign (SSA_NAME_DEF_STMT (ssa_var))
	  || !gimple_assign_single_p (SSA_NAME_DEF_STMT (ssa_var)))
	continue;
      rhs = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (ssa_var));
      lhs = gimple_assign_lhs (SSA_NAME_DEF_STMT (ssa_var));
      if (TREE_CODE (rhs) != VAR_DECL && TREE_CODE (lhs) != VAR_DECL)
	continue;

      /* If the RHS is a matrix that we want to analyze, follow the def-use
	 chain for this SSA_VAR and check for escapes or apply the
	 flattening.  */
      tmpmi.decl = rhs;
      if ((mi = (struct matrix_info *) htab_find (matrices_to_reorg, &tmpmi)))
	{
	  /* This variable will track the visited PHI nodes, so we can limit
	     its size to the maximum number of SSA names.  */
	  sbitmap_zero (visited_stmts_1);
	  analyze_matrix_accesses (mi, ssa_var,
				   0, false, visited_stmts_1, true);
	}
    }
  sbitmap_free (visited_stmts_1);
}
/* Used when we want to convert the expression: RESULT = something *
   ORIG to RESULT = something * NEW_VAL.  If ORIG and NEW_VAL are both
   powers of 2, shift operations can be done; else division and
   multiplication are used.  */
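
/* For example (restating what the code below computes): with ORIG == 8
   and NEW_VAL == 2, both powers of two, the conversion is a left shift
   of RESULT by log2 (8) - log2 (2) == 2 bits; with ORIG == 6, which is
   not a power of two, it falls back to (RESULT / 6) * NEW_VAL.  */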
static tree
compute_offset (HOST_WIDE_INT orig, HOST_WIDE_INT new_val, tree result)
{
  int x, y;
  tree result1, ratio, log, orig_tree, new_tree;

  x = exact_log2 (orig);
  y = exact_log2 (new_val);

  if (x != -1 && y != -1)
    {
      if (x == y)
	return result;
      else if (x > y)
	{
	  log = build_int_cst (TREE_TYPE (result), x - y);
	  result1 =
	    fold_build2 (LSHIFT_EXPR, TREE_TYPE (result), result, log);
	  return result1;
	}
      log = build_int_cst (TREE_TYPE (result), y - x);
      result1 = fold_build2 (RSHIFT_EXPR, TREE_TYPE (result), result, log);

      return result1;
    }
  orig_tree = build_int_cst (TREE_TYPE (result), orig);
  new_tree = build_int_cst (TREE_TYPE (result), new_val);
  ratio = fold_build2 (TRUNC_DIV_EXPR, TREE_TYPE (result), result, orig_tree);
  result1 = fold_build2 (MULT_EXPR, TREE_TYPE (result), ratio, new_tree);

  return result1;
}
/* We know that we are allowed to perform matrix flattening (according to the
   escape analysis), so we traverse the use-def chains of the SSA vars
   defined by the global variables pointing to the matrices of our interest.
   In each use of the SSA we calculate the offset from the base address
   according to the following equation:

     a[I1][I2]...[Ik], where D1..Dk is the length of each dimension, the
     escaping level is m <= k, and a' is the newly allocated matrix,
     will be translated to:

       b[I(m+1)]...[Ik]

     where

       b = a' + I1*D2*...*Dm + I2*D3*...*Dm + ... + Im  */
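
/* As a concrete sketch (illustrative, not from the original sources):
   for a two-dimensional matrix a with dimension lengths D1 and D2 that
   is fully flattened (m == k == 2), the formula above reduces to
   b = a' + I1*D2 + I2, i.e. the access a[i][j] is rewritten as
   *(a' + i * D2 + j).  */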
1787 static int
1788 transform_access_sites (void **slot, void *data ATTRIBUTE_UNUSED)
1790 gimple_stmt_iterator gsi;
1791 struct matrix_info *mi = (struct matrix_info *) *slot;
1792 int min_escape_l = mi->min_indirect_level_escape;
1793 struct access_site_info *acc_info;
1794 enum tree_code code;
1795 int i;
1797 if (min_escape_l < 2 || !mi->access_l)
1798 return 1;
1799 for (i = 0; VEC_iterate (access_site_info_p, mi->access_l, i, acc_info);
1800 i++)
1802 /* This is possible because we collect the access sites before
1803 we determine the final minimum indirection level. */
1804 if (acc_info->level >= min_escape_l)
1806 free (acc_info);
1807 continue;
1809 if (acc_info->is_alloc)
1811 if (acc_info->level >= 0 && gimple_bb (acc_info->stmt))
1813 ssa_op_iter iter;
1814 tree def;
1815 gimple stmt = acc_info->stmt;
1816 tree lhs;
1818 FOR_EACH_SSA_TREE_OPERAND (def, stmt, iter, SSA_OP_DEF)
1819 mark_sym_for_renaming (SSA_NAME_VAR (def));
1820 gsi = gsi_for_stmt (stmt);
1821 gcc_assert (is_gimple_assign (acc_info->stmt));
1822 lhs = gimple_assign_lhs (acc_info->stmt);
1823 if (TREE_CODE (lhs) == SSA_NAME
1824 && acc_info->level < min_escape_l - 1)
1826 imm_use_iterator imm_iter;
1827 use_operand_p use_p;
1828 gimple use_stmt;
1830 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, lhs)
1831 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1833 tree rhs, tmp;
1834 gimple new_stmt;
1836 gcc_assert (gimple_assign_rhs_code (acc_info->stmt)
1837 == MEM_REF);
1838 /* Emit convert statement to convert to type of use. */
1839 tmp = create_tmp_var (TREE_TYPE (lhs), "new");
1840 add_referenced_var (tmp);
1841 rhs = gimple_assign_rhs1 (acc_info->stmt);
1842 rhs = fold_convert (TREE_TYPE (tmp),
1843 TREE_OPERAND (rhs, 0));
1844 new_stmt = gimple_build_assign (tmp, rhs);
1845 tmp = make_ssa_name (tmp, new_stmt);
1846 gimple_assign_set_lhs (new_stmt, tmp);
1847 gsi = gsi_for_stmt (acc_info->stmt);
1848 gsi_insert_after (&gsi, new_stmt, GSI_SAME_STMT);
1849 SET_USE (use_p, tmp);
1852 if (acc_info->level < min_escape_l - 1)
1853 gsi_remove (&gsi, true);
1855 free (acc_info);
1856 continue;
      code = gimple_assign_rhs_code (acc_info->stmt);
      if (code == MEM_REF
          && acc_info->level < min_escape_l - 1)
        {
          /* Replace the MEM_REF with a NOP (cast); usually we are casting
             from "pointer to type" to "type".  */
          tree t =
            build1 (NOP_EXPR, TREE_TYPE (gimple_assign_rhs1 (acc_info->stmt)),
                    TREE_OPERAND (gimple_assign_rhs1 (acc_info->stmt), 0));
          gimple_assign_set_rhs_code (acc_info->stmt, NOP_EXPR);
          gimple_assign_set_rhs1 (acc_info->stmt, t);
        }
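      /* A hedged GIMPLE-level sketch of the rewrite above (the SSA names
         are hypothetical): an intermediate row load such as

           row_3 = MEM[(int **) addr_2];

         becomes a plain conversion of the address itself,

           row_3 = (int **) addr_2;

         since the pointer array for that dimension no longer exists.  */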
      else if (code == POINTER_PLUS_EXPR
               && acc_info->level < min_escape_l)
        {
          imm_use_iterator imm_iter;
          use_operand_p use_p;

          tree offset;
          int k = acc_info->level;
          tree num_elements, total_elements;
          tree tmp1;
          tree d_size = mi->dimension_size[k];

          /* We already made sure in the analysis that the first operand
             is the base and the second is the offset.  */
          offset = acc_info->offset;
          if (mi->dim_map[k] == min_escape_l - 1)
            {
              if (!check_transpose_p || mi->is_transposed_p == false)
                tmp1 = offset;
              else
                {
                  tree new_offset;

                  new_offset =
                    compute_offset (mi->dimension_type_size[min_escape_l],
                                    mi->dimension_type_size[k + 1], offset);

                  total_elements = new_offset;
                  if (new_offset != offset)
                    {
                      gsi = gsi_for_stmt (acc_info->stmt);
                      tmp1 = force_gimple_operand_gsi (&gsi, total_elements,
                                                       true, NULL,
                                                       true, GSI_SAME_STMT);
                    }
                  else
                    tmp1 = offset;
                }
            }
          else
            {
              d_size = mi->dimension_size[mi->dim_map[k] + 1];
              num_elements =
                fold_build2 (MULT_EXPR, sizetype,
                             fold_convert (sizetype, acc_info->index),
                             fold_convert (sizetype, d_size));
              gsi = gsi_for_stmt (acc_info->stmt);
              tmp1 = force_gimple_operand_gsi (&gsi, num_elements, true,
                                               NULL, true, GSI_SAME_STMT);
            }
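          /* I.e. for a level that is not the last kept dimension, this
             index contributes INDEX * DIM_SIZE[next level] bytes to the
             flattened offset; e.g. (illustrative) the a[i] step of an
             int a[D1][D2] access contributes i * D2 * sizeof (int).  */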
          /* Replace the offset if needed.  */
          if (tmp1 != offset)
            {
              if (TREE_CODE (offset) == SSA_NAME)
                {
                  gimple use_stmt;

                  FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, offset)
                    FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
                      if (use_stmt == acc_info->stmt)
                        SET_USE (use_p, tmp1);
                }
              else
                {
                  gcc_assert (TREE_CODE (offset) == INTEGER_CST);
                  gimple_assign_set_rhs2 (acc_info->stmt, tmp1);
                  update_stmt (acc_info->stmt);
                }
            }
        }
      /* ??? Meanwhile this happens because we record the same access
         site more than once; we should be using a hash table to
         avoid this and insert the STMT of the access site only
         once.
         else
           gcc_unreachable ();  */
      free (acc_info);
    }
  VEC_free (access_site_info_p, heap, mi->access_l);

  update_ssa (TODO_update_ssa);
#ifdef ENABLE_CHECKING
  verify_ssa (true);
#endif
  return 1;
}
/* Sort the array A of counts; arrange DIM_MAP to reflect the new order.  */

static void
sort_dim_hot_level (gcov_type * a, int *dim_map, int n)
{
  int i, j, tmp1;
  gcov_type tmp;

  for (i = 0; i < n - 1; i++)
    {
      for (j = 0; j < n - 1 - i; j++)
        {
          if (a[j + 1] < a[j])
            {
              /* Swap a[j] and a[j + 1], and keep dim_map in step.  */
              tmp = a[j];
              a[j] = a[j + 1];
              a[j + 1] = tmp;
              tmp1 = dim_map[j];
              dim_map[j] = dim_map[j + 1];
              dim_map[j + 1] = tmp1;
            }
        }
    }
}
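
/* For instance (illustrative only): with counts a = {10, 50, 20} and
   dim_map = {0, 1, 2}, the bubble sort above yields a = {10, 20, 50} and
   dim_map = {0, 2, 1}, so DIM_MAP records where each original dimension
   ended up in the sorted order.  */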

/* Replace the multiple mallocs (one for each dimension) with one malloc
   of size DIM1 * DIM2 * ... * DIMN * size_of_element.
   Make sure that we hold the size used at the malloc site in a
   new global variable; this way we ensure that the size doesn't
   change and it is accessible from all the other functions that
   use the matrix.  Also, the original calls to free are deleted,
   and replaced by a new call to free the flattened matrix.  */
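
/* An illustrative sketch for a three-dimensional matrix (the names below
   are hypothetical, not taken from the pass): the per-dimension calls

     p = (int ***) malloc (d1 * sizeof (int **));
     p[i] = (int **) malloc (d2 * sizeof (int *));
     p[i][j] = (int *) malloc (d3 * sizeof (int));

   are collapsed into

     size_global = d1 * d2 * d3 * sizeof (int);
     p = (int ***) malloc (size_global);

   where SIZE_GLOBAL is a fresh static variable, and the inner calls
   free (p[i][j]) and free (p[i]) are removed so that a single free (p)
   releases the flattened matrix.  */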

static int
transform_allocation_sites (void **slot, void *data ATTRIBUTE_UNUSED)
{
  int i;
  struct matrix_info *mi;
  tree type, oldfn, prev_dim_size;
  gimple call_stmt_0, use_stmt;
  struct cgraph_node *c_node;
  struct cgraph_edge *e;
  gimple_stmt_iterator gsi;
  struct malloc_call_data mcd = {NULL, NULL_TREE, NULL_TREE};
  HOST_WIDE_INT element_size;

  imm_use_iterator imm_iter;
  use_operand_p use_p;
  tree old_size_0, tmp;
  int min_escape_l;
  int id;

  mi = (struct matrix_info *) *slot;

  min_escape_l = mi->min_indirect_level_escape;

  if (!mi->malloc_for_level)
    mi->min_indirect_level_escape = 0;

  if (mi->min_indirect_level_escape < 2)
    return 1;

  mi->dim_map = (int *) xcalloc (mi->min_indirect_level_escape, sizeof (int));
  for (i = 0; i < mi->min_indirect_level_escape; i++)
    mi->dim_map[i] = i;
  if (check_transpose_p)
    {
      int i;

      if (dump_file)
        {
          fprintf (dump_file, "Matrix %s:\n", get_name (mi->decl));
          for (i = 0; i < min_escape_l; i++)
            {
              fprintf (dump_file, "dim %d before sort ", i);
              if (mi->dim_hot_level)
                fprintf (dump_file,
                         "count is " HOST_WIDEST_INT_PRINT_DEC "  \n",
                         mi->dim_hot_level[i]);
            }
        }
      sort_dim_hot_level (mi->dim_hot_level, mi->dim_map,
                          mi->min_indirect_level_escape);
      if (dump_file)
        for (i = 0; i < min_escape_l; i++)
          {
            fprintf (dump_file, "dim %d after sort\n", i);
            if (mi->dim_hot_level)
              fprintf (dump_file, "count is " HOST_WIDE_INT_PRINT_DEC
                       " \n", (HOST_WIDE_INT) mi->dim_hot_level[i]);
          }
      for (i = 0; i < mi->min_indirect_level_escape; i++)
        {
          if (dump_file)
            fprintf (dump_file, "dim_map[%d] after sort %d\n", i,
                     mi->dim_map[i]);
          if (mi->dim_map[i] != i)
            {
              if (dump_file)
                fprintf (dump_file,
                         "Transposed dimensions: dim %d is now dim %d\n",
                         mi->dim_map[i], i);
              mi->is_transposed_p = true;
            }
        }
    }
  else
    {
      for (i = 0; i < mi->min_indirect_level_escape; i++)
        mi->dim_map[i] = i;
    }
  /* Call statement of allocation site of level 0.  */
  call_stmt_0 = mi->malloc_for_level[0];

  /* Find the correct malloc information.  */
  collect_data_for_malloc_call (call_stmt_0, &mcd);

  mi->dimension_size[0] = mcd.size_var;
  mi->dimension_size_orig[0] = mcd.size_var;
  /* Make sure that the variables in the size expression for
     all the dimensions (above level 0) aren't modified in
     the allocation function.  */
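  /* E.g. (an illustrative case): if the allocation function contained

       m[i] = malloc (cols * sizeof (double));
       ...
       cols = cols + 1;

     the size expression of that level depends on a modified variable, so
     the escape level is lowered accordingly (see the loop below).  */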
  for (i = 1; i < mi->min_indirect_level_escape; i++)
    {
      tree t;
      check_var_data data;

      /* mi->dimension_size must contain the expression of the size calculated
         in check_allocation_function.  */
      gcc_assert (mi->dimension_size[i]);

      data.fn = mi->allocation_function_decl;
      data.stmt = NULL;
      t = walk_tree_without_duplicates (&(mi->dimension_size[i]),
                                        check_var_notmodified_p,
                                        &data);
      if (t != NULL_TREE)
        {
          mark_min_matrix_escape_level (mi, i, data.stmt);
          break;
        }
    }

  if (mi->min_indirect_level_escape < 2)
    return 1;

  /* Make sure that the size expression is available
     before the call to the malloc of level 0.  */
  gsi = gsi_for_stmt (call_stmt_0);

  /* Find out the size of each dimension by looking at the malloc
     sites and create a global variable to hold it.
     We add the assignment to the global before the malloc of level 0.  */

  /* To be able to produce gimple temporaries.  */
  oldfn = current_function_decl;
  current_function_decl = mi->allocation_function_decl;
  push_cfun (DECL_STRUCT_FUNCTION (mi->allocation_function_decl));

  /* Set the dimension sizes as follows:
     DIM_SIZE[i] = DIM_SIZE[n] * DIM_SIZE[n-1] * ... * DIM_SIZE[i]
     where n is the maximum non-escaping level.  */
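  /* E.g. (illustrative numbers): flattening three levels of an int matrix
     with extents 4, 3 and 2 computes, innermost first,
       DIM_SIZE[2] = 2 * sizeof (int) = 8,
       DIM_SIZE[1] = 3 * DIM_SIZE[2]  = 24,
       DIM_SIZE[0] = 4 * DIM_SIZE[1]  = 96,
     so DIM_SIZE[0] is the single allocation size in bytes.  */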
  element_size = mi->dimension_type_size[mi->min_indirect_level_escape];
  prev_dim_size = NULL_TREE;

  for (i = mi->min_indirect_level_escape - 1; i >= 0; i--)
    {
      tree dim_size, dim_var;
      gimple stmt;
      tree d_type_size;

      /* Now put the size expression in a global variable and initialize it to
         the size expression before the malloc of level 0.  */
      dim_var =
        add_new_static_var (TREE_TYPE
                            (mi->dimension_size_orig[mi->dim_map[i]]));
      type = TREE_TYPE (mi->dimension_size_orig[mi->dim_map[i]]);

      /* DIM_SIZE = MALLOC_SIZE_PARAM / TYPE_SIZE.  */
      /* Find which dim ID becomes dim I.  */
      for (id = 0; id < mi->min_indirect_level_escape; id++)
        if (mi->dim_map[id] == i)
          break;
      d_type_size =
        build_int_cst (type, mi->dimension_type_size[id + 1]);
      if (!prev_dim_size)
        prev_dim_size = build_int_cst (type, element_size);
      if (!check_transpose_p && i == mi->min_indirect_level_escape - 1)
        {
          dim_size = mi->dimension_size_orig[id];
        }
      else
        {
          dim_size =
            fold_build2 (TRUNC_DIV_EXPR, type, mi->dimension_size_orig[id],
                         d_type_size);
        }
      dim_size = fold_build2 (MULT_EXPR, type, dim_size, prev_dim_size);
      dim_size = force_gimple_operand_gsi (&gsi, dim_size, true, NULL,
                                           true, GSI_SAME_STMT);
      /* GLOBAL_HOLDING_THE_SIZE = DIM_SIZE.  */
      stmt = gimple_build_assign (dim_var, dim_size);
      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);

      prev_dim_size = mi->dimension_size[i] = dim_var;
    }
  update_ssa (TODO_update_ssa);
  /* Replace the malloc size argument in the malloc of level 0 with the
     size of all the dimensions.  */
  c_node = cgraph_get_node (mi->allocation_function_decl);
  gcc_checking_assert (c_node);
  old_size_0 = gimple_call_arg (call_stmt_0, 0);
  tmp = force_gimple_operand_gsi (&gsi, mi->dimension_size[0], true,
                                  NULL, true, GSI_SAME_STMT);
  if (TREE_CODE (old_size_0) == SSA_NAME)
    {
      FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, old_size_0)
        FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
          if (use_stmt == call_stmt_0)
            SET_USE (use_p, tmp);
    }
  /* When deleting the calls to malloc we also need to remove the edge from
     the call graph to keep it consistent.  Notice that cgraph_edge may
     create a new node in the call graph if there is no node for the given
     declaration; this shouldn't be the case but currently there is no way to
     check this outside of "cgraph.c".  */
  for (i = 1; i < mi->min_indirect_level_escape; i++)
    {
      gimple_stmt_iterator gsi;

      gimple call_stmt = mi->malloc_for_level[i];
      gcc_assert (is_gimple_call (call_stmt));
      e = cgraph_edge (c_node, call_stmt);
      gcc_assert (e);
      cgraph_remove_edge (e);
      gsi = gsi_for_stmt (call_stmt);
      /* Remove the call stmt.  */
      gsi_remove (&gsi, true);
      /* Remove the assignment of the allocated area.  */
      FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter,
                             gimple_call_lhs (call_stmt))
        {
          gsi = gsi_for_stmt (use_stmt);
          gsi_remove (&gsi, true);
        }
    }
  update_ssa (TODO_update_ssa);
#ifdef ENABLE_CHECKING
  verify_ssa (true);
#endif
  /* Delete the calls to free.  */
  for (i = 1; i < mi->min_indirect_level_escape; i++)
    {
      gimple_stmt_iterator gsi;

      /* ??? Wonder why this case is possible, but we failed on it once.  */
      if (!mi->free_stmts[i].stmt)
        continue;

      c_node = cgraph_get_node (mi->free_stmts[i].func);
      gcc_checking_assert (c_node);
      gcc_assert (is_gimple_call (mi->free_stmts[i].stmt));
      e = cgraph_edge (c_node, mi->free_stmts[i].stmt);
      gcc_assert (e);
      cgraph_remove_edge (e);
      current_function_decl = mi->free_stmts[i].func;
      set_cfun (DECL_STRUCT_FUNCTION (mi->free_stmts[i].func));
      gsi = gsi_for_stmt (mi->free_stmts[i].stmt);
      gsi_remove (&gsi, true);
    }

  /* Return to the previous situation.  */
  current_function_decl = oldfn;
  pop_cfun ();
  return 1;
}

/* Print out the results of the escape analysis.  */

static int
dump_matrix_reorg_analysis (void **slot, void *data ATTRIBUTE_UNUSED)
{
  struct matrix_info *mi = (struct matrix_info *) *slot;

  if (!dump_file)
    return 1;
  fprintf (dump_file, "Matrix \"%s\"; Escaping Level: %d, Num Dims: %d,",
           get_name (mi->decl), mi->min_indirect_level_escape, mi->num_dims);
  fprintf (dump_file, " Malloc Dims: %d, ", mi->max_malloced_level);
  fprintf (dump_file, "\n");
  if (mi->min_indirect_level_escape >= 2)
    fprintf (dump_file, "Flattened %d dimensions \n",
             mi->min_indirect_level_escape);
  return 1;
}
/* Perform matrix flattening.  */

static unsigned int
matrix_reorg (void)
{
  struct cgraph_node *node;

  if (profile_info)
    check_transpose_p = true;
  else
    check_transpose_p = false;
  /* If there are hand-written vectors, we skip this optimization.  */
  FOR_EACH_FUNCTION (node)
    if (!may_flatten_matrices (node))
      return 0;
  matrices_to_reorg = htab_create (37, mtt_info_hash, mtt_info_eq, mat_free);
  /* Find and record all potential matrices in the program.  */
  find_matrices_decl ();
  /* Analyze the accesses of the matrices (escaping analysis).  */
  FOR_EACH_DEFINED_FUNCTION (node)
    {
      tree temp_fn;

      temp_fn = current_function_decl;
      current_function_decl = node->symbol.decl;
      push_cfun (DECL_STRUCT_FUNCTION (node->symbol.decl));
      bitmap_obstack_initialize (NULL);
      gimple_register_cfg_hooks ();

      if (!gimple_in_ssa_p (cfun))
        {
          free_dominance_info (CDI_DOMINATORS);
          free_dominance_info (CDI_POST_DOMINATORS);
          pop_cfun ();
          current_function_decl = temp_fn;
          bitmap_obstack_release (NULL);

          return 0;
        }

#ifdef ENABLE_CHECKING
      verify_flow_info ();
#endif

      if (!matrices_to_reorg)
        {
          free_dominance_info (CDI_DOMINATORS);
          free_dominance_info (CDI_POST_DOMINATORS);
          pop_cfun ();
          current_function_decl = temp_fn;
          bitmap_obstack_release (NULL);

          return 0;
        }
      /* Create htab for phi nodes.  */
      htab_mat_acc_phi_nodes = htab_create (37, mat_acc_phi_hash,
                                            mat_acc_phi_eq, free);
      if (!check_transpose_p)
        find_sites_in_func (false);
      else
        {
          find_sites_in_func (true);
          loop_optimizer_init (LOOPS_NORMAL);
          if (current_loops)
            scev_initialize ();
          htab_traverse (matrices_to_reorg, analyze_transpose, NULL);
          if (current_loops)
            {
              scev_finalize ();
              loop_optimizer_finalize ();
              current_loops = NULL;
            }
        }
      /* If the current function is the allocation function for any of
         the matrices we check its allocation and the escaping level.  */
      htab_traverse (matrices_to_reorg, check_allocation_function, NULL);
      free_dominance_info (CDI_DOMINATORS);
      free_dominance_info (CDI_POST_DOMINATORS);
      pop_cfun ();
      current_function_decl = temp_fn;
      bitmap_obstack_release (NULL);
    }
  htab_traverse (matrices_to_reorg, transform_allocation_sites, NULL);
  /* Now transform the accesses.  */
  FOR_EACH_DEFINED_FUNCTION (node)
    {
      /* Remember that allocation sites have been handled.  */
      tree temp_fn;

      temp_fn = current_function_decl;
      current_function_decl = node->symbol.decl;
      push_cfun (DECL_STRUCT_FUNCTION (node->symbol.decl));
      bitmap_obstack_initialize (NULL);
      gimple_register_cfg_hooks ();
      record_all_accesses_in_func ();
      htab_traverse (matrices_to_reorg, transform_access_sites, NULL);
      cgraph_rebuild_references ();
      free_dominance_info (CDI_DOMINATORS);
      free_dominance_info (CDI_POST_DOMINATORS);
      pop_cfun ();
      current_function_decl = temp_fn;
      bitmap_obstack_release (NULL);
    }
  htab_traverse (matrices_to_reorg, dump_matrix_reorg_analysis, NULL);

  current_function_decl = NULL;
  set_cfun (NULL);
  matrices_to_reorg = NULL;
  return 0;
}

/* The condition for matrix flattening to be performed.  */

static bool
gate_matrix_reorg (void)
{
  return flag_ipa_matrix_reorg && flag_whole_program;
}
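
/* I.e. the pass only runs when both -fipa-matrix-reorg and -fwhole-program
   are given; whole-program mode is what makes the global escape analysis
   above trustworthy.  */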

struct simple_ipa_opt_pass pass_ipa_matrix_reorg =
{
 {
  SIMPLE_IPA_PASS,
  "matrix-reorg",          /* name */
  gate_matrix_reorg,       /* gate */
  matrix_reorg,            /* execute */
  NULL,                    /* sub */
  NULL,                    /* next */
  0,                       /* static_pass_number */
  TV_NONE,                 /* tv_id */
  0,                       /* properties_required */
  0,                       /* properties_provided */
  0,                       /* properties_destroyed */
  0,                       /* todo_flags_start */
  TODO_dump_symtab         /* todo_flags_finish */
 }
};