/* Matrix layout transformations.
   Copyright (C) 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
   Contributed by Razya Ladelsky <razya@il.ibm.com>
   Originally written by Revital Eres and Mustafa Hagog.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/*
   Matrix flattening optimization tries to replace an N-dimensional
   matrix with its equivalent M-dimensional matrix, where M < N.
   This first implementation focuses on global matrices defined dynamically.

   When N == 1, we actually flatten the whole matrix.
   For instance, consider a two-dimensional array a[dim1][dim2].
   The code for allocating space for it usually looks like:

     a = (int **) malloc (dim1 * sizeof (int *));
     for (i = 0; i < dim1; i++)
       a[i] = (int *) malloc (dim2 * sizeof (int));

   If the array "a" is found suitable for this optimization,
   its allocation is replaced by:

     a = (int *) malloc (dim1 * dim2 * sizeof (int));

   and all the references to a[i][j] are replaced by a[i * dim2 + j].

   The two main phases of the optimization are the analysis
   and the transformation.
   The driver of the optimization is matrix_reorg ().

   Analysis phase:
   ===============

   We'll number the dimensions outside-in, meaning the most external
   is 0, then 1, and so on.
   The analysis part of the optimization determines K, the escape
   level of an N-dimensional matrix (K <= N), that allows flattening of
   the external dimensions 0, 1, ..., K-1.  Escape level 0 means that the
   whole matrix escapes and no flattening is possible.

   The analysis part is implemented in analyze_matrix_allocation_site ()
   and analyze_matrix_accesses ().

   Transformation phase:
   =====================
   In this phase we define the new flattened matrices that replace the
   original matrices in the code.
   Implemented in transform_allocation_sites (),
   transform_access_sites ().

   Matrix Transposing
   ==================
   The idea of matrix transposing is to organize the matrix in a different
   layout such that the dimensions are reordered.
   This could produce better cache behavior in some cases.

   For example, let's look at the matrix accesses in the following loop:

     for (i = 0; i < N; i++)
       for (j = 0; j < M; j++)
         access to a[i][j]

   This loop can produce good cache behavior because the elements of
   the inner dimension are accessed sequentially.

   However, if the accesses of the matrix were of the following form:

     for (i = 0; i < N; i++)
       for (j = 0; j < M; j++)
         access to a[j][i]

   in this loop we iterate over the columns and not the rows.
   Therefore, exchanging the rows and columns
   would yield a layout with better (cache) locality.
   Exchanging the dimensions of the matrix is called matrix transposing.

   This example, of course, extends to matrices with more dimensions
   as well.

   Since a program could include all kinds of accesses, there is a decision
   mechanism, implemented in analyze_transpose (), which implements a
   heuristic that tries to determine whether to transpose the matrix or not,
   according to the form of the dominant accesses.
   This decision is passed on to the flattening mechanism; whether or not
   the matrix was transposed, the matrix is flattened (if possible).

   The decision making is based on profiling information and loop
   information.  If profiling information is available, the decision
   making mechanism is run; otherwise the matrix is only flattened
   (if possible).

   Both optimizations are described in the paper "Matrix flattening and
   transposing in GCC", which was presented at the GCC Summit 2006.
   http://www.gccsummit.org/2006/2006-GCC-Summit-Proceedings.pdf.  */
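
/* An illustrative sketch, not taken from the paper or the pass itself:
   generalizing the two-dimensional example above, a fully flattened
   three-dimensional matrix would be allocated and accessed as

     a = (int *) malloc (dim1 * dim2 * dim3 * sizeof (int));
     ...
     ... = a[(i * dim2 + j) * dim3 + k];      instead of a[i][j][k]

   i.e. one multiply-and-add per flattened dimension, which is the shape
   of the address arithmetic the transformation phase emits.  */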
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "rtl.h"
#include "tree-inline.h"
#include "tree-flow.h"
#include "tree-flow-inline.h"
#include "langhooks.h"
#include "hashtab.h"
#include "toplev.h"
#include "flags.h"
#include "ggc.h"
#include "debug.h"
#include "target.h"
#include "cgraph.h"
#include "diagnostic.h"
#include "timevar.h"
#include "params.h"
#include "fibheap.h"
#include "intl.h"
#include "function.h"
#include "basic-block.h"
#include "cfgloop.h"
#include "tree-iterator.h"
#include "tree-pass.h"
#include "opts.h"
#include "tree-data-ref.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
/* We need to collect a lot of data from the original malloc,
   particularly as the gimplifier has converted:

   orig_var = (struct_type *) malloc (x * sizeof (struct_type *));

   into

   T3 = <constant> ;  ** <constant> is amount to malloc; precomputed **
   T4 = malloc (T3);
   T5 = (struct_type *) T4;
   orig_var = T5;

   The following struct fields allow us to collect all the necessary data from
   the gimplified program.  The comments in the struct below are all based
   on the gimple example above.  */

struct malloc_call_data
{
  gimple call_stmt;		/* Tree for "T4 = malloc (T3);"  */
  tree size_var;		/* Var decl for T3.  */
  tree malloc_size;		/* Tree for "<constant>", the rhs assigned to T3.  */
};

static tree can_calculate_expr_before_stmt (tree, sbitmap);
static tree can_calculate_stmt_before_stmt (gimple, sbitmap);
/* The front end of the compiler, when parsing statements of the form:

   var = (type_cast) malloc (sizeof (type));

   always converts this single statement into the following statements
   (GIMPLE form):

   T.1 = sizeof (type);
   T.2 = malloc (T.1);
   T.3 = (type_cast) T.2;
   var = T.3;

   Since we need to create new malloc statements and modify the original
   statements somewhat, we need to find all four of the above statements.
   Currently record_call_1 (called for building cgraph edges) finds and
   records the statements containing the actual call to malloc, but we
   need to find the rest of the variables/statements on our own.  That
   is what the following function does.  */
static void
collect_data_for_malloc_call (gimple stmt, struct malloc_call_data *m_data)
{
  tree size_var = NULL;
  tree malloc_fn_decl;
  tree arg1;

  gcc_assert (is_gimple_call (stmt));

  malloc_fn_decl = gimple_call_fndecl (stmt);
  if (malloc_fn_decl == NULL
      || DECL_FUNCTION_CODE (malloc_fn_decl) != BUILT_IN_MALLOC)
    return;

  arg1 = gimple_call_arg (stmt, 0);
  size_var = arg1;

  m_data->call_stmt = stmt;
  m_data->size_var = size_var;
  if (TREE_CODE (size_var) != VAR_DECL)
    m_data->malloc_size = size_var;
  else
    m_data->malloc_size = NULL_TREE;
}
/* Information about a matrix access site.
   For example: if an access site of matrix arr is arr[i][j]
   the ACCESS_SITE_INFO structure will have the address
   of arr as its stmt.  The INDEX_INFO will hold information about the
   initial address and index of each dimension.  */
struct access_site_info
{
  /* The statement (INDIRECT_REF or POINTER_PLUS_EXPR).  */
  gimple stmt;

  /* In case of POINTER_PLUS_EXPR, what is the offset.  */
  tree offset;

  /* The index which created the offset.  */
  tree index;

  /* The indirection level of this statement.  */
  int level;

  /* TRUE for an allocation site, FALSE for an access site.  */
  bool is_alloc;

  /* The function containing the access site.  */
  tree function_decl;

  /* This access is iterated in the innermost loop.  */
  bool iterated_by_inner_most_loop_p;
};

typedef struct access_site_info *access_site_info_p;
DEF_VEC_P (access_site_info_p);
DEF_VEC_ALLOC_P (access_site_info_p, heap);
/* Calls to free when flattening a matrix.  */

struct free_info
{
  gimple stmt;
  tree func;
};
/* Information about a matrix to flatten.  */
struct matrix_info
{
  /* Decl tree of this matrix.  */
  tree decl;
  /* Number of dimensions; number
     of "*" in the type declaration.  */
  int num_dims;

  /* Minimum indirection level that escapes, 0 means that
     the whole matrix escapes, k means that dimensions
     0 to ACTUAL_DIM - k escape.  */
  int min_indirect_level_escape;

  gimple min_indirect_level_escape_stmt;

  /* Hold the allocation site for each level (dimension).
     We can use NUM_DIMS as the upper bound and allocate the array
     once with this number of elements, with no need to use realloc and
     MAX_MALLOCED_LEVEL.  */
  gimple *malloc_for_level;

  int max_malloced_level;

  /* Is the matrix transposed.  */
  bool is_transposed_p;

  /* The location of the allocation sites (they must be in one
     function).  */
  tree allocation_function_decl;

  /* The calls to free for each level of indirection.  */
  struct free_info *free_stmts;

  /* An array which holds for each dimension its size, where
     dimension 0 is the outermost (the one that contains all the others).  */
  tree *dimension_size;

  /* An array which holds for each dimension its original size
     (before transposing and flattening take place).  */
  tree *dimension_size_orig;

  /* An array which holds for each dimension the size of the type of
     the elements accessed in that level (in bytes).  */
  HOST_WIDE_INT *dimension_type_size;

  int dimension_type_size_len;

  /* An array collecting the count of accesses for each dimension.  */
  gcov_type *dim_hot_level;

  /* An array of the accesses to be flattened.
     Elements are of type "struct access_site_info *".  */
  VEC (access_site_info_p, heap) *access_l;

  /* A map of how the dimensions will be organized at the end of
     the analyses.  */
  int *dim_map;
};
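
/* An assumption-laden illustration (not from the sources): for a
   three-level matrix where the analysis decided to exchange the two
   innermost dimensions, DIM_MAP would hold {0, 2, 1}, i.e. original
   dimension 1 is laid out where dimension 2 used to be and vice
   versa; the transformation phase consults this map when rebuilding
   the offset of each access site.  */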
/* In each phi node we want to record the indirection level we have when we
   get to the phi node.  Usually we will have phi nodes with more than two
   arguments, so we must ensure that all of them get to the phi node with
   the same indirection level; otherwise it's not safe to do the flattening.
   So we record the information regarding the indirection level each time we
   get to the phi node in this hash table.  */

struct matrix_access_phi_node
{
  gimple phi;
  int indirection_level;
};
/* We use this structure to find if the SSA variable is accessed inside the
   tree and record the tree containing it.  */

struct ssa_acc_in_tree
{
  /* The variable whose accesses in the tree we are looking for.  */
  tree ssa_var;
  /* The tree and code inside which the ssa_var is accessed; currently
     it could be an INDIRECT_REF or CALL_EXPR.  */
  enum tree_code t_code;
  tree t_tree;
  /* The place in the containing tree.  */
  tree *tp;
  tree second_op;
  bool var_found;
};
static void analyze_matrix_accesses (struct matrix_info *, tree, int, bool,
				     sbitmap, bool);
static int transform_allocation_sites (void **, void *);
static int transform_access_sites (void **, void *);
static int analyze_transpose (void **, void *);
static int dump_matrix_reorg_analysis (void **, void *);

static bool check_transpose_p;
/* Hash function used for the phi nodes.  */

static hashval_t
mat_acc_phi_hash (const void *p)
{
  const struct matrix_access_phi_node *const ma_phi =
    (const struct matrix_access_phi_node *) p;

  return htab_hash_pointer (ma_phi->phi);
}

/* Equality means phi node pointers are the same.  */

static int
mat_acc_phi_eq (const void *p1, const void *p2)
{
  const struct matrix_access_phi_node *const phi1 =
    (const struct matrix_access_phi_node *) p1;
  const struct matrix_access_phi_node *const phi2 =
    (const struct matrix_access_phi_node *) p2;

  if (phi1->phi == phi2->phi)
    return 1;

  return 0;
}

/* Hold the PHI nodes we visit during the traversal for escaping
   analysis.  */
static htab_t htab_mat_acc_phi_nodes = NULL;

/* This hash-table holds the information about the matrices we are
   going to handle.  */
static htab_t matrices_to_reorg = NULL;

/* Return a hash for MTT, which is really a "matrix_info *".  */
static hashval_t
mtt_info_hash (const void *mtt)
{
  return htab_hash_pointer (((const struct matrix_info *) mtt)->decl);
}

/* Return true if MTT1 and MTT2 (which are really both of type
   "matrix_info *") refer to the same decl.  */
static int
mtt_info_eq (const void *mtt1, const void *mtt2)
{
  const struct matrix_info *const i1 = (const struct matrix_info *) mtt1;
  const struct matrix_info *const i2 = (const struct matrix_info *) mtt2;

  if (i1->decl == i2->decl)
    return true;

  return false;
}
/* Return false if STMT may contain a vector expression.
   In this situation, all matrices should not be flattened.  */
static bool
may_flatten_matrices_1 (gimple stmt)
{
  tree t;

  switch (gimple_code (stmt))
    {
    case GIMPLE_ASSIGN:
      if (!gimple_assign_cast_p (stmt))
	return true;

      t = gimple_assign_rhs1 (stmt);
      while (CONVERT_EXPR_P (t))
	{
	  if (TREE_TYPE (t) && POINTER_TYPE_P (TREE_TYPE (t)))
	    {
	      tree pointee;

	      pointee = TREE_TYPE (t);
	      while (POINTER_TYPE_P (pointee))
		pointee = TREE_TYPE (pointee);
	      if (TREE_CODE (pointee) == VECTOR_TYPE)
		{
		  if (dump_file)
		    fprintf (dump_file,
			     "Found vector type, don't flatten matrix\n");
		  return false;
		}
	    }
	  t = TREE_OPERAND (t, 0);
	}
      break;
    case GIMPLE_ASM:
      /* Asm code could contain vector operations.  */
      return false;
      break;
    default:
      break;
    }
  return true;
}
/* Return false if there are hand-written vectors in the program.
   We disable the flattening in such a case.  */
static bool
may_flatten_matrices (struct cgraph_node *node)
{
  tree decl;
  struct function *func;
  basic_block bb;
  gimple_stmt_iterator gsi;

  decl = node->decl;
  if (node->analyzed)
    {
      func = DECL_STRUCT_FUNCTION (decl);
      FOR_EACH_BB_FN (bb, func)
	for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	  if (!may_flatten_matrices_1 (gsi_stmt (gsi)))
	    return false;
    }
  return true;
}
/* Given a VAR_DECL, check its type to determine whether it is
   a definition of a dynamically allocated matrix and therefore is
   a suitable candidate for the matrix flattening optimization.
   Return NULL if VAR_DECL is not such a decl.  Otherwise, allocate
   a MATRIX_INFO structure, fill it with the relevant information
   and return a pointer to it.
   TODO: handle also statically defined arrays.  */
static struct matrix_info *
analyze_matrix_decl (tree var_decl)
{
  struct matrix_info *m_node, tmpmi, *mi;
  tree var_type;
  int dim_num = 0;

  gcc_assert (matrices_to_reorg);

  if (TREE_CODE (var_decl) == PARM_DECL)
    var_type = DECL_ARG_TYPE (var_decl);
  else if (TREE_CODE (var_decl) == VAR_DECL)
    var_type = TREE_TYPE (var_decl);
  else
    return NULL;

  if (!POINTER_TYPE_P (var_type))
    return NULL;

  while (POINTER_TYPE_P (var_type))
    {
      var_type = TREE_TYPE (var_type);
      dim_num++;
    }

  if (dim_num <= 1)
    return NULL;

  if (!COMPLETE_TYPE_P (var_type)
      || TREE_CODE (TYPE_SIZE_UNIT (var_type)) != INTEGER_CST)
    return NULL;

  /* Check to see if this pointer is already in there.  */
  tmpmi.decl = var_decl;
  mi = (struct matrix_info *) htab_find (matrices_to_reorg, &tmpmi);

  if (mi)
    return NULL;

  /* Record the matrix.  */

  m_node = (struct matrix_info *) xcalloc (1, sizeof (struct matrix_info));
  m_node->decl = var_decl;
  m_node->num_dims = dim_num;
  m_node->free_stmts
    = (struct free_info *) xcalloc (dim_num, sizeof (struct free_info));

  /* Init min_indirect_level_escape to -1 to indicate that no escape
     analysis has been done yet.  */
  m_node->min_indirect_level_escape = -1;
  m_node->is_transposed_p = false;

  return m_node;
}
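
/* For example (illustrative only): given the declaration

     int ***a;

   the loop above strips three pointer levels, so "a" is recorded as a
   candidate with num_dims == 3, while "int *b;" gives dim_num == 1 and
   is rejected by the dim_num <= 1 check.  */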
/* Free matrix E.  */
static void
mat_free (void *e)
{
  struct matrix_info *mat = (struct matrix_info *) e;

  if (!mat)
    return;

  if (mat->free_stmts)
    free (mat->free_stmts);
  if (mat->dim_hot_level)
    free (mat->dim_hot_level);
  if (mat->malloc_for_level)
    free (mat->malloc_for_level);
}
/* Find all potential matrices.
   TODO: currently we handle only multidimensional
   dynamically allocated arrays.  */
static void
find_matrices_decl (void)
{
  struct matrix_info *tmp;
  PTR *slot;
  struct varpool_node *vnode;

  gcc_assert (matrices_to_reorg);

  /* For every global variable in the program:
     Check to see if it's of a candidate type and record it.  */
  for (vnode = varpool_nodes_queue; vnode; vnode = vnode->next_needed)
    {
      tree var_decl = vnode->decl;

      if (!var_decl || TREE_CODE (var_decl) != VAR_DECL)
	continue;

      if (matrices_to_reorg)
	if ((tmp = analyze_matrix_decl (var_decl)))
	  {
	    if (!TREE_ADDRESSABLE (var_decl))
	      {
		slot = htab_find_slot (matrices_to_reorg, tmp, INSERT);
		*slot = tmp;
	      }
	  }
    }
  return;
}
/* Mark that the matrix MI escapes at level L.  */
static void
mark_min_matrix_escape_level (struct matrix_info *mi, int l, gimple s)
{
  if (mi->min_indirect_level_escape == -1
      || (mi->min_indirect_level_escape > l))
    {
      mi->min_indirect_level_escape = l;
      mi->min_indirect_level_escape_stmt = s;
    }
}
/* Find if the SSA variable is accessed inside the
   tree and record the tree containing it.
   The only relevant uses are the case of SSA_NAME, or SSA inside
   INDIRECT_REF, PLUS_EXPR, POINTER_PLUS_EXPR, MULT_EXPR.  */
static void
ssa_accessed_in_tree (tree t, struct ssa_acc_in_tree *a)
{
  a->t_code = TREE_CODE (t);
  switch (a->t_code)
    {
    case SSA_NAME:
      if (t == a->ssa_var)
	a->var_found = true;
      break;
    case INDIRECT_REF:
      if (SSA_VAR_P (TREE_OPERAND (t, 0))
	  && TREE_OPERAND (t, 0) == a->ssa_var)
	a->var_found = true;
      break;
    default:
      break;
    }
}
/* Find if the SSA variable is accessed on the right hand side of
   gimple call STMT.  */

static void
ssa_accessed_in_call_rhs (gimple stmt, struct ssa_acc_in_tree *a)
{
  tree decl;
  tree arg;
  size_t i;

  a->t_code = CALL_EXPR;
  for (i = 0; i < gimple_call_num_args (stmt); i++)
    {
      arg = gimple_call_arg (stmt, i);
      if (arg == a->ssa_var)
	{
	  a->var_found = true;
	  decl = gimple_call_fndecl (stmt);
	  a->t_tree = decl;
	  break;
	}
    }
}
/* Find if the SSA variable is accessed on the right hand side of
   gimple assign STMT.  */

static void
ssa_accessed_in_assign_rhs (gimple stmt, struct ssa_acc_in_tree *a)
{
  a->t_code = gimple_assign_rhs_code (stmt);
  switch (a->t_code)
    {
      tree op1, op2;

    case SSA_NAME:
    case INDIRECT_REF:
    CASE_CONVERT:
    case VIEW_CONVERT_EXPR:
      ssa_accessed_in_tree (gimple_assign_rhs1 (stmt), a);
      break;
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MULT_EXPR:
      op1 = gimple_assign_rhs1 (stmt);
      op2 = gimple_assign_rhs2 (stmt);

      if (op1 == a->ssa_var)
	{
	  a->var_found = true;
	  a->second_op = op2;
	}
      else if (op2 == a->ssa_var)
	{
	  a->var_found = true;
	  a->second_op = op1;
	}
      break;
    default:
      break;
    }
}
/* Record the access/allocation site information for matrix MI so we can
   handle it later in transformation.  */
static void
record_access_alloc_site_info (struct matrix_info *mi, gimple stmt, tree offset,
			       tree index, int level, bool is_alloc)
{
  struct access_site_info *acc_info;

  if (!mi->access_l)
    mi->access_l = VEC_alloc (access_site_info_p, heap, 100);

  acc_info
    = (struct access_site_info *)
    xcalloc (1, sizeof (struct access_site_info));
  acc_info->stmt = stmt;
  acc_info->offset = offset;
  acc_info->index = index;
  acc_info->function_decl = current_function_decl;
  acc_info->level = level;
  acc_info->is_alloc = is_alloc;

  VEC_safe_push (access_site_info_p, heap, mi->access_l, acc_info);
}
/* Record the malloc as the allocation site of the given LEVEL.  But
   first we make sure that all the size parameters passed to malloc in
   all the allocation sites could be pre-calculated before the call to
   the malloc of level 0 (the main malloc call).  */
static void
add_allocation_site (struct matrix_info *mi, gimple stmt, int level)
{
  struct malloc_call_data mcd;

  /* Make sure that the allocation sites are in the same function.  */
  if (!mi->allocation_function_decl)
    mi->allocation_function_decl = current_function_decl;
  else if (mi->allocation_function_decl != current_function_decl)
    {
      int min_malloc_level;

      gcc_assert (mi->malloc_for_level);

      /* Find the minimum malloc level that has already been seen;
	 we know its allocation function must be
	 MI->allocation_function_decl; since it's different from
	 CURRENT_FUNCTION_DECL, the escaping level should be
	 MIN (LEVEL, MIN_MALLOC_LEVEL) - 1, and the allocation function
	 must be set accordingly.  */
      for (min_malloc_level = 0;
	   min_malloc_level < mi->max_malloced_level
	   && mi->malloc_for_level[min_malloc_level]; min_malloc_level++);
      if (level < min_malloc_level)
	{
	  mi->allocation_function_decl = current_function_decl;
	  mark_min_matrix_escape_level (mi, min_malloc_level, stmt);
	}
      else
	{
	  mark_min_matrix_escape_level (mi, level, stmt);
	  /* It cannot be that (level == min_malloc_level);
	     we would have returned earlier.  */
	  return;
	}
    }

  /* Find the correct malloc information.  */
  collect_data_for_malloc_call (stmt, &mcd);

  /* We accept only calls to the malloc function; we do not accept
     calls like calloc and realloc.  */
  if (!mi->malloc_for_level)
    {
      mi->malloc_for_level = XCNEWVEC (gimple, level + 1);
      mi->max_malloced_level = level + 1;
    }
  else if (mi->max_malloced_level <= level)
    {
      mi->malloc_for_level
	= XRESIZEVEC (gimple, mi->malloc_for_level, level + 1);

      /* Zero the newly allocated items.  */
      memset (&(mi->malloc_for_level[mi->max_malloced_level + 1]),
	      0, (level - mi->max_malloced_level) * sizeof (tree));

      mi->max_malloced_level = level + 1;
    }
  mi->malloc_for_level[level] = stmt;
}
/* Given an assignment statement STMT whose left-hand side we know to
   be the matrix MI variable, we traverse the immediate
   uses backwards until we get to a malloc site.  We make sure that
   there is one and only one malloc site that sets this variable.  When
   we are performing the flattening we generate a new variable that
   will hold the size for each dimension; each malloc that allocates a
   dimension has the size parameter; we use that parameter to
   initialize the dimension size variable so we can use it later in
   the address calculations.  LEVEL is the dimension we're inspecting.
   Return if STMT is related to an allocation site.  */

static void
analyze_matrix_allocation_site (struct matrix_info *mi, gimple stmt,
				int level, sbitmap visited)
{
  if (gimple_assign_copy_p (stmt) || gimple_assign_cast_p (stmt))
    {
      tree rhs = gimple_assign_rhs1 (stmt);

      if (TREE_CODE (rhs) == SSA_NAME)
	{
	  gimple def = SSA_NAME_DEF_STMT (rhs);

	  analyze_matrix_allocation_site (mi, def, level, visited);
	  return;
	}
      /* If we are back to the original matrix variable then we
	 are sure that this is analyzed as an access site.  */
      else if (rhs == mi->decl)
	return;
    }
  /* A result of a call to malloc.  */
  else if (is_gimple_call (stmt))
    {
      int call_flags = gimple_call_flags (stmt);

      if (!(call_flags & ECF_MALLOC))
	{
	  mark_min_matrix_escape_level (mi, level, stmt);
	  return;
	}
      else
	{
	  tree malloc_fn_decl;
	  const char *malloc_fname;

	  malloc_fn_decl = gimple_call_fndecl (stmt);
	  if (malloc_fn_decl == NULL_TREE)
	    {
	      mark_min_matrix_escape_level (mi, level, stmt);
	      return;
	    }
	  malloc_fname = IDENTIFIER_POINTER (DECL_NAME (malloc_fn_decl));
	  if (DECL_FUNCTION_CODE (malloc_fn_decl) != BUILT_IN_MALLOC)
	    {
	      if (dump_file)
		fprintf (dump_file,
			 "Matrix %s is an argument to function %s\n",
			 get_name (mi->decl), get_name (malloc_fn_decl));
	      mark_min_matrix_escape_level (mi, level, stmt);
	      return;
	    }
	}
      /* This is a call to malloc of level 'level'.
	 mi->max_malloced_level-1 == level means that we've
	 seen a malloc statement of level 'level' before.
	 If the statement is not the same one that we've
	 seen before, then there's another malloc statement
	 for the same level, which means that we need to mark
	 it escaping.  */
      if (mi->malloc_for_level
	  && mi->max_malloced_level-1 == level
	  && mi->malloc_for_level[level] != stmt)
	{
	  mark_min_matrix_escape_level (mi, level, stmt);
	  return;
	}
      else
	add_allocation_site (mi, stmt, level);
      return;
    }
  /* Looks like we don't know what is happening in this
     statement so be on the safe side and mark it as escaping.  */
  mark_min_matrix_escape_level (mi, level, stmt);
}
/* The transposing decision making.
   In order to calculate the profitability of transposing, we collect two
   types of information regarding the accesses:
   1. profiling information used to express the hotness of an access, that
      is how often the matrix is accessed by this access site (count of the
      access site).
   2. which dimension in the access site is iterated by the innermost
      loop containing this access.

   The matrix will have a calculated value of weighted hotness for each
   dimension.
   Intuitively the hotness level of a dimension is a function of how
   many times it was the most frequently accessed dimension in the
   highly executed access sites of this matrix.

   It is computed by the following equation, where m is the number of
   access sites of the matrix and n the number of its dimensions:

     for j = 1..m, i = 1..n:
       dim_hot_level[i] += acc[j]->dim[i]->iter_by_inner_loop * count (j)

   acc[j]->dim[i]->iter_by_inner_loop is 1 if acc[j]
   iterates over dim[i] in the innermost loop, and is 0 otherwise.

   The organization of the new matrix should be according to the
   hotness of each dimension.  The hotness of a dimension implies
   the locality of its elements.  */
static int
analyze_transpose (void **slot, void *data ATTRIBUTE_UNUSED)
{
  struct matrix_info *mi = (struct matrix_info *) *slot;
  int min_escape_l = mi->min_indirect_level_escape;
  struct loop *loop;
  affine_iv iv;
  struct access_site_info *acc_info;
  int i;

  if (min_escape_l < 2 || !mi->access_l)
    {
      if (mi->access_l)
	{
	  for (i = 0;
	       VEC_iterate (access_site_info_p, mi->access_l, i, acc_info);
	       i++)
	    free (acc_info);
	  VEC_free (access_site_info_p, heap, mi->access_l);
	}
      return 1;
    }
  if (!mi->dim_hot_level)
    mi->dim_hot_level =
      (gcov_type *) xcalloc (min_escape_l, sizeof (gcov_type));

  for (i = 0; VEC_iterate (access_site_info_p, mi->access_l, i, acc_info);
       i++)
    {
      if (gimple_assign_rhs_code (acc_info->stmt) == POINTER_PLUS_EXPR
	  && acc_info->level < min_escape_l)
	{
	  loop = loop_containing_stmt (acc_info->stmt);
	  if (!loop || loop->inner)
	    {
	      free (acc_info);
	      continue;
	    }
	  if (simple_iv (loop, loop, acc_info->offset, &iv, true))
	    {
	      if (iv.step != NULL)
		{
		  HOST_WIDE_INT istep;

		  istep = int_cst_value (iv.step);
		  if (istep != 0)
		    {
		      acc_info->iterated_by_inner_most_loop_p = 1;
		      mi->dim_hot_level[acc_info->level] +=
			gimple_bb (acc_info->stmt)->count;
		    }
		}
	    }
	}
      free (acc_info);
    }
  VEC_free (access_site_info_p, heap, mi->access_l);

  return 1;
}
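
/* A small worked instance of the accumulation above (illustrative):
   if a POINTER_PLUS_EXPR access of level 1 sits in a basic block with
   profile count 100 and its offset is a simple induction variable of
   the enclosing innermost loop (nonzero step), then dim_hot_level[1]
   grows by 100; the per-dimension totals are later sorted (see
   sort_dim_hot_level below) to choose the new dimension order.  */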
/* Find the index which defines the OFFSET from base.
   We walk from use to def until we find how the offset was defined.  */
static tree
get_index_from_offset (tree offset, gimple def_stmt)
{
  tree op1, op2, index;

  if (gimple_code (def_stmt) == GIMPLE_PHI)
    return NULL;
  if ((gimple_assign_copy_p (def_stmt) || gimple_assign_cast_p (def_stmt))
      && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME)
    return get_index_from_offset (offset,
				  SSA_NAME_DEF_STMT (gimple_assign_rhs1 (def_stmt)));
  else if (is_gimple_assign (def_stmt)
	   && gimple_assign_rhs_code (def_stmt) == MULT_EXPR)
    {
      op1 = gimple_assign_rhs1 (def_stmt);
      op2 = gimple_assign_rhs2 (def_stmt);
      if (TREE_CODE (op1) != INTEGER_CST && TREE_CODE (op2) != INTEGER_CST)
	return NULL;
      index = (TREE_CODE (op1) == INTEGER_CST) ? op2 : op1;
      return index;
    }
  else
    return NULL_TREE;
}
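
/* For example (illustrative): if OFFSET was computed as

     D.2_4 = i_5 * 8;
     offset_6 = D.2_4;

   the walk skips the copy and, seeing the MULT_EXPR with the constant
   operand 8, returns the index i_5; a PHI node in the chain makes us
   give up and return NULL.  */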
/* Update MI->dimension_type_size[CURRENT_INDIRECT_LEVEL] with the size
   of the type related to the SSA_VAR, or the type related to the
   lhs of STMT, in the case that it is an INDIRECT_REF.  */
static void
update_type_size (struct matrix_info *mi, gimple stmt, tree ssa_var,
		  int current_indirect_level)
{
  tree lhs;
  HOST_WIDE_INT type_size;

  /* Update type according to the type of the INDIRECT_REF expr.  */
  if (is_gimple_assign (stmt)
      && TREE_CODE (gimple_assign_lhs (stmt)) == INDIRECT_REF)
    {
      lhs = gimple_assign_lhs (stmt);
      gcc_assert (POINTER_TYPE_P
		  (TREE_TYPE (SSA_NAME_VAR (TREE_OPERAND (lhs, 0)))));
      type_size =
	int_size_in_bytes (TREE_TYPE
			   (TREE_TYPE
			    (SSA_NAME_VAR (TREE_OPERAND (lhs, 0)))));
    }
  else
    type_size = int_size_in_bytes (TREE_TYPE (ssa_var));

  /* Record the size of elements accessed (as a whole)
     in the current indirection level (dimension).  If the size of
     elements is not known at compile time, mark it as escaping.  */
  if (type_size <= 0)
    mark_min_matrix_escape_level (mi, current_indirect_level, stmt);
  else
    {
      int l = current_indirect_level;

      if (!mi->dimension_type_size)
	{
	  mi->dimension_type_size
	    = (HOST_WIDE_INT *) xcalloc (l + 1, sizeof (HOST_WIDE_INT));
	  mi->dimension_type_size_len = l + 1;
	}
      else if (mi->dimension_type_size_len < l + 1)
	{
	  mi->dimension_type_size
	    = (HOST_WIDE_INT *) xrealloc (mi->dimension_type_size,
					  (l + 1) * sizeof (HOST_WIDE_INT));
	  memset (&mi->dimension_type_size[mi->dimension_type_size_len],
		  0, (l + 1 - mi->dimension_type_size_len)
		  * sizeof (HOST_WIDE_INT));
	  mi->dimension_type_size_len = l + 1;
	}
      /* Make sure all the accesses in the same level have the same size
	 of the type.  */
      if (!mi->dimension_type_size[l])
	mi->dimension_type_size[l] = type_size;
      else if (mi->dimension_type_size[l] != type_size)
	mark_min_matrix_escape_level (mi, l, stmt);
    }
}
/* USE_STMT represents a GIMPLE_CALL, where one of the arguments is the
   ssa var that we want to check because it came from some use of matrix
   MI.  CURRENT_INDIRECT_LEVEL is the indirection level we reached so
   far.  */

static int
analyze_accesses_for_call_stmt (struct matrix_info *mi, tree ssa_var,
				gimple use_stmt, int current_indirect_level)
{
  tree fndecl = gimple_call_fndecl (use_stmt);

  if (gimple_call_lhs (use_stmt))
    {
      tree lhs = gimple_call_lhs (use_stmt);
      struct ssa_acc_in_tree lhs_acc, rhs_acc;

      memset (&lhs_acc, 0, sizeof (lhs_acc));
      memset (&rhs_acc, 0, sizeof (rhs_acc));

      lhs_acc.ssa_var = ssa_var;
      lhs_acc.t_code = ERROR_MARK;
      ssa_accessed_in_tree (lhs, &lhs_acc);
      rhs_acc.ssa_var = ssa_var;
      rhs_acc.t_code = ERROR_MARK;
      ssa_accessed_in_call_rhs (use_stmt, &rhs_acc);

      /* The SSA must be either in the left side or in the right side,
	 to understand what is happening.
	 In case the SSA_NAME is found in both sides we should be escaping
	 at this level because in this case we cannot calculate the
	 address correctly.  */
      if ((lhs_acc.var_found && rhs_acc.var_found
	   && lhs_acc.t_code == INDIRECT_REF)
	  || (!rhs_acc.var_found && !lhs_acc.var_found))
	{
	  mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
	  return current_indirect_level;
	}
      gcc_assert (!rhs_acc.var_found || !lhs_acc.var_found);

      /* If we are storing to the matrix at some level, then mark it as
	 escaping at that level.  */
      if (lhs_acc.var_found)
	{
	  int l = current_indirect_level + 1;

	  gcc_assert (lhs_acc.t_code == INDIRECT_REF);
	  mark_min_matrix_escape_level (mi, l, use_stmt);
	  return current_indirect_level;
	}
    }

  if (fndecl)
    {
      if (DECL_FUNCTION_CODE (fndecl) != BUILT_IN_FREE)
	{
	  if (dump_file)
	    fprintf (dump_file,
		     "Matrix %s: Function call %s, level %d escapes.\n",
		     get_name (mi->decl), get_name (fndecl),
		     current_indirect_level);
	  mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
	}
      else if (mi->free_stmts[current_indirect_level].stmt != NULL
	       && mi->free_stmts[current_indirect_level].stmt != use_stmt)
	mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
      else
	{
	  /* Record the free statements so we can delete them
	     later.  */
	  int l = current_indirect_level;

	  mi->free_stmts[l].stmt = use_stmt;
	  mi->free_stmts[l].func = current_function_decl;
	}
    }
  return current_indirect_level;
}
/* USE_STMT represents a phi node of the ssa var that we want to
   check because it came from some use of matrix MI.
   We check all the escaping levels that get to the PHI node
   and make sure they are all the same escaping;
   if not (which is rare) we let the escaping level be the
   minimum level that gets into that PHI because starting from
   that level we cannot expect the behavior of the indirections.
   CURRENT_INDIRECT_LEVEL is the indirection level we reached so far.  */

static void
analyze_accesses_for_phi_node (struct matrix_info *mi, gimple use_stmt,
			       int current_indirect_level, sbitmap visited,
			       bool record_accesses)
{
  struct matrix_access_phi_node tmp_maphi, *maphi, **pmaphi;

  tmp_maphi.phi = use_stmt;
  if ((maphi = (struct matrix_access_phi_node *)
       htab_find (htab_mat_acc_phi_nodes, &tmp_maphi)))
    {
      if (maphi->indirection_level == current_indirect_level)
	return;
      else
	{
	  int level = MIN (maphi->indirection_level,
			   current_indirect_level);
	  size_t j;
	  gimple stmt = NULL;

	  maphi->indirection_level = level;
	  for (j = 0; j < gimple_phi_num_args (use_stmt); j++)
	    {
	      tree def = PHI_ARG_DEF (use_stmt, j);

	      if (gimple_code (SSA_NAME_DEF_STMT (def)) != GIMPLE_PHI)
		stmt = SSA_NAME_DEF_STMT (def);
	    }
	  mark_min_matrix_escape_level (mi, level, stmt);
	}
      return;
    }
  maphi = (struct matrix_access_phi_node *)
    xcalloc (1, sizeof (struct matrix_access_phi_node));
  maphi->phi = use_stmt;
  maphi->indirection_level = current_indirect_level;

  /* Insert to hash table.  */
  pmaphi = (struct matrix_access_phi_node **)
    htab_find_slot (htab_mat_acc_phi_nodes, maphi, INSERT);
  gcc_assert (pmaphi);
  *pmaphi = maphi;

  if (!TEST_BIT (visited, SSA_NAME_VERSION (PHI_RESULT (use_stmt))))
    {
      SET_BIT (visited, SSA_NAME_VERSION (PHI_RESULT (use_stmt)));
      analyze_matrix_accesses (mi, PHI_RESULT (use_stmt),
			       current_indirect_level, false, visited,
			       record_accesses);
      RESET_BIT (visited, SSA_NAME_VERSION (PHI_RESULT (use_stmt)));
    }
}
/* USE_STMT represents an assign statement (the rhs or lhs includes
   the ssa var that we want to check because it came from some use of matrix
   MI).  CURRENT_INDIRECT_LEVEL is the indirection level we reached so far.  */

static int
analyze_accesses_for_assign_stmt (struct matrix_info *mi, tree ssa_var,
				  gimple use_stmt, int current_indirect_level,
				  bool last_op, sbitmap visited,
				  bool record_accesses)
{
  tree lhs = gimple_get_lhs (use_stmt);
  struct ssa_acc_in_tree lhs_acc, rhs_acc;

  memset (&lhs_acc, 0, sizeof (lhs_acc));
  memset (&rhs_acc, 0, sizeof (rhs_acc));

  lhs_acc.ssa_var = ssa_var;
  lhs_acc.t_code = ERROR_MARK;
  ssa_accessed_in_tree (lhs, &lhs_acc);
  rhs_acc.ssa_var = ssa_var;
  rhs_acc.t_code = ERROR_MARK;
  ssa_accessed_in_assign_rhs (use_stmt, &rhs_acc);

  /* The SSA must be either in the left side or in the right side,
     to understand what is happening.
     In case the SSA_NAME is found in both sides we should be escaping
     at this level because in this case we cannot calculate the
     address correctly.  */
  if ((lhs_acc.var_found && rhs_acc.var_found
       && lhs_acc.t_code == INDIRECT_REF)
      || (!rhs_acc.var_found && !lhs_acc.var_found))
    {
      mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
      return current_indirect_level;
    }
  gcc_assert (!rhs_acc.var_found || !lhs_acc.var_found);

  /* If we are storing to the matrix at some level, then mark it as
     escaping at that level.  */
  if (lhs_acc.var_found)
    {
      int l = current_indirect_level + 1;

      gcc_assert (lhs_acc.t_code == INDIRECT_REF);

      if (!(gimple_assign_copy_p (use_stmt)
	    || gimple_assign_cast_p (use_stmt))
	  || (TREE_CODE (gimple_assign_rhs1 (use_stmt)) != SSA_NAME))
	mark_min_matrix_escape_level (mi, l, use_stmt);
      else
	{
	  gimple def_stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (use_stmt));
	  analyze_matrix_allocation_site (mi, def_stmt, l, visited);
	  if (record_accesses)
	    record_access_alloc_site_info (mi, use_stmt, NULL_TREE,
					   NULL_TREE, l, true);
	  update_type_size (mi, use_stmt, NULL, l);
	}
      return current_indirect_level;
    }
  /* Now, check the right-hand-side, to see how the SSA variable
     is used.  */
  if (rhs_acc.var_found)
    {
      if (rhs_acc.t_code != INDIRECT_REF
	  && rhs_acc.t_code != POINTER_PLUS_EXPR && rhs_acc.t_code != SSA_NAME)
	{
	  mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
	  return current_indirect_level;
	}
      /* If the access in the RHS has an indirection increase the
	 indirection level.  */
      if (rhs_acc.t_code == INDIRECT_REF)
	{
	  if (record_accesses)
	    record_access_alloc_site_info (mi, use_stmt, NULL_TREE,
					   NULL_TREE,
					   current_indirect_level, true);
	  current_indirect_level += 1;
	}
      else if (rhs_acc.t_code == POINTER_PLUS_EXPR)
	{
	  gcc_assert (rhs_acc.second_op);
	  if (last_op)
	    /* Currently we support only one PLUS expression on the
	       SSA_NAME that holds the base address of the current
	       indirection level; to support the more general case there
	       is a need to hold a stack of expressions and regenerate
	       the calculation later.  */
	    mark_min_matrix_escape_level (mi, current_indirect_level,
					  use_stmt);
	  else
	    {
	      tree index;
	      tree op1, op2;

	      op1 = gimple_assign_rhs1 (use_stmt);
	      op2 = gimple_assign_rhs2 (use_stmt);

	      op2 = (op1 == ssa_var) ? op2 : op1;
	      if (TREE_CODE (op2) == INTEGER_CST)
		index =
		  build_int_cst (TREE_TYPE (op1),
				 TREE_INT_CST_LOW (op2) /
				 int_size_in_bytes (TREE_TYPE (op1)));
	      else
		{
		  index =
		    get_index_from_offset (op2, SSA_NAME_DEF_STMT (op2));
		  if (index == NULL_TREE)
		    {
		      mark_min_matrix_escape_level (mi,
						    current_indirect_level,
						    use_stmt);
		      return current_indirect_level;
		    }
		}
	      if (record_accesses)
		record_access_alloc_site_info (mi, use_stmt, op2,
					       index,
					       current_indirect_level, false);
	    }
	}
      /* If we are storing this level of indirection mark it as
	 escaping.  */
      if (lhs_acc.t_code == INDIRECT_REF || TREE_CODE (lhs) != SSA_NAME)
	{
	  int l = current_indirect_level;

	  /* One exception is when we are storing to the matrix
	     variable itself; this is the case of malloc, so we must make
	     sure that it's the one and only call to malloc, and
	     we call analyze_matrix_allocation_site to check
	     this out.  */
	  if (TREE_CODE (lhs) != VAR_DECL || lhs != mi->decl)
	    mark_min_matrix_escape_level (mi, current_indirect_level,
					  use_stmt);
	  else
	    {
	      /* Also update the escaping level.  */
	      analyze_matrix_allocation_site (mi, use_stmt, l, visited);
	      if (record_accesses)
		record_access_alloc_site_info (mi, use_stmt, NULL_TREE,
					       NULL_TREE, l, true);
	    }
	}
      else
	/* We are placing it in an SSA, follow that SSA.  */
	analyze_matrix_accesses (mi, lhs,
				 current_indirect_level,
				 rhs_acc.t_code == POINTER_PLUS_EXPR,
				 visited, record_accesses);
    }
  return current_indirect_level;
}
/* Given an SSA_VAR (coming from a use statement of the matrix MI),
   follow its uses and level of indirection and find out the minimum
   indirection level it escapes in (the highest dimension) and the maximum
   level it is accessed in (this will be the actual dimension of the
   matrix).  The information is accumulated in MI.
   We look at the immediate uses; if one escapes we finish, and if not,
   we make a recursive call for each one of the immediate uses of the
   resulting SSA name.  */
static void
analyze_matrix_accesses (struct matrix_info *mi, tree ssa_var,
			 int current_indirect_level, bool last_op,
			 sbitmap visited, bool record_accesses)
{
  imm_use_iterator imm_iter;
  use_operand_p use_p;

  update_type_size (mi, SSA_NAME_DEF_STMT (ssa_var), ssa_var,
		    current_indirect_level);

  /* We don't go beyond the escaping level when we are performing the
     flattening.  NOTE: we keep the last indirection level that doesn't
     escape.  */
  if (mi->min_indirect_level_escape > -1
      && mi->min_indirect_level_escape <= current_indirect_level)
    return;

  /* Now go over the uses of the SSA_NAME and check how it is used in
     each one of them.  We are mainly looking for the pattern INDIRECT_REF,
     then a POINTER_PLUS_EXPR, then INDIRECT_REF etc., while in between there
     could be any number of copies and casts.  */
  gcc_assert (TREE_CODE (ssa_var) == SSA_NAME);

  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, ssa_var)
    {
      gimple use_stmt = USE_STMT (use_p);
      if (gimple_code (use_stmt) == GIMPLE_PHI)
	/* We check all the escaping levels that get to the PHI node
	   and make sure they are all the same escaping;
	   if not (which is rare) we let the escaping level be the
	   minimum level that gets into that PHI because starting from
	   that level we cannot expect the behavior of the indirections.  */
	analyze_accesses_for_phi_node (mi, use_stmt, current_indirect_level,
				       visited, record_accesses);
      else if (is_gimple_call (use_stmt))
	analyze_accesses_for_call_stmt (mi, ssa_var, use_stmt,
					current_indirect_level);
      else if (is_gimple_assign (use_stmt))
	current_indirect_level =
	  analyze_accesses_for_assign_stmt (mi, ssa_var, use_stmt,
					    current_indirect_level, last_op,
					    visited, record_accesses);
    }
}
typedef struct
{
  tree fn;
  gimple stmt;
} check_var_data;

/* A walk_tree function to go over the VAR_DECL, PARM_DECL nodes of
   the malloc size expression and check that those aren't changed
   over the function.  */
static tree
check_var_notmodified_p (tree *tp, int *walk_subtrees, void *data)
{
  basic_block bb;
  tree t = *tp;
  check_var_data *callback_data = (check_var_data *) data;
  tree fn = callback_data->fn;
  gimple_stmt_iterator gsi;
  gimple stmt;

  if (TREE_CODE (t) != VAR_DECL && TREE_CODE (t) != PARM_DECL)
    return NULL_TREE;

  FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (fn))
    {
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  stmt = gsi_stmt (gsi);
	  if (!is_gimple_assign (stmt) && !is_gimple_call (stmt))
	    continue;
	  if (gimple_get_lhs (stmt) == t)
	    {
	      callback_data->stmt = stmt;
	      return t;
	    }
	}
    }
  *walk_subtrees = 1;
  return NULL_TREE;
}
/* Go backwards in the use-def chains and find out the expression
   represented by the possible SSA name in STMT, until it is composed
   of only VAR_DECL, PARM_DECL and INT_CST.  In case of phi nodes
   we make sure that all the arguments represent the same subexpression,
   otherwise we fail.  */

static tree
can_calculate_stmt_before_stmt (gimple stmt, sbitmap visited)
{
  tree op1, op2, res;
  enum tree_code code;

  switch (gimple_code (stmt))
    {
    case GIMPLE_ASSIGN:
      code = gimple_assign_rhs_code (stmt);
      op1 = gimple_assign_rhs1 (stmt);

      switch (code)
	{
	case POINTER_PLUS_EXPR:
	case PLUS_EXPR:
	case MINUS_EXPR:
	case MULT_EXPR:
	  op2 = gimple_assign_rhs2 (stmt);
	  op1 = can_calculate_expr_before_stmt (op1, visited);
	  if (!op1)
	    return NULL_TREE;
	  op2 = can_calculate_expr_before_stmt (op2, visited);
	  if (op2)
	    return fold_build2 (code, gimple_expr_type (stmt), op1, op2);
	  return NULL_TREE;

	CASE_CONVERT:
	  res = can_calculate_expr_before_stmt (op1, visited);
	  if (res != NULL_TREE)
	    return build1 (code, gimple_expr_type (stmt), res);
	  else
	    return NULL_TREE;

	default:
	  if (gimple_assign_single_p (stmt))
	    return can_calculate_expr_before_stmt (op1, visited);
	  else
	    return NULL_TREE;
	}

    case GIMPLE_PHI:
      {
	size_t j;

	res = NULL_TREE;
	/* Make sure all the arguments represent the same value.  */
	for (j = 0; j < gimple_phi_num_args (stmt); j++)
	  {
	    tree new_res;
	    tree def = PHI_ARG_DEF (stmt, j);

	    new_res = can_calculate_expr_before_stmt (def, visited);
	    if (res == NULL_TREE)
	      res = new_res;
	    else if (!new_res || !expressions_equal_p (res, new_res))
	      return NULL_TREE;
	  }
	return res;
      }

    default:
      return NULL_TREE;
    }
}
/* Go backwards in the use-def chains and find out the expression
   represented by the possible SSA name in EXPR, until it is composed
   of only VAR_DECL, PARM_DECL and INT_CST.  In case of phi nodes
   we make sure that all the arguments represent the same subexpression,
   otherwise we fail.  */
static tree
can_calculate_expr_before_stmt (tree expr, sbitmap visited)
{
  gimple def_stmt;
  tree res;

  switch (TREE_CODE (expr))
    {
    case SSA_NAME:
      /* Case of a loop, we don't know how to represent this expression.  */
      if (TEST_BIT (visited, SSA_NAME_VERSION (expr)))
	return NULL_TREE;

      SET_BIT (visited, SSA_NAME_VERSION (expr));
      def_stmt = SSA_NAME_DEF_STMT (expr);
      res = can_calculate_stmt_before_stmt (def_stmt, visited);
      RESET_BIT (visited, SSA_NAME_VERSION (expr));
      return res;
    case VAR_DECL:
    case PARM_DECL:
    case INTEGER_CST:
      return expr;

    default:
      return NULL_TREE;
    }
}
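
/* Example (illustrative): for a gimplified malloc-size computation
   such as

     n.0_2 = glob_n;
     T.1_3 = n.0_2 * 4;

   can_calculate_expr_before_stmt (T.1_3, visited) walks back through
   the copy and the MULT_EXPR and returns the folded expression
   "glob_n * 4", since VAR_DECL, PARM_DECL and INTEGER_CST are the
   accepted leaves; an SSA name whose definition chain forms a cycle
   (a loop) is rejected via the VISITED bitmap.  */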
/* There should be only one allocation function for the dimensions
   that don't escape.  Here we check the allocation sites in this
   function.  We must make sure that all the dimensions are allocated
   using malloc and that the malloc size parameter expression could be
   pre-calculated before the call to the malloc of dimension 0.

   Given a candidate matrix for flattening -- MI -- check if it's
   appropriate for flattening -- we analyze the allocation
   sites that we recorded in the previous analysis.  The result of the
   analysis is a level of indirection (matrix dimension) in which the
   flattening is safe.  We check the following conditions:
   1. There is only one allocation site for each dimension.
   2. The allocation sites of all the dimensions are in the same
      function.
   (The above two are being taken care of during the analysis when
   we check the allocation site).
   3. All the dimensions that we flatten are allocated at once; thus
      the total size must be known before the allocation of the
      dimension 0 (top level) -- we must make sure we represent the
      size of the allocation as an expression of global parameters or
      constants and that those don't change over the function.  */

static int
check_allocation_function (void **slot, void *data ATTRIBUTE_UNUSED)
{
  int level;
  gimple_stmt_iterator gsi;
  basic_block bb_level_0;
  struct matrix_info *mi = (struct matrix_info *) *slot;
  sbitmap visited;

  if (!mi->malloc_for_level)
    return 1;

  visited = sbitmap_alloc (num_ssa_names);

  /* Do nothing if the current function is not the allocation
     function of MI.  */
  if (mi->allocation_function_decl != current_function_decl
      /* We aren't in the main allocation function yet.  */
      || !mi->malloc_for_level[0])
    return 1;

  for (level = 1; level < mi->max_malloced_level; level++)
    if (!mi->malloc_for_level[level])
      break;

  mark_min_matrix_escape_level (mi, level, NULL);

  gsi = gsi_for_stmt (mi->malloc_for_level[0]);
  bb_level_0 = gsi.bb;

  /* Check if the expression of the size passed to malloc could be
     pre-calculated before the malloc of level 0.  */
  for (level = 1; level < mi->min_indirect_level_escape; level++)
    {
      gimple call_stmt;
      tree size;
      struct malloc_call_data mcd = {NULL, NULL_TREE, NULL_TREE};

      call_stmt = mi->malloc_for_level[level];

      /* Find the correct malloc information.  */
      collect_data_for_malloc_call (call_stmt, &mcd);

      /* No need to check anticipation for constants.  */
      if (TREE_CODE (mcd.size_var) == INTEGER_CST)
	{
	  if (!mi->dimension_size)
	    {
	      mi->dimension_size =
		(tree *) xcalloc (mi->min_indirect_level_escape,
				  sizeof (tree));
	      mi->dimension_size_orig =
		(tree *) xcalloc (mi->min_indirect_level_escape,
				  sizeof (tree));
	    }
	  mi->dimension_size[level] = mcd.size_var;
	  mi->dimension_size_orig[level] = mcd.size_var;
	  continue;
	}
      /* ??? Here we should also add the way to calculate the size
	 expression, not only know that it is anticipated.  */
      sbitmap_zero (visited);
      size = can_calculate_expr_before_stmt (mcd.size_var, visited);
      if (size == NULL_TREE)
	{
	  mark_min_matrix_escape_level (mi, level, call_stmt);
	  if (dump_file)
	    fprintf (dump_file,
		     "Matrix %s: Cannot calculate the size of allocation, escaping at level %d\n",
		     get_name (mi->decl), level);
	  break;
	}
      if (!mi->dimension_size)
	{
	  mi->dimension_size =
	    (tree *) xcalloc (mi->min_indirect_level_escape, sizeof (tree));
	  mi->dimension_size_orig =
	    (tree *) xcalloc (mi->min_indirect_level_escape, sizeof (tree));
	}
      mi->dimension_size[level] = size;
      mi->dimension_size_orig[level] = size;
    }

  /* We don't need those anymore.  */
  for (level = mi->min_indirect_level_escape;
       level < mi->max_malloced_level; level++)
    mi->malloc_for_level[level] = NULL;
  return 1;
}
/* Track all access and allocation sites.  */
static void
find_sites_in_func (bool record)
{
  sbitmap visited_stmts_1;

  gimple_stmt_iterator gsi;
  gimple stmt;
  basic_block bb;
  struct matrix_info tmpmi, *mi;

  visited_stmts_1 = sbitmap_alloc (num_ssa_names);

  FOR_EACH_BB (bb)
    {
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  tree lhs;

	  stmt = gsi_stmt (gsi);
	  lhs = gimple_get_lhs (stmt);
	  if (lhs != NULL_TREE
	      && TREE_CODE (lhs) == VAR_DECL)
	    {
	      tmpmi.decl = lhs;
	      if ((mi = (struct matrix_info *) htab_find (matrices_to_reorg,
							  &tmpmi)))
		{
		  sbitmap_zero (visited_stmts_1);
		  analyze_matrix_allocation_site (mi, stmt, 0, visited_stmts_1);
		}
	    }
	  if (is_gimple_assign (stmt)
	      && gimple_assign_single_p (stmt)
	      && TREE_CODE (lhs) == SSA_NAME
	      && TREE_CODE (gimple_assign_rhs1 (stmt)) == VAR_DECL)
	    {
	      tmpmi.decl = gimple_assign_rhs1 (stmt);
	      if ((mi = (struct matrix_info *) htab_find (matrices_to_reorg,
							  &tmpmi)))
		{
		  sbitmap_zero (visited_stmts_1);
		  analyze_matrix_accesses (mi, lhs, 0,
					   false, visited_stmts_1, record);
		}
	    }
	}
    }
  sbitmap_free (visited_stmts_1);
}
/* Traverse the use-def chains to see if there are matrices that
   are passed through pointers and we cannot know how they are accessed.
   For each SSA-name defined by a global variable of our interest,
   we traverse the use-def chains of the SSA and follow the indirections,
   and record in what level of indirection the use of the variable
   escapes.  A use of a pointer escapes when it is passed to a function,
   stored into memory or assigned (except in malloc and free calls).  */

static void
record_all_accesses_in_func (void)
{
  unsigned i;
  sbitmap visited_stmts_1;

  visited_stmts_1 = sbitmap_alloc (num_ssa_names);

  for (i = 0; i < num_ssa_names; i++)
    {
      struct matrix_info tmpmi, *mi;
      tree ssa_var = ssa_name (i);
      tree rhs, lhs;

      if (!ssa_var
	  || !is_gimple_assign (SSA_NAME_DEF_STMT (ssa_var))
	  || !gimple_assign_single_p (SSA_NAME_DEF_STMT (ssa_var)))
	continue;
      rhs = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (ssa_var));
      lhs = gimple_assign_lhs (SSA_NAME_DEF_STMT (ssa_var));
      if (TREE_CODE (rhs) != VAR_DECL && TREE_CODE (lhs) != VAR_DECL)
	continue;

      /* If the RHS is a matrix that we want to analyze, follow the def-use
	 chain for this SSA_VAR and check for escapes or apply the
	 flattening.  */
      tmpmi.decl = rhs;
      if ((mi = (struct matrix_info *) htab_find (matrices_to_reorg, &tmpmi)))
	{
	  /* This variable will track the visited PHI nodes, so we can limit
	     its size to the maximum number of SSA names.  */
	  sbitmap_zero (visited_stmts_1);
	  analyze_matrix_accesses (mi, ssa_var,
				   0, false, visited_stmts_1, true);
	}
    }
  sbitmap_free (visited_stmts_1);
}
/* Used when we want to convert the expression: RESULT = something *
   ORIG to RESULT = something * NEW_VAL.  If ORIG and NEW_VAL are powers
   of 2, shift operations can be done, else division and
   multiplication.  */

static tree
compute_offset (HOST_WIDE_INT orig, HOST_WIDE_INT new_val, tree result)
{
  int x, y;
  tree result1, ratio, log, orig_tree, new_tree;

  x = exact_log2 (orig);
  y = exact_log2 (new_val);

  if (x != -1 && y != -1)
    {
      if (x == y)
	return result;
      else if (x > y)
	{
	  log = build_int_cst (TREE_TYPE (result), x - y);
	  result1 =
	    fold_build2 (LSHIFT_EXPR, TREE_TYPE (result), result, log);
	  return result1;
	}
      log = build_int_cst (TREE_TYPE (result), y - x);
      result1 = fold_build2 (RSHIFT_EXPR, TREE_TYPE (result), result, log);

      return result1;
    }
  orig_tree = build_int_cst (TREE_TYPE (result), orig);
  new_tree = build_int_cst (TREE_TYPE (result), new_val);
  ratio = fold_build2 (TRUNC_DIV_EXPR, TREE_TYPE (result), result, orig_tree);
  result1 = fold_build2 (MULT_EXPR, TREE_TYPE (result), ratio, new_tree);

  return result1;
}
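
/* Illustrative numbers (not from the sources): with ORIG == 12 and
   NEW_VAL == 4 neither value triggers the power-of-two path, so the
   function builds (RESULT / 12) * 4; with ORIG == 16 and NEW_VAL == 4
   both are powers of two (x == 4, y == 2, x > y), so the shift path
   is taken instead.  */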
/* We know that we are allowed to perform matrix flattening (according to the
   escape analysis), so we traverse the use-def chains of the SSA vars
   defined by the global variables pointing to the matrices of our interest.
   In each use of the SSA we calculate the offset from the base address
   according to the following equation:

     a[I1][I2]...[Ik], where D1..Dk is the length of each dimension and the
     escaping level is m <= k, and a' is the new allocated matrix,
     will be translated to:

       b[I(m+1)]...[Ik]

     where
       b = a' + I1*D2...*Dm + I2*D3...*Dm + ... + Im
   */
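
/* A worked instance of the equation above (illustrative only): for a
   fully flattened two-dimensional matrix (k == 2, escaping level
   m == 2) the access a[I1][I2] becomes *(a' + I1*D2 + I2), which is
   exactly the a[i * dim2 + j] rewrite shown at the top of this
   file.  */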
static int
transform_access_sites (void **slot, void *data ATTRIBUTE_UNUSED)
{
  gimple_stmt_iterator gsi;
  struct matrix_info *mi = (struct matrix_info *) *slot;
  int min_escape_l = mi->min_indirect_level_escape;
  struct access_site_info *acc_info;
  enum tree_code code;
  int i;

  if (min_escape_l < 2 || !mi->access_l)
    return 1;
  for (i = 0; VEC_iterate (access_site_info_p, mi->access_l, i, acc_info);
       i++)
    {
      /* This is possible because we collect the access sites before
         we determine the final minimum indirection level.  */
      if (acc_info->level >= min_escape_l)
        {
          free (acc_info);
          continue;
        }
      if (acc_info->is_alloc)
        {
          if (acc_info->level >= 0 && gimple_bb (acc_info->stmt))
            {
              ssa_op_iter iter;
              tree def;
              gimple stmt = acc_info->stmt;
              tree lhs;

              FOR_EACH_SSA_TREE_OPERAND (def, stmt, iter, SSA_OP_DEF)
                mark_sym_for_renaming (SSA_NAME_VAR (def));
              gsi = gsi_for_stmt (stmt);
              gcc_assert (is_gimple_assign (acc_info->stmt));
              lhs = gimple_assign_lhs (acc_info->stmt);
              if (TREE_CODE (lhs) == SSA_NAME
                  && acc_info->level < min_escape_l - 1)
                {
                  imm_use_iterator imm_iter;
                  use_operand_p use_p;
                  gimple use_stmt;

                  FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, lhs)
                    FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
                    {
                      tree rhs, tmp;
                      gimple new_stmt;

                      gcc_assert (gimple_assign_rhs_code (acc_info->stmt)
                                  == INDIRECT_REF);
                      /* Emit convert statement to convert to type of use.  */
                      tmp = create_tmp_var (TREE_TYPE (lhs), "new");
                      add_referenced_var (tmp);
                      rhs = gimple_assign_rhs1 (acc_info->stmt);
                      rhs = fold_convert (TREE_TYPE (tmp),
                                          TREE_OPERAND (rhs, 0));
                      new_stmt = gimple_build_assign (tmp, rhs);
                      tmp = make_ssa_name (tmp, new_stmt);
                      gimple_assign_set_lhs (new_stmt, tmp);
                      gsi = gsi_for_stmt (acc_info->stmt);
                      gsi_insert_after (&gsi, new_stmt, GSI_SAME_STMT);
                      SET_USE (use_p, tmp);
                    }
                }
              if (acc_info->level < min_escape_l - 1)
                gsi_remove (&gsi, true);
            }
          free (acc_info);
          continue;
        }
      code = gimple_assign_rhs_code (acc_info->stmt);
      if (code == INDIRECT_REF
          && acc_info->level < min_escape_l - 1)
        {
          /* Replace the INDIRECT_REF with a NOP (cast); usually we are
             casting from "pointer to type" to "type".  */
          tree t =
            build1 (NOP_EXPR, TREE_TYPE (gimple_assign_rhs1 (acc_info->stmt)),
                    TREE_OPERAND (gimple_assign_rhs1 (acc_info->stmt), 0));
          gimple_assign_set_rhs_code (acc_info->stmt, NOP_EXPR);
          gimple_assign_set_rhs1 (acc_info->stmt, t);
        }
      else if (code == POINTER_PLUS_EXPR
               && acc_info->level < (min_escape_l))
        {
          imm_use_iterator imm_iter;
          use_operand_p use_p;

          tree offset;
          int k = acc_info->level;
          tree num_elements, total_elements;
          tree tmp1;
          tree d_size = mi->dimension_size[k];

          /* We already made sure in the analysis that the first operand
             is the base and the second is the offset.  */
          offset = acc_info->offset;
          if (mi->dim_map[k] == min_escape_l - 1)
            {
              if (!check_transpose_p || mi->is_transposed_p == false)
                tmp1 = offset;
              else
                {
                  tree new_offset;
                  tree d_type_size, d_type_size_k;

                  d_type_size = size_int (mi->dimension_type_size[min_escape_l]);
                  d_type_size_k = size_int (mi->dimension_type_size[k + 1]);

                  new_offset =
                    compute_offset (mi->dimension_type_size[min_escape_l],
                                    mi->dimension_type_size[k + 1], offset);

                  total_elements = new_offset;
                  if (new_offset != offset)
                    {
                      gsi = gsi_for_stmt (acc_info->stmt);
                      tmp1 = force_gimple_operand_gsi (&gsi, total_elements,
                                                       true, NULL,
                                                       true, GSI_SAME_STMT);
                    }
                  else
                    tmp1 = offset;
                }
            }
          else
            {
              d_size = mi->dimension_size[mi->dim_map[k] + 1];
              num_elements =
                fold_build2 (MULT_EXPR, sizetype,
                             fold_convert (sizetype, acc_info->index),
                             fold_convert (sizetype, d_size));
              add_referenced_var (d_size);
              gsi = gsi_for_stmt (acc_info->stmt);
              tmp1 = force_gimple_operand_gsi (&gsi, num_elements, true,
                                               NULL, true, GSI_SAME_STMT);
            }
          /* Replace the offset if needed.  */
          if (tmp1 != offset)
            {
              if (TREE_CODE (offset) == SSA_NAME)
                {
                  gimple use_stmt;

                  FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, offset)
                    FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
                    if (use_stmt == acc_info->stmt)
                      SET_USE (use_p, tmp1);
                }
              else
                {
                  gcc_assert (TREE_CODE (offset) == INTEGER_CST);
                  gimple_assign_set_rhs2 (acc_info->stmt, tmp1);
                  update_stmt (acc_info->stmt);
                }
            }
        }
      /* ??? This happens because we record the same access site more
         than once; we should be using a hash table to avoid it and
         insert the STMT of the access site only once.
         else
           gcc_unreachable (); */
      free (acc_info);
    }
  VEC_free (access_site_info_p, heap, mi->access_l);

  update_ssa (TODO_update_ssa);
#ifdef ENABLE_CHECKING
  verify_ssa (true);
#endif
  return 1;
}
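/* Illustrative summary (not from the original source): for an access at a
   non-innermost level K, the POINTER_PLUS rewrite above replaces an offset
   of the form "idx * sizeof (void *)" (stepping over row pointers) with
   "idx * DIM_SIZE[dim_map[K] + 1]" (stepping over whole sub-blocks of the
   flattened allocation).  */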
/* Sort the array A of access counts (a simple bubble sort).
   Rearrange DIM_MAP to reflect the new order.  */
static void
sort_dim_hot_level (gcov_type * a, int *dim_map, int n)
{
  int i, j, tmp1;
  gcov_type tmp;

  for (i = 0; i < n - 1; i++)
    {
      for (j = 0; j < n - 1 - i; j++)
        {
          if (a[j + 1] < a[j])
            {
              tmp = a[j];       /* swap a[j] and a[j+1] */
              a[j] = a[j + 1];
              a[j + 1] = tmp;
              tmp1 = dim_map[j];
              dim_map[j] = dim_map[j + 1];
              dim_map[j + 1] = tmp1;
            }
        }
    }
}
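/* For example (illustrative only): with counts A = {5, 9, 3} and
   DIM_MAP = {0, 1, 2}, the sort yields A = {3, 5, 9} and
   DIM_MAP = {2, 0, 1}; i.e. original dimension 2, the one with the
   lowest count, becomes dimension 0.  */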
/* Replace the multiple mallocs (one for each dimension) with one malloc
   of size DIM1*DIM2*...*DIMN*size_of_element.
   Make sure that we hold the size used at the malloc site inside a
   new global variable; this way we ensure that the size doesn't
   change and that it is accessible from all the other functions that
   use the matrix.  Also, the original calls to free are deleted,
   and replaced by a new call to free the flattened matrix.  */
static int
transform_allocation_sites (void **slot, void *data ATTRIBUTE_UNUSED)
{
  int i;
  struct matrix_info *mi;
  tree type, oldfn, prev_dim_size;
  gimple call_stmt_0, use_stmt;
  struct cgraph_node *c_node;
  struct cgraph_edge *e;
  gimple_stmt_iterator gsi;
  struct malloc_call_data mcd = {NULL, NULL_TREE, NULL_TREE};
  HOST_WIDE_INT element_size;

  imm_use_iterator imm_iter;
  use_operand_p use_p;
  tree old_size_0, tmp;
  int min_escape_l;
  int id;

  mi = (struct matrix_info *) *slot;

  min_escape_l = mi->min_indirect_level_escape;

  if (!mi->malloc_for_level)
    mi->min_indirect_level_escape = 0;

  if (mi->min_indirect_level_escape < 2)
    return 1;

  mi->dim_map = (int *) xcalloc (mi->min_indirect_level_escape, sizeof (int));
  for (i = 0; i < mi->min_indirect_level_escape; i++)
    mi->dim_map[i] = i;
  if (check_transpose_p)
    {
      int i;

      if (dump_file)
        {
          fprintf (dump_file, "Matrix %s:\n", get_name (mi->decl));
          for (i = 0; i < min_escape_l; i++)
            {
              fprintf (dump_file, "dim %d before sort ", i);
              if (mi->dim_hot_level)
                fprintf (dump_file,
                         "count is " HOST_WIDEST_INT_PRINT_DEC " \n",
                         mi->dim_hot_level[i]);
            }
        }
      sort_dim_hot_level (mi->dim_hot_level, mi->dim_map,
                          mi->min_indirect_level_escape);
      if (dump_file)
        for (i = 0; i < min_escape_l; i++)
          {
            fprintf (dump_file, "dim %d after sort\n", i);
            if (mi->dim_hot_level)
              fprintf (dump_file, "count is " HOST_WIDE_INT_PRINT_DEC
                       " \n", (HOST_WIDE_INT) mi->dim_hot_level[i]);
          }
      for (i = 0; i < mi->min_indirect_level_escape; i++)
        {
          if (dump_file)
            fprintf (dump_file, "dim_map[%d] after sort %d\n", i,
                     mi->dim_map[i]);
          if (mi->dim_map[i] != i)
            {
              if (dump_file)
                fprintf (dump_file,
                         "Transposed dimensions: dim %d is now dim %d\n",
                         mi->dim_map[i], i);
              mi->is_transposed_p = true;
            }
        }
    }
  else
    {
      for (i = 0; i < mi->min_indirect_level_escape; i++)
        mi->dim_map[i] = i;
    }
  /* Call statement of allocation site of level 0.  */
  call_stmt_0 = mi->malloc_for_level[0];

  /* Find the correct malloc information.  */
  collect_data_for_malloc_call (call_stmt_0, &mcd);

  mi->dimension_size[0] = mcd.size_var;
  mi->dimension_size_orig[0] = mcd.size_var;
  /* Make sure that the variables in the size expression for
     all the dimensions (above level 0) aren't modified in
     the allocation function.  */
  for (i = 1; i < mi->min_indirect_level_escape; i++)
    {
      tree t;
      check_var_data data;

      /* mi->dimension_size must contain the expression of the size calculated
         in check_allocation_function.  */
      gcc_assert (mi->dimension_size[i]);

      data.fn = mi->allocation_function_decl;
      data.stmt = NULL;
      t = walk_tree_without_duplicates (&(mi->dimension_size[i]),
                                        check_var_notmodified_p,
                                        &data);
      if (t != NULL_TREE)
        {
          mark_min_matrix_escape_level (mi, i, data.stmt);
          break;
        }
    }

  if (mi->min_indirect_level_escape < 2)
    return 1;

  /* Make sure that the size expression is available
     before the call to malloc of level 0.  */
  gsi = gsi_for_stmt (call_stmt_0);

  /* Find out the size of each dimension by looking at the malloc
     sites and create a global variable to hold it.
     We add the assignment to the global before the malloc of level 0.  */

  /* To be able to produce gimple temporaries.  */
  oldfn = current_function_decl;
  current_function_decl = mi->allocation_function_decl;
  push_cfun (DECL_STRUCT_FUNCTION (mi->allocation_function_decl));

  /* Set the dimension sizes as follows:
     DIM_SIZE[i] = DIM_SIZE[n] * ... * DIM_SIZE[i]
     where n is the maximum non-escaping level.  */
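  /* Illustrative trace (not from the original source): for a two-level
     matrix allocated with D1 * sizeof (int *) bytes at level 0 and
     D2 * sizeof (int) bytes at level 1, without transposing, the loop
     below computes

       DIM_SIZE[1] = D2 * sizeof (int)
       DIM_SIZE[0] = (D1 * sizeof (int *)) / sizeof (int *) * DIM_SIZE[1]
                   = D1 * D2 * sizeof (int)

     and DIM_SIZE[0] then becomes the argument of the single remaining
     malloc call.  */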
  element_size = mi->dimension_type_size[mi->min_indirect_level_escape];
  prev_dim_size = NULL_TREE;

  for (i = mi->min_indirect_level_escape - 1; i >= 0; i--)
    {
      tree dim_size, dim_var;
      gimple stmt;
      tree d_type_size;

      /* Now put the size expression in a global variable and initialize it to
         the size expression before the malloc of level 0.  */
      dim_var =
        add_new_static_var (TREE_TYPE
                            (mi->dimension_size_orig[mi->dim_map[i]]));
      type = TREE_TYPE (mi->dimension_size_orig[mi->dim_map[i]]);

      /* DIM_SIZE = MALLOC_SIZE_PARAM / TYPE_SIZE.  */
      /* Find which dim ID becomes dim I.  */
      for (id = 0; id < mi->min_indirect_level_escape; id++)
        if (mi->dim_map[id] == i)
          break;
      d_type_size =
        build_int_cst (type, mi->dimension_type_size[id + 1]);
      if (!prev_dim_size)
        prev_dim_size = build_int_cst (type, element_size);
      if (!check_transpose_p && i == mi->min_indirect_level_escape - 1)
        {
          dim_size = mi->dimension_size_orig[id];
        }
      else
        {
          dim_size =
            fold_build2 (TRUNC_DIV_EXPR, type, mi->dimension_size_orig[id],
                         d_type_size);

          dim_size = fold_build2 (MULT_EXPR, type, dim_size, prev_dim_size);
        }
      dim_size = force_gimple_operand_gsi (&gsi, dim_size, true, NULL,
                                           true, GSI_SAME_STMT);
      /* GLOBAL_HOLDING_THE_SIZE = DIM_SIZE.  */
      stmt = gimple_build_assign (dim_var, dim_size);
      mark_symbols_for_renaming (stmt);
      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);

      prev_dim_size = mi->dimension_size[i] = dim_var;
    }
  update_ssa (TODO_update_ssa);
  /* Replace the malloc size argument in the malloc of level 0 to be
     the size of all the dimensions.  */
  c_node = cgraph_node (mi->allocation_function_decl);
  old_size_0 = gimple_call_arg (call_stmt_0, 0);
  tmp = force_gimple_operand_gsi (&gsi, mi->dimension_size[0], true,
                                  NULL, true, GSI_SAME_STMT);
  if (TREE_CODE (old_size_0) == SSA_NAME)
    {
      FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, old_size_0)
        FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
        if (use_stmt == call_stmt_0)
          SET_USE (use_p, tmp);
    }
  /* When deleting the calls to malloc we also need to remove the edge
     from the call graph to keep it consistent.  Notice that cgraph_edge
     may create a new node in the call graph if there is no node for the
     given declaration; this shouldn't be the case but currently there is
     no way to check this outside of "cgraph.c".  */
  for (i = 1; i < mi->min_indirect_level_escape; i++)
    {
      gimple_stmt_iterator gsi;
      gimple use_stmt1 = NULL;

      gimple call_stmt = mi->malloc_for_level[i];
      gcc_assert (is_gimple_call (call_stmt));
      e = cgraph_edge (c_node, call_stmt);
      gcc_assert (e);
      cgraph_remove_edge (e);
      gsi = gsi_for_stmt (call_stmt);
      /* Remove the call stmt.  */
      gsi_remove (&gsi, true);
      /* Remove the type cast stmt.  */
      FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter,
                             gimple_call_lhs (call_stmt))
        {
          use_stmt1 = use_stmt;
          gsi = gsi_for_stmt (use_stmt);
          gsi_remove (&gsi, true);
        }
      /* Remove the assignment of the allocated area.  */
      FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter,
                             gimple_get_lhs (use_stmt1))
        {
          gsi = gsi_for_stmt (use_stmt);
          gsi_remove (&gsi, true);
        }
    }
  update_ssa (TODO_update_ssa);
#ifdef ENABLE_CHECKING
  verify_ssa (true);
#endif
  /* Delete the calls to free.  */
  for (i = 1; i < mi->min_indirect_level_escape; i++)
    {
      gimple_stmt_iterator gsi;

      /* ??? Not clear why this case is possible, but we failed on it once.  */
      if (!mi->free_stmts[i].stmt)
        continue;

      c_node = cgraph_node (mi->free_stmts[i].func);
      gcc_assert (is_gimple_call (mi->free_stmts[i].stmt));
      e = cgraph_edge (c_node, mi->free_stmts[i].stmt);
      gcc_assert (e);
      cgraph_remove_edge (e);
      current_function_decl = mi->free_stmts[i].func;
      set_cfun (DECL_STRUCT_FUNCTION (mi->free_stmts[i].func));
      gsi = gsi_for_stmt (mi->free_stmts[i].stmt);
      gsi_remove (&gsi, true);
    }
  /* Return to the previous situation.  */
  current_function_decl = oldfn;
  pop_cfun ();
  return 1;
}
/* Print out the results of the escape analysis.  */

static int
dump_matrix_reorg_analysis (void **slot, void *data ATTRIBUTE_UNUSED)
{
  struct matrix_info *mi = (struct matrix_info *) *slot;

  if (!dump_file)
    return 1;
  fprintf (dump_file, "Matrix \"%s\"; Escaping Level: %d, Num Dims: %d,",
           get_name (mi->decl), mi->min_indirect_level_escape, mi->num_dims);
  fprintf (dump_file, " Malloc Dims: %d, ", mi->max_malloced_level);
  fprintf (dump_file, "\n");
  if (mi->min_indirect_level_escape >= 2)
    fprintf (dump_file, "Flattened %d dimensions \n",
             mi->min_indirect_level_escape);
  return 1;
}
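/* Sample dump output (hypothetical matrix "a", fully flattened in two
   dimensions), as produced by the fprintf calls above:

     Matrix "a"; Escaping Level: 2, Num Dims: 2, Malloc Dims: 2,
     Flattened 2 dimensions  */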
/* Perform matrix flattening.  */

static unsigned int
matrix_reorg (void)
{
  struct cgraph_node *node;

  if (profile_info)
    check_transpose_p = true;
  else
    check_transpose_p = false;
  /* If there are hand-written vectors, we skip this optimization.  */
  for (node = cgraph_nodes; node; node = node->next)
    if (!may_flatten_matrices (node))
      return 0;
  matrices_to_reorg = htab_create (37, mtt_info_hash, mtt_info_eq, mat_free);
  /* Find and record all potential matrices in the program.  */
  find_matrices_decl ();
  /* Analyze the accesses of the matrices (escape analysis).  */
  for (node = cgraph_nodes; node; node = node->next)
    if (node->analyzed)
      {
        tree temp_fn;

        temp_fn = current_function_decl;
        current_function_decl = node->decl;
        push_cfun (DECL_STRUCT_FUNCTION (node->decl));
        bitmap_obstack_initialize (NULL);
        gimple_register_cfg_hooks ();

        if (!gimple_in_ssa_p (cfun))
          {
            free_dominance_info (CDI_DOMINATORS);
            free_dominance_info (CDI_POST_DOMINATORS);
            pop_cfun ();
            current_function_decl = temp_fn;
            bitmap_obstack_release (NULL);

            return 0;
          }

#ifdef ENABLE_CHECKING
        verify_flow_info ();
#endif

        if (!matrices_to_reorg)
          {
            free_dominance_info (CDI_DOMINATORS);
            free_dominance_info (CDI_POST_DOMINATORS);
            pop_cfun ();
            current_function_decl = temp_fn;
            bitmap_obstack_release (NULL);

            return 0;
          }

        /* Create htab for phi nodes.  */
        htab_mat_acc_phi_nodes = htab_create (37, mat_acc_phi_hash,
                                              mat_acc_phi_eq, free);
        if (!check_transpose_p)
          find_sites_in_func (false);
        else
          {
            find_sites_in_func (true);
            loop_optimizer_init (LOOPS_NORMAL);
            if (current_loops)
              scev_initialize ();
            htab_traverse (matrices_to_reorg, analyze_transpose, NULL);
            if (current_loops)
              {
                scev_finalize ();
                loop_optimizer_finalize ();
                current_loops = NULL;
              }
          }
        /* If the current function is the allocation function for any of
           the matrices we check its allocation and the escaping level.  */
        htab_traverse (matrices_to_reorg, check_allocation_function, NULL);
        free_dominance_info (CDI_DOMINATORS);
        free_dominance_info (CDI_POST_DOMINATORS);
        pop_cfun ();
        current_function_decl = temp_fn;
        bitmap_obstack_release (NULL);
      }
  htab_traverse (matrices_to_reorg, transform_allocation_sites, NULL);
  /* Now transform the accesses.  */
  for (node = cgraph_nodes; node; node = node->next)
    if (node->analyzed)
      {
        /* Remember that allocation sites have been handled.  */
        tree temp_fn;

        temp_fn = current_function_decl;
        current_function_decl = node->decl;
        push_cfun (DECL_STRUCT_FUNCTION (node->decl));
        bitmap_obstack_initialize (NULL);
        gimple_register_cfg_hooks ();
        record_all_accesses_in_func ();
        htab_traverse (matrices_to_reorg, transform_access_sites, NULL);
        free_dominance_info (CDI_DOMINATORS);
        free_dominance_info (CDI_POST_DOMINATORS);
        pop_cfun ();
        current_function_decl = temp_fn;
        bitmap_obstack_release (NULL);
      }
  htab_traverse (matrices_to_reorg, dump_matrix_reorg_analysis, NULL);

  current_function_decl = NULL;
  set_cfun (NULL);
  matrices_to_reorg = NULL;
  return 0;
}
/* The condition for matrix flattening to be performed.  */

static bool
gate_matrix_reorg (void)
{
  return flag_ipa_matrix_reorg && flag_whole_program;
}
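/* Usage note (assumption based on the flag names above): this gate
   corresponds to the -fipa-matrix-reorg and -fwhole-program command-line
   options, so the pass runs only for an invocation like
   "gcc -O2 -fipa-matrix-reorg -fwhole-program file.c".  */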
struct simple_ipa_opt_pass pass_ipa_matrix_reorg =
{
 {
  SIMPLE_IPA_PASS,
  "matrix-reorg",               /* name */
  gate_matrix_reorg,            /* gate */
  matrix_reorg,                 /* execute */
  NULL,                         /* sub */
  NULL,                         /* next */
  0,                            /* static_pass_number */
  TV_NONE,                      /* tv_id */
  0,                            /* properties_required */
  0,                            /* properties_provided */
  0,                            /* properties_destroyed */
  0,                            /* todo_flags_start */
  TODO_dump_cgraph | TODO_dump_func     /* todo_flags_finish */
 }
};