/* Matrix layout transformations.
   Copyright (C) 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
   Contributed by Razya Ladelsky <razya@il.ibm.com>
   Originally written by Revital Eres and Mustafa Hagog.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/*
   Matrix flattening optimization tries to replace an N-dimensional
   matrix with its equivalent M-dimensional matrix, where M < N.
   This first implementation focuses on global matrices defined dynamically.

   When M==1, we actually flatten the whole matrix.
   For instance consider a two-dimensional array a [dim1] [dim2].
   The code for allocating space for it usually looks like:

     a = (int **) malloc(dim1 * sizeof(int *));
     for (i=0; i<dim1; i++)
        a[i] = (int *) malloc (dim2 * sizeof(int));

   If the array "a" is found suitable for this optimization,
   its allocation is replaced by:

     a = (int *) malloc (dim1 * dim2 * sizeof(int));

   and all the references to a[i][j] are replaced by a[i * dim2 + j].
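
   The same scheme extends to more dimensions.  As an illustration (a
   hypothetical case, not one handled specially by this pass): fully
   flattening a three-dimensional array a [dim1] [dim2] [dim3] would
   allocate

     a = (int *) malloc (dim1 * dim2 * dim3 * sizeof(int));

   and rewrite each a[i][j][k] as a[(i * dim2 + j) * dim3 + k].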

   The two main phases of the optimization are the analysis
   and transformation.
   The driver of the optimization is matrix_reorg ().

   Analysis phase:
   ===============

   We'll number the dimensions outside-in, meaning the most external
   is 0, then 1, and so on.
   The analysis part of the optimization determines K, the escape
   level of an N-dimensional matrix (K <= N), that allows flattening of
   the external dimensions 0, 1, ..., K-1.  Escape level 0 means that the
   whole matrix escapes and no flattening is possible.

   The analysis part is implemented in analyze_matrix_allocation_site()
   and analyze_matrix_accesses().

   Transformation phase:
   =====================
   In this phase we define the new flattened matrices that replace the
   original matrices in the code.
   Implemented in transform_allocation_sites(),
   transform_access_sites().

   Matrix Transposing
   ==================
   The idea of matrix transposing is to organize the matrix in a different
   layout such that the dimensions are reordered.
   This could produce better cache behavior in some cases.

   For example, let's look at the matrix accesses in the following loop:

     for (i=0; i<N; i++)
      for (j=0; j<M; j++)
       access to a[i][j]

   This loop can produce good cache behavior because the elements of
   the inner dimension are accessed sequentially.

   However, if the accesses of the matrix were of the following form:

     for (i=0; i<N; i++)
      for (j=0; j<M; j++)
       access to a[j][i]

   In this loop we iterate over the columns and not the rows.
   Therefore, swapping the rows and columns
   would give a layout with better (cache) locality.
   Swapping the dimensions of the matrix is called matrix transposing.
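
   For instance (an illustrative continuation of the example above): if
   the matrix accessed as a[j][i] is transposed and then flattened, the
   access becomes a[i * M + j], so the innermost loop over j now walks
   consecutive memory locations.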

   This example, of course, could be extended to matrices with more
   dimensions as well.

   Since a program could include all kinds of accesses, there is a decision
   mechanism, implemented in analyze_transpose(), which implements a
   heuristic that tries to determine whether to transpose the matrix or not,
   according to the form of the dominant accesses.
   This decision is passed to the flattening mechanism, and whether
   or not the matrix was transposed, the matrix is flattened (if possible).

   This decision making is based on profiling information and loop information.
   If profiling information is available, the decision-making mechanism is
   run; otherwise the matrix will only be flattened (if possible).

   Both optimizations are described in the paper "Matrix flattening and
   transposing in GCC" which was presented at the GCC Summit 2006.
   http://www.gccsummit.org/2006/2006-GCC-Summit-Proceedings.pdf.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "rtl.h"
#include "tree-inline.h"
#include "tree-flow.h"
#include "tree-flow-inline.h"
#include "langhooks.h"
#include "hashtab.h"
#include "toplev.h"
#include "flags.h"
#include "ggc.h"
#include "debug.h"
#include "target.h"
#include "cgraph.h"
#include "diagnostic.h"
#include "timevar.h"
#include "params.h"
#include "fibheap.h"
#include "intl.h"
#include "function.h"
#include "basic-block.h"
#include "cfgloop.h"
#include "tree-iterator.h"
#include "tree-pass.h"
#include "opts.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "tree-data-ref.h"
#include "tree-ssa-sccvn.h"

/* We need to collect a lot of data from the original malloc,
   particularly as the gimplifier has converted:

   orig_var = (struct_type *) malloc (x * sizeof (struct_type *));

   into

   T3 = <constant> ;  ** <constant> is amount to malloc; precomputed **
   T4 = malloc (T3);
   T5 = (struct_type *) T4;
   orig_var = T5;

   The following struct fields allow us to collect all the necessary data from
   the gimplified program.  The comments in the struct below are all based
   on the gimple example above.  */

struct malloc_call_data
{
  gimple call_stmt;		/* Tree for "T4 = malloc (T3);" */
  tree size_var;		/* Var decl for T3.  */
  tree malloc_size;		/* Tree for "<constant>", the rhs assigned to T3.  */
};

static tree can_calculate_expr_before_stmt (tree, sbitmap);
static tree can_calculate_stmt_before_stmt (gimple, sbitmap);

/* The front end of the compiler, when parsing statements of the form:

   var = (type_cast) malloc (sizeof (type));

   always converts this single statement into the following statements
   (GIMPLE form):

   T.1 = sizeof (type);
   T.2 = malloc (T.1);
   T.3 = (type_cast) T.2;
   var = T.3;

   Since we need to create new malloc statements and modify the original
   statements somewhat, we need to find all four of the above statements.
   Currently record_call_1 (called for building cgraph edges) finds and
   records the statements containing the actual call to malloc, but we
   need to find the rest of the variables/statements on our own.  That
   is what the following function does.  */
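
/* For illustration (a walk-through, not additional pass logic): given
   the GIMPLE sequence above, the function below records "T.2 = malloc (T.1)"
   as CALL_STMT and T.1 as SIZE_VAR; MALLOC_SIZE is set to the size
   argument itself only when that argument is not a VAR_DECL like T.1
   (e.g. when it is already a constant).  */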
static void
collect_data_for_malloc_call (gimple stmt, struct malloc_call_data *m_data)
{
  tree size_var = NULL;
  tree malloc_fn_decl;
  tree arg1;

  gcc_assert (is_gimple_call (stmt));

  malloc_fn_decl = gimple_call_fndecl (stmt);
  if (malloc_fn_decl == NULL
      || DECL_FUNCTION_CODE (malloc_fn_decl) != BUILT_IN_MALLOC)
    return;

  arg1 = gimple_call_arg (stmt, 0);
  size_var = arg1;

  m_data->call_stmt = stmt;
  m_data->size_var = size_var;
  if (TREE_CODE (size_var) != VAR_DECL)
    m_data->malloc_size = size_var;
  else
    m_data->malloc_size = NULL_TREE;
}

/* Information about a matrix access site.
   For example: if an access site of matrix arr is arr[i][j],
   the ACCESS_SITE_INFO structure will have the address
   of arr as its stmt.  The INDEX_INFO will hold information about the
   initial address and index of each dimension.  */
struct access_site_info
{
  /* The statement (INDIRECT_REF or POINTER_PLUS_EXPR).  */
  gimple stmt;

  /* In case of POINTER_PLUS_EXPR, what is the offset.  */
  tree offset;

  /* The index which created the offset.  */
  tree index;

  /* The indirection level of this statement.  */
  int level;

  /* TRUE for allocation site, FALSE for access site.  */
  bool is_alloc;

  /* The function containing the access site.  */
  tree function_decl;

  /* This access is iterated in the innermost loop.  */
  bool iterated_by_inner_most_loop_p;
};

typedef struct access_site_info *access_site_info_p;
DEF_VEC_P (access_site_info_p);
DEF_VEC_ALLOC_P (access_site_info_p, heap);

/* Calls to free when flattening a matrix.  */

struct free_info
{
  gimple stmt;
  tree func;
};

/* Information about matrix to flatten.  */
struct matrix_info
{
  /* Decl tree of this matrix.  */
  tree decl;
  /* Number of dimensions; number
     of "*" in the type declaration.  */
  int num_dims;

  /* Minimum indirection level that escapes, 0 means that
     the whole matrix escapes, k means that dimensions
     0 to ACTUAL_DIM - k escape.  */
  int min_indirect_level_escape;

  gimple min_indirect_level_escape_stmt;

  /* Hold the allocation site for each level (dimension).
     We can use NUM_DIMS as the upper bound and allocate the array
     once with this number of elements and no need to use realloc and
     MAX_MALLOCED_LEVEL.  */
  gimple *malloc_for_level;

  int max_malloced_level;

  /* Is the matrix transposed.  */
  bool is_transposed_p;

  /* The location of the allocation sites (they must be in one
     function).  */
  tree allocation_function_decl;

  /* The calls to free for each level of indirection.  */
  struct free_info *free_stmts;

  /* An array which holds for each dimension its size, where
     dimension 0 is the outermost (the one that contains all the others).  */
  tree *dimension_size;

  /* An array which holds for each dimension its original size
     (before transposing and flattening take place).  */
  tree *dimension_size_orig;

  /* An array which holds for each dimension the size of the type of
     the elements accessed in that level (in bytes).  */
  HOST_WIDE_INT *dimension_type_size;

  int dimension_type_size_len;

  /* An array collecting the count of accesses for each dimension.  */
  gcov_type *dim_hot_level;

  /* An array of the accesses to be flattened.
     Elements are of type "struct access_site_info *".  */
  VEC (access_site_info_p, heap) *access_l;

  /* A map of how the dimensions will be organized at the end of
     the analyses.  */
  int *dim_map;
};

/* In each phi node we want to record the indirection level we have when we
   get to the phi node.  Usually we will have phi nodes with more than two
   arguments, so we must ensure that all of them get to the phi node with
   the same indirection level; otherwise it's not safe to do the flattening.
   So we record the information regarding the indirection level each time we
   get to the phi node in this hash table.  */
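
/* For example (illustrative): if x_3 = PHI <y_1, z_2> is reached at
   indirection level 1 through y_1 but at level 2 through z_2, the
   levels disagree; the matrix is then marked as escaping at level 1,
   the minimum of the two.  */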

struct matrix_access_phi_node
{
  gimple phi;
  int indirection_level;
};

/* We use this structure to find if the SSA variable is accessed inside the
   tree and record the tree containing it.  */

struct ssa_acc_in_tree
{
  /* The variable whose accesses in the tree we are looking for.  */
  tree ssa_var;
  /* The tree and code inside it in which ssa_var is accessed; currently
     it could be an INDIRECT_REF or CALL_EXPR.  */
  enum tree_code t_code;
  tree t_tree;
  /* The place in the containing tree.  */
  tree *tp;
  tree second_op;
  bool var_found;
};

static void analyze_matrix_accesses (struct matrix_info *, tree, int, bool,
				     sbitmap, bool);
static int transform_allocation_sites (void **, void *);
static int transform_access_sites (void **, void *);
static int analyze_transpose (void **, void *);
static int dump_matrix_reorg_analysis (void **, void *);

static bool check_transpose_p;

/* Hash function used for the phi nodes.  */

static hashval_t
mat_acc_phi_hash (const void *p)
{
  const struct matrix_access_phi_node *const ma_phi =
    (const struct matrix_access_phi_node *) p;

  return htab_hash_pointer (ma_phi->phi);
}

/* Equality means phi node pointers are the same.  */

static int
mat_acc_phi_eq (const void *p1, const void *p2)
{
  const struct matrix_access_phi_node *const phi1 =
    (const struct matrix_access_phi_node *) p1;
  const struct matrix_access_phi_node *const phi2 =
    (const struct matrix_access_phi_node *) p2;

  if (phi1->phi == phi2->phi)
    return 1;

  return 0;
}

/* Hold the PHI nodes we visit during the traversal for escaping
   analysis.  */
static htab_t htab_mat_acc_phi_nodes = NULL;

/* This hash-table holds the information about the matrices we are
   going to handle.  */
static htab_t matrices_to_reorg = NULL;

/* Return a hash for MTT, which is really a "matrix_info *".  */
static hashval_t
mtt_info_hash (const void *mtt)
{
  return htab_hash_pointer (((const struct matrix_info *) mtt)->decl);
}

/* Return true if MTT1 and MTT2 (which are really both of type
   "matrix_info *") refer to the same decl.  */
static int
mtt_info_eq (const void *mtt1, const void *mtt2)
{
  const struct matrix_info *const i1 = (const struct matrix_info *) mtt1;
  const struct matrix_info *const i2 = (const struct matrix_info *) mtt2;

  if (i1->decl == i2->decl)
    return true;

  return false;
}

/* Return false if STMT may contain a vector expression.
   In that case no matrix should be flattened.  */
static bool
may_flatten_matrices_1 (gimple stmt)
{
  tree t;

  switch (gimple_code (stmt))
    {
    case GIMPLE_ASSIGN:
      if (!gimple_assign_cast_p (stmt))
	return true;

      t = gimple_assign_rhs1 (stmt);
      while (CONVERT_EXPR_P (t))
	{
	  if (TREE_TYPE (t) && POINTER_TYPE_P (TREE_TYPE (t)))
	    {
	      tree pointee;

	      pointee = TREE_TYPE (t);
	      while (POINTER_TYPE_P (pointee))
		pointee = TREE_TYPE (pointee);
	      if (TREE_CODE (pointee) == VECTOR_TYPE)
		{
		  if (dump_file)
		    fprintf (dump_file,
			     "Found vector type, don't flatten matrix\n");
		  return false;
		}
	    }
	  t = TREE_OPERAND (t, 0);
	}
      break;
    case GIMPLE_ASM:
      /* Asm code could contain vector operations.  */
      return false;
      break;
    default:
      break;
    }
  return true;
}

/* Return false if there are hand-written vectors in the program.
   We disable the flattening in such a case.  */
static bool
may_flatten_matrices (struct cgraph_node *node)
{
  tree decl;
  struct function *func;
  basic_block bb;
  gimple_stmt_iterator gsi;

  decl = node->decl;
  if (node->analyzed)
    {
      func = DECL_STRUCT_FUNCTION (decl);
      FOR_EACH_BB_FN (bb, func)
	for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	  if (!may_flatten_matrices_1 (gsi_stmt (gsi)))
	    return false;
    }
  return true;
}

/* Given a VAR_DECL, check its type to determine whether it is
   a definition of a dynamically allocated matrix and therefore is
   a suitable candidate for the matrix flattening optimization.
   Return NULL if VAR_DECL is not such a decl.  Otherwise, allocate
   a MATRIX_INFO structure, fill it with the relevant information
   and return a pointer to it.
   TODO: also handle statically defined arrays.  */
static struct matrix_info *
analyze_matrix_decl (tree var_decl)
{
  struct matrix_info *m_node, tmpmi, *mi;
  tree var_type;
  int dim_num = 0;

  gcc_assert (matrices_to_reorg);

  if (TREE_CODE (var_decl) == PARM_DECL)
    var_type = DECL_ARG_TYPE (var_decl);
  else if (TREE_CODE (var_decl) == VAR_DECL)
    var_type = TREE_TYPE (var_decl);
  else
    return NULL;

  if (!POINTER_TYPE_P (var_type))
    return NULL;

  while (POINTER_TYPE_P (var_type))
    {
      var_type = TREE_TYPE (var_type);
      dim_num++;
    }

  if (dim_num <= 1)
    return NULL;

  if (!COMPLETE_TYPE_P (var_type)
      || TREE_CODE (TYPE_SIZE_UNIT (var_type)) != INTEGER_CST)
    return NULL;

  /* Check to see if this pointer is already in there.  */
  tmpmi.decl = var_decl;
  mi = (struct matrix_info *) htab_find (matrices_to_reorg, &tmpmi);

  if (mi)
    return NULL;

  /* Record the matrix.  */

  m_node = (struct matrix_info *) xcalloc (1, sizeof (struct matrix_info));
  m_node->decl = var_decl;
  m_node->num_dims = dim_num;
  m_node->free_stmts
    = (struct free_info *) xcalloc (dim_num, sizeof (struct free_info));

  /* Init min_indirect_level_escape to -1 to indicate that no escape
     analysis has been done yet.  */
  m_node->min_indirect_level_escape = -1;
  m_node->is_transposed_p = false;

  return m_node;
}

/* Free matrix E.  */
static void
mat_free (void *e)
{
  struct matrix_info *mat = (struct matrix_info *) e;

  if (!mat)
    return;

  if (mat->free_stmts)
    free (mat->free_stmts);
  if (mat->dim_hot_level)
    free (mat->dim_hot_level);
  if (mat->malloc_for_level)
    free (mat->malloc_for_level);
}

/* Find all potential matrices.
   TODO: currently we handle only multidimensional
   dynamically allocated arrays.  */
static void
find_matrices_decl (void)
{
  struct matrix_info *tmp;
  PTR *slot;
  struct varpool_node *vnode;

  gcc_assert (matrices_to_reorg);

  /* For every global variable in the program:
     Check to see if it's of a candidate type and record it.  */
  for (vnode = varpool_nodes_queue; vnode; vnode = vnode->next_needed)
    {
      tree var_decl = vnode->decl;

      if (!var_decl || TREE_CODE (var_decl) != VAR_DECL)
	continue;

      if (matrices_to_reorg)
	if ((tmp = analyze_matrix_decl (var_decl)))
	  {
	    if (!TREE_ADDRESSABLE (var_decl))
	      {
		slot = htab_find_slot (matrices_to_reorg, tmp, INSERT);
		*slot = tmp;
	      }
	  }
    }
  return;
}

/* Mark that the matrix MI escapes at level L.  */
static void
mark_min_matrix_escape_level (struct matrix_info *mi, int l, gimple s)
{
  if (mi->min_indirect_level_escape == -1
      || (mi->min_indirect_level_escape > l))
    {
      mi->min_indirect_level_escape = l;
      mi->min_indirect_level_escape_stmt = s;
    }
}

/* Find if the SSA variable is accessed inside the
   tree and record the tree containing it.
   The only relevant uses are the case of SSA_NAME, or SSA inside
   INDIRECT_REF, PLUS_EXPR, POINTER_PLUS_EXPR, MULT_EXPR.  */
static void
ssa_accessed_in_tree (tree t, struct ssa_acc_in_tree *a)
{
  a->t_code = TREE_CODE (t);
  switch (a->t_code)
    {
    case SSA_NAME:
      if (t == a->ssa_var)
	a->var_found = true;
      break;
    case INDIRECT_REF:
      if (SSA_VAR_P (TREE_OPERAND (t, 0))
	  && TREE_OPERAND (t, 0) == a->ssa_var)
	a->var_found = true;
      break;
    default:
      break;
    }
}

/* Find if the SSA variable is accessed on the right hand side of
   gimple call STMT.  */

static void
ssa_accessed_in_call_rhs (gimple stmt, struct ssa_acc_in_tree *a)
{
  tree decl;
  tree arg;
  size_t i;

  a->t_code = CALL_EXPR;
  for (i = 0; i < gimple_call_num_args (stmt); i++)
    {
      arg = gimple_call_arg (stmt, i);
      if (arg == a->ssa_var)
	{
	  a->var_found = true;
	  decl = gimple_call_fndecl (stmt);
	  a->t_tree = decl;
	  break;
	}
    }
}

/* Find if the SSA variable is accessed on the right hand side of
   gimple assign STMT.  */

static void
ssa_accessed_in_assign_rhs (gimple stmt, struct ssa_acc_in_tree *a)
{
  a->t_code = gimple_assign_rhs_code (stmt);
  switch (a->t_code)
    {
      tree op1, op2;

    case SSA_NAME:
    case INDIRECT_REF:
    CASE_CONVERT:
    case VIEW_CONVERT_EXPR:
      ssa_accessed_in_tree (gimple_assign_rhs1 (stmt), a);
      break;
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MULT_EXPR:
      op1 = gimple_assign_rhs1 (stmt);
      op2 = gimple_assign_rhs2 (stmt);

      if (op1 == a->ssa_var)
	{
	  a->var_found = true;
	  a->second_op = op2;
	}
      else if (op2 == a->ssa_var)
	{
	  a->var_found = true;
	  a->second_op = op1;
	}
      break;
    default:
      break;
    }
}

/* Record the access/allocation site information for matrix MI so we can
   handle it later in transformation.  */
static void
record_access_alloc_site_info (struct matrix_info *mi, gimple stmt, tree offset,
			       tree index, int level, bool is_alloc)
{
  struct access_site_info *acc_info;

  if (!mi->access_l)
    mi->access_l = VEC_alloc (access_site_info_p, heap, 100);

  acc_info
    = (struct access_site_info *)
    xcalloc (1, sizeof (struct access_site_info));
  acc_info->stmt = stmt;
  acc_info->offset = offset;
  acc_info->index = index;
  acc_info->function_decl = current_function_decl;
  acc_info->level = level;
  acc_info->is_alloc = is_alloc;

  VEC_safe_push (access_site_info_p, heap, mi->access_l, acc_info);
}

/* Record the malloc as the allocation site of the given LEVEL.  But
   first we make sure that all the size parameters passed to malloc in
   all the allocation sites could be pre-calculated before the call to
   the malloc of level 0 (the main malloc call).  */
static void
add_allocation_site (struct matrix_info *mi, gimple stmt, int level)
{
  struct malloc_call_data mcd;

  /* Make sure that the allocation sites are in the same function.  */
  if (!mi->allocation_function_decl)
    mi->allocation_function_decl = current_function_decl;
  else if (mi->allocation_function_decl != current_function_decl)
    {
      int min_malloc_level;

      gcc_assert (mi->malloc_for_level);

      /* Find the minimum malloc level that has already been seen;
	 we know its allocation function must be
	 MI->allocation_function_decl since it differs from
	 CURRENT_FUNCTION_DECL.  The escaping level should then be
	 MIN (LEVEL, MIN_MALLOC_LEVEL) - 1, and the allocation function
	 must be set accordingly.  */
      for (min_malloc_level = 0;
	   min_malloc_level < mi->max_malloced_level
	   && mi->malloc_for_level[min_malloc_level]; min_malloc_level++);
      if (level < min_malloc_level)
	{
	  mi->allocation_function_decl = current_function_decl;
	  mark_min_matrix_escape_level (mi, min_malloc_level, stmt);
	}
      else
	{
	  mark_min_matrix_escape_level (mi, level, stmt);
	  /* It cannot be that LEVEL == MIN_MALLOC_LEVEL;
	     we would have returned earlier.  */
	  return;
	}
    }

  /* Find the correct malloc information.  */
  collect_data_for_malloc_call (stmt, &mcd);

  /* We accept only calls to malloc function; we do not accept
     calls like calloc and realloc.  */
  if (!mi->malloc_for_level)
    {
      mi->malloc_for_level = XCNEWVEC (gimple, level + 1);
      mi->max_malloced_level = level + 1;
    }
  else if (mi->max_malloced_level <= level)
    {
      mi->malloc_for_level
	= XRESIZEVEC (gimple, mi->malloc_for_level, level + 1);

      /* Zero the newly allocated items; XRESIZEVEC leaves them
	 uninitialized, so start clearing at the old length rather
	 than one past it.  */
      memset (&(mi->malloc_for_level[mi->max_malloced_level]),
	      0, (level + 1 - mi->max_malloced_level) * sizeof (gimple));

      mi->max_malloced_level = level + 1;
    }
  mi->malloc_for_level[level] = stmt;
}

/* Given an assignment statement STMT whose left-hand side we know to
   be the matrix MI variable, we traverse the immediate
   uses backwards until we get to a malloc site.  We make sure that
   there is one and only one malloc site that sets this variable.  When
   we are performing the flattening we generate a new variable that
   will hold the size for each dimension; each malloc that allocates a
   dimension has the size parameter; we use that parameter to
   initialize the dimension size variable so we can use it later in
   the address calculations.  LEVEL is the dimension we're inspecting.
   Return if STMT is related to an allocation site.  */

static void
analyze_matrix_allocation_site (struct matrix_info *mi, gimple stmt,
				int level, sbitmap visited)
{
  if (gimple_assign_copy_p (stmt) || gimple_assign_cast_p (stmt))
    {
      tree rhs = gimple_assign_rhs1 (stmt);

      if (TREE_CODE (rhs) == SSA_NAME)
	{
	  gimple def = SSA_NAME_DEF_STMT (rhs);

	  analyze_matrix_allocation_site (mi, def, level, visited);
	  return;
	}
      /* If we are back to the original matrix variable then we
         are sure that this is analyzed as an access site.  */
      else if (rhs == mi->decl)
	return;
    }
  /* A result of call to malloc.  */
  else if (is_gimple_call (stmt))
    {
      int call_flags = gimple_call_flags (stmt);

      if (!(call_flags & ECF_MALLOC))
	{
	  mark_min_matrix_escape_level (mi, level, stmt);
	  return;
	}
      else
	{
	  tree malloc_fn_decl;
	  const char *malloc_fname;

	  malloc_fn_decl = gimple_call_fndecl (stmt);
	  if (malloc_fn_decl == NULL_TREE)
	    {
	      mark_min_matrix_escape_level (mi, level, stmt);
	      return;
	    }
	  malloc_fname = IDENTIFIER_POINTER (DECL_NAME (malloc_fn_decl));
	  if (DECL_FUNCTION_CODE (malloc_fn_decl) != BUILT_IN_MALLOC)
	    {
	      if (dump_file)
		fprintf (dump_file,
			 "Matrix %s is an argument to function %s\n",
			 get_name (mi->decl), get_name (malloc_fn_decl));
	      mark_min_matrix_escape_level (mi, level, stmt);
	      return;
	    }
	}
      /* This is a call to malloc of level 'level'.
	 mi->max_malloced_level-1 == level  means that we've
	 seen a malloc statement of level 'level' before.
	 If the statement is not the same one that we've
	 seen before, then there's another malloc statement
	 for the same level, which means that we need to mark
	 it escaping.  */
      if (mi->malloc_for_level
	  && mi->max_malloced_level-1 == level
	  && mi->malloc_for_level[level] != stmt)
	{
	  mark_min_matrix_escape_level (mi, level, stmt);
	  return;
	}
      else
	add_allocation_site (mi, stmt, level);
      return;
    }
  /* Looks like we don't know what is happening in this
     statement so be on the safe side and mark it as escaping.  */
  mark_min_matrix_escape_level (mi, level, stmt);
}

/* The transposing decision making.
   In order to calculate the profitability of transposing, we collect two
   types of information regarding the accesses:
   1. profiling information used to express the hotness of an access, that
      is how often the matrix is accessed by this access site (count of the
      access site).
   2. which dimension in the access site is iterated by the inner
      most loop containing this access.

   The matrix will have a calculated value of weighted hotness for each
   dimension.
   Intuitively the hotness level of a dimension is a function of how
   many times it was the most frequently accessed dimension in the
   highly executed access sites of this matrix.

   As computed by the following equation:

                      m     n
                     __    __
                     \     \    dim_hot_level[i] +=
                     /_    /_
                     j     i
                                  acc[j]->dim[i]->iter_by_inner_loop * count(j)

   Where n is the number of dims and m is the number of the matrix
   access sites.  acc[j]->dim[i]->iter_by_inner_loop is 1 if acc[j]
   iterates over dim[i] in its innermost loop, and is 0 otherwise.
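
   For instance (an illustrative calculation, not produced verbatim by
   the pass): a matrix with two access sites, one executed 1000 times
   whose innermost loop iterates dimension 1 and one executed 10 times
   whose innermost loop iterates dimension 0, accumulates
   dim_hot_level = {10, 1000}; dimension 1 is therefore the hottest.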

   The organization of the new matrix should be according to the
   hotness of each dimension.  The hotness of a dimension implies
   the locality of its elements.  */
static int
analyze_transpose (void **slot, void *data ATTRIBUTE_UNUSED)
{
  struct matrix_info *mi = (struct matrix_info *) *slot;
  int min_escape_l = mi->min_indirect_level_escape;
  struct loop *loop;
  affine_iv iv;
  struct access_site_info *acc_info;
  int i;

  if (min_escape_l < 2 || !mi->access_l)
    {
      if (mi->access_l)
	{
	  for (i = 0;
	       VEC_iterate (access_site_info_p, mi->access_l, i, acc_info);
	       i++)
	    free (acc_info);
	  VEC_free (access_site_info_p, heap, mi->access_l);
	}
      return 1;
    }
  if (!mi->dim_hot_level)
    mi->dim_hot_level =
      (gcov_type *) xcalloc (min_escape_l, sizeof (gcov_type));

  for (i = 0; VEC_iterate (access_site_info_p, mi->access_l, i, acc_info);
       i++)
    {
      if (gimple_assign_rhs_code (acc_info->stmt) == POINTER_PLUS_EXPR
	  && acc_info->level < min_escape_l)
	{
	  loop = loop_containing_stmt (acc_info->stmt);
	  if (!loop || loop->inner)
	    {
	      free (acc_info);
	      continue;
	    }
	  if (simple_iv (loop, loop, acc_info->offset, &iv, true))
	    {
	      if (iv.step != NULL)
		{
		  HOST_WIDE_INT istep;

		  istep = int_cst_value (iv.step);
		  if (istep != 0)
		    {
		      acc_info->iterated_by_inner_most_loop_p = 1;
		      mi->dim_hot_level[acc_info->level] +=
			gimple_bb (acc_info->stmt)->count;
		    }
		}
	    }
	}
      free (acc_info);
    }
  VEC_free (access_site_info_p, heap, mi->access_l);

  return 1;
}

/* Find the index which defines the OFFSET from base.
   We walk from use to def until we find how the offset was defined.  */
static tree
get_index_from_offset (tree offset, gimple def_stmt)
{
  tree op1, op2, index;

  if (gimple_code (def_stmt) == GIMPLE_PHI)
    return NULL;
  if ((gimple_assign_copy_p (def_stmt) || gimple_assign_cast_p (def_stmt))
      && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME)
    return get_index_from_offset (offset,
				  SSA_NAME_DEF_STMT (gimple_assign_rhs1 (def_stmt)));
  else if (is_gimple_assign (def_stmt)
	   && gimple_assign_rhs_code (def_stmt) == MULT_EXPR)
    {
      op1 = gimple_assign_rhs1 (def_stmt);
      op2 = gimple_assign_rhs2 (def_stmt);
      if (TREE_CODE (op1) != INTEGER_CST && TREE_CODE (op2) != INTEGER_CST)
	return NULL;
      index = (TREE_CODE (op1) == INTEGER_CST) ? op2 : op1;
      return index;
    }
  else
    return NULL_TREE;
}

/* Update MI->dimension_type_size[CURRENT_INDIRECT_LEVEL] with the size
   of the type related to the SSA_VAR, or the type related to the
   lhs of STMT, in the case that it is an INDIRECT_REF.  */
static void
update_type_size (struct matrix_info *mi, gimple stmt, tree ssa_var,
		  int current_indirect_level)
{
  tree lhs;
  HOST_WIDE_INT type_size;

  /* Update type according to the type of the INDIRECT_REF expr.  */
  if (is_gimple_assign (stmt)
      && TREE_CODE (gimple_assign_lhs (stmt)) == INDIRECT_REF)
    {
      lhs = gimple_assign_lhs (stmt);
      gcc_assert (POINTER_TYPE_P
		  (TREE_TYPE (SSA_NAME_VAR (TREE_OPERAND (lhs, 0)))));
      type_size =
	int_size_in_bytes (TREE_TYPE
			   (TREE_TYPE
			    (SSA_NAME_VAR (TREE_OPERAND (lhs, 0)))));
    }
  else
    type_size = int_size_in_bytes (TREE_TYPE (ssa_var));

  /* Record the size of elements accessed (as a whole)
     in the current indirection level (dimension).  If the size of
     elements is not known at compile time, mark it as escaping.  */
  if (type_size <= 0)
    mark_min_matrix_escape_level (mi, current_indirect_level, stmt);
  else
    {
      int l = current_indirect_level;

      if (!mi->dimension_type_size)
	{
	  mi->dimension_type_size
	    = (HOST_WIDE_INT *) xcalloc (l + 1, sizeof (HOST_WIDE_INT));
	  mi->dimension_type_size_len = l + 1;
	}
      else if (mi->dimension_type_size_len < l + 1)
	{
	  mi->dimension_type_size
	    = (HOST_WIDE_INT *) xrealloc (mi->dimension_type_size,
					  (l + 1) * sizeof (HOST_WIDE_INT));
	  memset (&mi->dimension_type_size[mi->dimension_type_size_len],
		  0, (l + 1 - mi->dimension_type_size_len)
		  * sizeof (HOST_WIDE_INT));
	  mi->dimension_type_size_len = l + 1;
	}
      /* Make sure all the accesses in the same level have the same size
         of the type.  */
      if (!mi->dimension_type_size[l])
	mi->dimension_type_size[l] = type_size;
      else if (mi->dimension_type_size[l] != type_size)
	mark_min_matrix_escape_level (mi, l, stmt);
    }
}

/* USE_STMT represents a GIMPLE_CALL, where one of the arguments is the
   ssa var that we want to check because it came from some use of matrix
   MI.  CURRENT_INDIRECT_LEVEL is the indirection level we reached so
   far.  */

static int
analyze_accesses_for_call_stmt (struct matrix_info *mi, tree ssa_var,
				gimple use_stmt, int current_indirect_level)
{
  tree fndecl = gimple_call_fndecl (use_stmt);

  if (gimple_call_lhs (use_stmt))
    {
      tree lhs = gimple_call_lhs (use_stmt);
      struct ssa_acc_in_tree lhs_acc, rhs_acc;

      memset (&lhs_acc, 0, sizeof (lhs_acc));
      memset (&rhs_acc, 0, sizeof (rhs_acc));

      lhs_acc.ssa_var = ssa_var;
      lhs_acc.t_code = ERROR_MARK;
      ssa_accessed_in_tree (lhs, &lhs_acc);
      rhs_acc.ssa_var = ssa_var;
      rhs_acc.t_code = ERROR_MARK;
      ssa_accessed_in_call_rhs (use_stmt, &rhs_acc);

      /* The SSA must be either in the left side or in the right side,
	 to understand what is happening.
	 In case the SSA_NAME is found in both sides we should be escaping
	 at this level because in this case we cannot calculate the
	 address correctly.  */
      if ((lhs_acc.var_found && rhs_acc.var_found
	   && lhs_acc.t_code == INDIRECT_REF)
	  || (!rhs_acc.var_found && !lhs_acc.var_found))
	{
	  mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
	  return current_indirect_level;
	}
      gcc_assert (!rhs_acc.var_found || !lhs_acc.var_found);

      /* If we are storing to the matrix at some level, then mark it as
	 escaping at that level.  */
      if (lhs_acc.var_found)
	{
	  int l = current_indirect_level + 1;

	  gcc_assert (lhs_acc.t_code == INDIRECT_REF);
	  mark_min_matrix_escape_level (mi, l, use_stmt);
	  return current_indirect_level;
	}
    }

  if (fndecl)
    {
      if (DECL_FUNCTION_CODE (fndecl) != BUILT_IN_FREE)
	{
	  if (dump_file)
	    fprintf (dump_file,
		     "Matrix %s: Function call %s, level %d escapes.\n",
		     get_name (mi->decl), get_name (fndecl),
		     current_indirect_level);
	  mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
	}
      else if (mi->free_stmts[current_indirect_level].stmt != NULL
	       && mi->free_stmts[current_indirect_level].stmt != use_stmt)
	mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
      else
	{
	  /* Record the free statements so we can delete them
	     later.  */
	  int l = current_indirect_level;

	  mi->free_stmts[l].stmt = use_stmt;
	  mi->free_stmts[l].func = current_function_decl;
	}
    }
  return current_indirect_level;
}

/* USE_STMT represents a phi node of the ssa var that we want to
   check because it came from some use of matrix MI.
   We check all the escaping levels that get to the PHI node
   and make sure they are all the same escaping;
   if not (which is rare) we let the escaping level be the
   minimum level that gets into that PHI because starting from
   that level we cannot expect the behavior of the indirections.
   CURRENT_INDIRECT_LEVEL is the indirection level we reached so far.  */

static void
analyze_accesses_for_phi_node (struct matrix_info *mi, gimple use_stmt,
			       int current_indirect_level, sbitmap visited,
			       bool record_accesses)
{
  struct matrix_access_phi_node tmp_maphi, *maphi, **pmaphi;

  tmp_maphi.phi = use_stmt;
  if ((maphi = (struct matrix_access_phi_node *)
       htab_find (htab_mat_acc_phi_nodes, &tmp_maphi)))
    {
      if (maphi->indirection_level == current_indirect_level)
	return;
      else
	{
	  int level = MIN (maphi->indirection_level,
			   current_indirect_level);
	  size_t j;
	  gimple stmt = NULL;

	  maphi->indirection_level = level;
	  for (j = 0; j < gimple_phi_num_args (use_stmt); j++)
	    {
	      tree def = PHI_ARG_DEF (use_stmt, j);

	      if (gimple_code (SSA_NAME_DEF_STMT (def)) != GIMPLE_PHI)
		stmt = SSA_NAME_DEF_STMT (def);
	    }
	  mark_min_matrix_escape_level (mi, level, stmt);
	}
      return;
    }
  maphi = (struct matrix_access_phi_node *)
    xcalloc (1, sizeof (struct matrix_access_phi_node));
  maphi->phi = use_stmt;
  maphi->indirection_level = current_indirect_level;

  /* Insert to hash table.  */
  pmaphi = (struct matrix_access_phi_node **)
    htab_find_slot (htab_mat_acc_phi_nodes, maphi, INSERT);
  gcc_assert (pmaphi);
  *pmaphi = maphi;

  if (!TEST_BIT (visited, SSA_NAME_VERSION (PHI_RESULT (use_stmt))))
    {
      SET_BIT (visited, SSA_NAME_VERSION (PHI_RESULT (use_stmt)));
      analyze_matrix_accesses (mi, PHI_RESULT (use_stmt),
			       current_indirect_level, false, visited,
			       record_accesses);
      RESET_BIT (visited, SSA_NAME_VERSION (PHI_RESULT (use_stmt)));
    }
}

/* USE_STMT represents an assign statement (the rhs or lhs include
   the ssa var that we want to check because it came from some use of
   matrix MI).  CURRENT_INDIRECT_LEVEL is the indirection level we reached
   so far.  */

static int
analyze_accesses_for_assign_stmt (struct matrix_info *mi, tree ssa_var,
				  gimple use_stmt, int current_indirect_level,
				  bool last_op, sbitmap visited,
				  bool record_accesses)
{
  tree lhs = gimple_get_lhs (use_stmt);
  struct ssa_acc_in_tree lhs_acc, rhs_acc;

  memset (&lhs_acc, 0, sizeof (lhs_acc));
  memset (&rhs_acc, 0, sizeof (rhs_acc));

  lhs_acc.ssa_var = ssa_var;
  lhs_acc.t_code = ERROR_MARK;
  ssa_accessed_in_tree (lhs, &lhs_acc);
  rhs_acc.ssa_var = ssa_var;
  rhs_acc.t_code = ERROR_MARK;
  ssa_accessed_in_assign_rhs (use_stmt, &rhs_acc);

  /* The SSA must be either in the left side or in the right side,
     to understand what is happening.
     In case the SSA_NAME is found in both sides we should be escaping
     at this level because in this case we cannot calculate the
     address correctly.  */
  if ((lhs_acc.var_found && rhs_acc.var_found
       && lhs_acc.t_code == INDIRECT_REF)
      || (!rhs_acc.var_found && !lhs_acc.var_found))
    {
      mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
      return current_indirect_level;
    }
  gcc_assert (!rhs_acc.var_found || !lhs_acc.var_found);

  /* If we are storing to the matrix at some level, then mark it as
     escaping at that level.  */
  if (lhs_acc.var_found)
    {
      int l = current_indirect_level + 1;

      gcc_assert (lhs_acc.t_code == INDIRECT_REF);

      if (!(gimple_assign_copy_p (use_stmt)
	    || gimple_assign_cast_p (use_stmt))
	  || (TREE_CODE (gimple_assign_rhs1 (use_stmt)) != SSA_NAME))
	mark_min_matrix_escape_level (mi, l, use_stmt);
      else
	{
	  gimple def_stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (use_stmt));
	  analyze_matrix_allocation_site (mi, def_stmt, l, visited);
	  if (record_accesses)
	    record_access_alloc_site_info (mi, use_stmt, NULL_TREE,
					   NULL_TREE, l, true);
	  update_type_size (mi, use_stmt, NULL, l);
	}
      return current_indirect_level;
    }
  /* Now, check the right-hand-side, to see how the SSA variable
     is used.  */
  if (rhs_acc.var_found)
    {
      if (rhs_acc.t_code != INDIRECT_REF
	  && rhs_acc.t_code != POINTER_PLUS_EXPR && rhs_acc.t_code != SSA_NAME)
	{
	  mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
	  return current_indirect_level;
	}
      /* If the access in the RHS has an indirection increase the
	 indirection level.  */
      if (rhs_acc.t_code == INDIRECT_REF)
	{
	  if (record_accesses)
	    record_access_alloc_site_info (mi, use_stmt, NULL_TREE,
					   NULL_TREE,
					   current_indirect_level, true);
	  current_indirect_level += 1;
	}
      else if (rhs_acc.t_code == POINTER_PLUS_EXPR)
	{
	  gcc_assert (rhs_acc.second_op);
	  if (last_op)
	    /* Currently we support only one PLUS expression on the
	       SSA_NAME that holds the base address of the current
	       indirection level; to support more general case there
	       is a need to hold a stack of expressions and regenerate
	       the calculation later.  */
	    mark_min_matrix_escape_level (mi, current_indirect_level,
					  use_stmt);
	  else
	    {
	      tree index;
	      tree op1, op2;

	      op1 = gimple_assign_rhs1 (use_stmt);
	      op2 = gimple_assign_rhs2 (use_stmt);

	      op2 = (op1 == ssa_var) ? op2 : op1;
	      if (TREE_CODE (op2) == INTEGER_CST)
		index =
		  build_int_cst (TREE_TYPE (op1),
				 TREE_INT_CST_LOW (op2) /
				 int_size_in_bytes (TREE_TYPE (op1)));
	      else
		{
		  index =
		    get_index_from_offset (op2, SSA_NAME_DEF_STMT (op2));
		  if (index == NULL_TREE)
		    {
		      mark_min_matrix_escape_level (mi,
						    current_indirect_level,
						    use_stmt);
		      return current_indirect_level;
		    }
		}
	      if (record_accesses)
		record_access_alloc_site_info (mi, use_stmt, op2,
					       index,
					       current_indirect_level, false);
	    }
	}
      /* If we are storing this level of indirection mark it as
	 escaping.  */
      if (lhs_acc.t_code == INDIRECT_REF || TREE_CODE (lhs) != SSA_NAME)
	{
	  int l = current_indirect_level;

	  /* One exception is when we are storing to the matrix
	     variable itself; this is the case of malloc, we must make
	     sure that it's the one and only one call to malloc so
	     we call analyze_matrix_allocation_site to check
	     this out.  */
	  if (TREE_CODE (lhs) != VAR_DECL || lhs != mi->decl)
	    mark_min_matrix_escape_level (mi, current_indirect_level,
					  use_stmt);
	  else
	    {
	      /* Also update the escaping level.  */
	      analyze_matrix_allocation_site (mi, use_stmt, l, visited);
	      if (record_accesses)
		record_access_alloc_site_info (mi, use_stmt, NULL_TREE,
					       NULL_TREE, l, true);
	    }
	}
      else
	{
	  /* We are placing it in an SSA, follow that SSA.  */
	  analyze_matrix_accesses (mi, lhs,
				   current_indirect_level,
				   rhs_acc.t_code == POINTER_PLUS_EXPR,
				   visited, record_accesses);
	}
    }
  return current_indirect_level;
}

/* Given a SSA_VAR (coming from a use statement of the matrix MI),
   follow its uses and level of indirection and find out the minimum
   indirection level it escapes in (the highest dimension) and the maximum
   level it is accessed in (this will be the actual dimension of the
   matrix).  The information is accumulated in MI.
   We look at the immediate uses; if one escapes we finish, and if not
   we make a recursive call for each one of the immediate uses of the
   resulting SSA name.  */
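
/* For example (an illustrative GIMPLE chain for an access a[i][j],
   modulo temporaries and casts):

     x_1 = a;                  <-- indirection level 0
     x_2 = x_1 + offset_i;     <-- POINTER_PLUS_EXPR, still level 0
     x_3 = *x_2;               <-- INDIRECT_REF, level becomes 1
     x_4 = x_3 + offset_j;
     x_5 = *x_4;               <-- level 2, the element itself  */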
static void
analyze_matrix_accesses (struct matrix_info *mi, tree ssa_var,
			 int current_indirect_level, bool last_op,
			 sbitmap visited, bool record_accesses)
{
  imm_use_iterator imm_iter;
  use_operand_p use_p;

  update_type_size (mi, SSA_NAME_DEF_STMT (ssa_var), ssa_var,
		    current_indirect_level);

  /* We don't go beyond the escaping level when we are performing the
     flattening.  NOTE: we keep the last indirection level that doesn't
     escape.  */
  if (mi->min_indirect_level_escape > -1
      && mi->min_indirect_level_escape <= current_indirect_level)
    return;

  /* Now go over the uses of the SSA_NAME and check how it is used in
     each one of them.  We are mainly looking for the pattern INDIRECT_REF,
     then a POINTER_PLUS_EXPR, then INDIRECT_REF etc., while in between there
     could be any number of copies and casts.  */
  gcc_assert (TREE_CODE (ssa_var) == SSA_NAME);

  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, ssa_var)
    {
      gimple use_stmt = USE_STMT (use_p);
      if (gimple_code (use_stmt) == GIMPLE_PHI)
	/* We check all the escaping levels that get to the PHI node
	   and make sure they are all the same escaping;
	   if not (which is rare) we let the escaping level be the
	   minimum level that gets into that PHI because starting from
	   that level we cannot expect the behavior of the indirections.  */

	analyze_accesses_for_phi_node (mi, use_stmt, current_indirect_level,
				       visited, record_accesses);

      else if (is_gimple_call (use_stmt))
	analyze_accesses_for_call_stmt (mi, ssa_var, use_stmt,
					current_indirect_level);
      else if (is_gimple_assign (use_stmt))
	current_indirect_level =
	  analyze_accesses_for_assign_stmt (mi, ssa_var, use_stmt,
					    current_indirect_level, last_op,
					    visited, record_accesses);
    }
}

typedef struct
{
  tree fn;
  gimple stmt;
} check_var_data;

/* A walk_tree function to go over the VAR_DECL, PARM_DECL nodes of
   the malloc size expression and check that those aren't changed
   over the function.  */
static tree
check_var_notmodified_p (tree *tp, int *walk_subtrees, void *data)
{
  basic_block bb;
  tree t = *tp;
  check_var_data *callback_data = (check_var_data*) data;
  tree fn = callback_data->fn;
  gimple_stmt_iterator gsi;
  gimple stmt;

  if (TREE_CODE (t) != VAR_DECL && TREE_CODE (t) != PARM_DECL)
    return NULL_TREE;

  FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (fn))
    {
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  stmt = gsi_stmt (gsi);
	  if (!is_gimple_assign (stmt) && !is_gimple_call (stmt))
	    continue;
	  if (gimple_get_lhs (stmt) == t)
	    {
	      callback_data->stmt = stmt;
	      return t;
	    }
	}
    }
  *walk_subtrees = 1;
  return NULL_TREE;
}

/* Go backwards in the use-def chains and find out the expression
   represented by the possible SSA name in STMT, until it is composed
   of only VAR_DECL, PARM_DECL and INT_CST.  In case of phi nodes
   we make sure that all the arguments represent the same subexpression,
   otherwise we fail.  */

static tree
can_calculate_stmt_before_stmt (gimple stmt, sbitmap visited)
{
  tree op1, op2, res;
  enum tree_code code;

  switch (gimple_code (stmt))
    {
    case GIMPLE_ASSIGN:
      code = gimple_assign_rhs_code (stmt);
      op1 = gimple_assign_rhs1 (stmt);

      switch (code)
	{
	case POINTER_PLUS_EXPR:
	case PLUS_EXPR:
	case MINUS_EXPR:
	case MULT_EXPR:

	  op2 = gimple_assign_rhs2 (stmt);
	  op1 = can_calculate_expr_before_stmt (op1, visited);
	  if (!op1)
	    return NULL_TREE;
	  op2 = can_calculate_expr_before_stmt (op2, visited);
	  if (op2)
	    return fold_build2 (code, gimple_expr_type (stmt), op1, op2);
	  return NULL_TREE;

	CASE_CONVERT:
	  res = can_calculate_expr_before_stmt (op1, visited);
	  if (res != NULL_TREE)
	    return build1 (code, gimple_expr_type (stmt), res);
	  else
	    return NULL_TREE;

	default:
	  if (gimple_assign_single_p (stmt))
	    return can_calculate_expr_before_stmt (op1, visited);
	  else
	    return NULL_TREE;
	}

    case GIMPLE_PHI:
      {
	size_t j;

	res = NULL_TREE;
	/* Make sure all the arguments represent the same value.  */
	for (j = 0; j < gimple_phi_num_args (stmt); j++)
	  {
	    tree new_res;
	    tree def = PHI_ARG_DEF (stmt, j);

	    new_res = can_calculate_expr_before_stmt (def, visited);
	    if (res == NULL_TREE)
	      res = new_res;
	    else if (!new_res || !expressions_equal_p (res, new_res))
	      return NULL_TREE;
	  }
	return res;
      }

    default:
      return NULL_TREE;
    }
}

/* Go backwards in the use-def chains and find out the expression
   represented by the possible SSA name in EXPR, until it is composed
   of only VAR_DECL, PARM_DECL and INT_CST.  In case of phi nodes
   we make sure that all the arguments represent the same subexpression,
   otherwise we fail.  */
static tree
can_calculate_expr_before_stmt (tree expr, sbitmap visited)
{
  gimple def_stmt;
  tree res;

  switch (TREE_CODE (expr))
    {
    case SSA_NAME:
      /* Case of loop, we don't know how to represent this expression.  */
      if (TEST_BIT (visited, SSA_NAME_VERSION (expr)))
	return NULL_TREE;

      SET_BIT (visited, SSA_NAME_VERSION (expr));
      def_stmt = SSA_NAME_DEF_STMT (expr);
      res = can_calculate_stmt_before_stmt (def_stmt, visited);
      RESET_BIT (visited, SSA_NAME_VERSION (expr));
      return res;
    case VAR_DECL:
    case PARM_DECL:
    case INTEGER_CST:
      return expr;

    default:
      return NULL_TREE;
    }
}

/* There should be only one allocation function for the dimensions
   that don't escape.  Here we check the allocation sites in this
   function.  We must make sure that all the dimensions are allocated
   using malloc and that the malloc size parameter expression could be
   pre-calculated before the call to the malloc of dimension 0.

   Given a candidate matrix for flattening -- MI -- check if it's
   appropriate for flattening -- we analyze the allocation
   sites that we recorded in the previous analysis.  The result of the
   analysis is a level of indirection (matrix dimension) in which the
   flattening is safe.  We check the following conditions:
   1. There is only one allocation site for each dimension.
   2. The allocation sites of all the dimensions are in the same
      function.
   (The above two are taken care of during the analysis when
   we check the allocation site.)
   3. All the dimensions that we flatten are allocated at once; thus
      the total size must be known before the allocation of the
      dimension 0 (top level) -- we must make sure we represent the
      size of the allocation as an expression of global parameters or
      constants and that those do not change over the function.  */
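
/* For instance (an illustrative conformant allocation, assuming dim1
   and dim2 are global variables never written in this function):

     a = (int **) malloc (dim1 * sizeof (int *));
     for (i = 0; i < dim1; i++)
       a[i] = (int *) malloc (dim2 * sizeof (int));

   Here the level-1 size expression dim2 * sizeof (int) can be computed
   before the level-0 malloc, so all conditions above are met.  */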

static int
check_allocation_function (void **slot, void *data ATTRIBUTE_UNUSED)
{
  int level;
  gimple_stmt_iterator gsi;
  basic_block bb_level_0;
  struct matrix_info *mi = (struct matrix_info *) *slot;
  sbitmap visited;

  if (!mi->malloc_for_level)
    return 1;

  /* Do nothing if the current function is not the allocation
     function of MI.  */
  if (mi->allocation_function_decl != current_function_decl
      /* We aren't in the main allocation function yet.  */
      || !mi->malloc_for_level[0])
    return 1;

  /* Allocate the bitmap only once we know we will use it, so that it
     cannot leak through the early returns above.  */
  visited = sbitmap_alloc (num_ssa_names);

  for (level = 1; level < mi->max_malloced_level; level++)
    if (!mi->malloc_for_level[level])
      break;

  mark_min_matrix_escape_level (mi, level, NULL);

  gsi = gsi_for_stmt (mi->malloc_for_level[0]);
  bb_level_0 = gsi.bb;

  /* Check if the expression of the size passed to malloc could be
     pre-calculated before the malloc of level 0.  */
  for (level = 1; level < mi->min_indirect_level_escape; level++)
    {
      gimple call_stmt;
      tree size;
      struct malloc_call_data mcd = {NULL, NULL_TREE, NULL_TREE};

      call_stmt = mi->malloc_for_level[level];

      /* Find the correct malloc information.  */
      collect_data_for_malloc_call (call_stmt, &mcd);

      /* No need to check anticipation for constants.  */
      if (TREE_CODE (mcd.size_var) == INTEGER_CST)
	{
	  if (!mi->dimension_size)
	    {
	      mi->dimension_size =
		(tree *) xcalloc (mi->min_indirect_level_escape,
				  sizeof (tree));
	      mi->dimension_size_orig =
		(tree *) xcalloc (mi->min_indirect_level_escape,
				  sizeof (tree));
	    }
	  mi->dimension_size[level] = mcd.size_var;
	  mi->dimension_size_orig[level] = mcd.size_var;
	  continue;
	}
      /* ??? Here we should also add the way to calculate the size
	 expression, not only know that it is anticipated.  */
      sbitmap_zero (visited);
      size = can_calculate_expr_before_stmt (mcd.size_var, visited);
      if (size == NULL_TREE)
	{
	  mark_min_matrix_escape_level (mi, level, call_stmt);
	  if (dump_file)
	    fprintf (dump_file,
		     "Matrix %s: Cannot calculate the size of allocation, escaping at level %d\n",
		     get_name (mi->decl), level);
	  break;
	}
      if (!mi->dimension_size)
	{
	  mi->dimension_size =
	    (tree *) xcalloc (mi->min_indirect_level_escape, sizeof (tree));
	  mi->dimension_size_orig =
	    (tree *) xcalloc (mi->min_indirect_level_escape, sizeof (tree));
	}
      mi->dimension_size[level] = size;
      mi->dimension_size_orig[level] = size;
    }

  /* We don't need those anymore.  */
  for (level = mi->min_indirect_level_escape;
       level < mi->max_malloced_level; level++)
    mi->malloc_for_level[level] = NULL;

  sbitmap_free (visited);
  return 1;
}

/* Track all access and allocation sites.  */
static void
find_sites_in_func (bool record)
{
  sbitmap visited_stmts_1;

  gimple_stmt_iterator gsi;
  gimple stmt;
  basic_block bb;
  struct matrix_info tmpmi, *mi;

  visited_stmts_1 = sbitmap_alloc (num_ssa_names);

  FOR_EACH_BB (bb)
    {
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  tree lhs;

	  stmt = gsi_stmt (gsi);
	  lhs = gimple_get_lhs (stmt);
	  if (lhs != NULL_TREE
	      && TREE_CODE (lhs) == VAR_DECL)
	    {
	      tmpmi.decl = lhs;
	      if ((mi = (struct matrix_info *) htab_find (matrices_to_reorg,
							  &tmpmi)))
		{
		  sbitmap_zero (visited_stmts_1);
		  analyze_matrix_allocation_site (mi, stmt, 0, visited_stmts_1);
		}
	    }
	  if (is_gimple_assign (stmt)
	      && gimple_assign_single_p (stmt)
	      && TREE_CODE (lhs) == SSA_NAME
	      && TREE_CODE (gimple_assign_rhs1 (stmt)) == VAR_DECL)
	    {
	      tmpmi.decl = gimple_assign_rhs1 (stmt);
	      if ((mi = (struct matrix_info *) htab_find (matrices_to_reorg,
							  &tmpmi)))
		{
		  sbitmap_zero (visited_stmts_1);
		  analyze_matrix_accesses (mi, lhs, 0,
					   false, visited_stmts_1, record);
		}
	    }
	}
    }
  sbitmap_free (visited_stmts_1);
}

/* Traverse the use-def chains to see if there are matrices that
   are passed through pointers and we cannot know how they are accessed.
   For each SSA-name defined by a global variable of our interest,
   we traverse the use-def chains of the SSA and follow the indirections,
   and record in what level of indirection the use of the variable
   escapes.  A use of a pointer escapes when it is passed to a function,
   stored into memory or assigned (except in malloc and free calls).  */

static void
record_all_accesses_in_func (void)
{
  unsigned i;
  sbitmap visited_stmts_1;

  visited_stmts_1 = sbitmap_alloc (num_ssa_names);

  for (i = 0; i < num_ssa_names; i++)
    {
      struct matrix_info tmpmi, *mi;
      tree ssa_var = ssa_name (i);
      tree rhs, lhs;

      if (!ssa_var
	  || !is_gimple_assign (SSA_NAME_DEF_STMT (ssa_var))
	  || !gimple_assign_single_p (SSA_NAME_DEF_STMT (ssa_var)))
	continue;
      rhs = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (ssa_var));
      lhs = gimple_assign_lhs (SSA_NAME_DEF_STMT (ssa_var));
      if (TREE_CODE (rhs) != VAR_DECL && TREE_CODE (lhs) != VAR_DECL)
	continue;

      /* If the RHS is a matrix that we want to analyze, follow the def-use
	 chain for this SSA_VAR and check for escapes or apply the
	 flattening.  */
      tmpmi.decl = rhs;
      if ((mi = (struct matrix_info *) htab_find (matrices_to_reorg, &tmpmi)))
	{
	  /* This variable will track the visited PHI nodes, so we can limit
	     its size to the maximum number of SSA names.  */
	  sbitmap_zero (visited_stmts_1);
	  analyze_matrix_accesses (mi, ssa_var,
				   0, false, visited_stmts_1, true);
	}
    }
  sbitmap_free (visited_stmts_1);
}

/* Used when we want to convert the expression: RESULT = something *
   ORIG to RESULT = something * NEW_VAL.  If ORIG and NEW_VAL are powers
   of 2, shift operations can be done, else division and
   multiplication.  */

static tree
compute_offset (HOST_WIDE_INT orig, HOST_WIDE_INT new_val, tree result)
{
  int x, y;
  tree result1, ratio, log, orig_tree, new_tree;

  x = exact_log2 (orig);
  y = exact_log2 (new_val);

  if (x != -1 && y != -1)
    {
      if (x == y)
	return result;
      else if (x > y)
	{
	  log = build_int_cst (TREE_TYPE (result), x - y);
	  result1 =
	    fold_build2 (LSHIFT_EXPR, TREE_TYPE (result), result, log);
	  return result1;
	}
      log = build_int_cst (TREE_TYPE (result), y - x);
      result1 = fold_build2 (RSHIFT_EXPR, TREE_TYPE (result), result, log);

      return result1;
    }
  orig_tree = build_int_cst (TREE_TYPE (result), orig);
  new_tree = build_int_cst (TREE_TYPE (result), new_val);
  ratio = fold_build2 (TRUNC_DIV_EXPR, TREE_TYPE (result), result, orig_tree);
  result1 = fold_build2 (MULT_EXPR, TREE_TYPE (result), ratio, new_tree);

  return result1;
}

/* We know that we are allowed to perform matrix flattening (according to the
   escape analysis), so we traverse the use-def chains of the SSA vars
   defined by the global variables pointing to the matrices of our interest.
   In each use of the SSA we calculate the offset from the base address
   according to the following equation:

     a[I1][I2]...[Ik] , where D1..Dk is the length of each dimension and the
     escaping level is m <= k, and a' is the new allocated matrix,
     will be translated to :

       b[I(m+1)]...[Ik]

       where
       b = a' + I1*D2...*Dm + I2*D3...Dm + ... + Im  */
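
/* For instance (an illustrative two-dimensional case with m == k == 2):
   for a matrix with dimension lengths D1 and D2, the address of
   a[I1][I2] becomes b == a' + I1*D2 + I2 (in units of elements), with
   no remaining indices to apply.  */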
static int
transform_access_sites (void **slot, void *data ATTRIBUTE_UNUSED)
{
  gimple_stmt_iterator gsi;
  struct matrix_info *mi = (struct matrix_info *) *slot;
  int min_escape_l = mi->min_indirect_level_escape;
  struct access_site_info *acc_info;
  enum tree_code code;
  int i;

  if (min_escape_l < 2 || !mi->access_l)
    return 1;
  for (i = 0; VEC_iterate (access_site_info_p, mi->access_l, i, acc_info);
       i++)
    {
      /* This is possible because we collect the access sites before
         we determine the final minimum indirection level.  */
      if (acc_info->level >= min_escape_l)
        {
          free (acc_info);
          continue;
        }
      if (acc_info->is_alloc)
        {
          if (acc_info->level >= 0 && gimple_bb (acc_info->stmt))
            {
              ssa_op_iter iter;
              tree def;
              gimple stmt = acc_info->stmt;
              tree lhs;

              FOR_EACH_SSA_TREE_OPERAND (def, stmt, iter, SSA_OP_DEF)
                mark_sym_for_renaming (SSA_NAME_VAR (def));
              gsi = gsi_for_stmt (stmt);
              gcc_assert (is_gimple_assign (acc_info->stmt));
              lhs = gimple_assign_lhs (acc_info->stmt);
              if (TREE_CODE (lhs) == SSA_NAME
                  && acc_info->level < min_escape_l - 1)
                {
                  imm_use_iterator imm_iter;
                  use_operand_p use_p;
                  gimple use_stmt;

                  FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, lhs)
                    FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
                    {
                      tree rhs, tmp;
                      gimple new_stmt;

                      gcc_assert (gimple_assign_rhs_code (acc_info->stmt)
                                  == INDIRECT_REF);
                      /* Emit convert statement to convert to type of use.  */
                      tmp = create_tmp_var (TREE_TYPE (lhs), "new");
                      add_referenced_var (tmp);
                      rhs = gimple_assign_rhs1 (acc_info->stmt);
                      rhs = fold_convert (TREE_TYPE (tmp),
                                          TREE_OPERAND (rhs, 0));
                      new_stmt = gimple_build_assign (tmp, rhs);
                      tmp = make_ssa_name (tmp, new_stmt);
                      gimple_assign_set_lhs (new_stmt, tmp);
                      gsi = gsi_for_stmt (acc_info->stmt);
                      gsi_insert_after (&gsi, new_stmt, GSI_SAME_STMT);
                      SET_USE (use_p, tmp);
                    }
                }
              if (acc_info->level < min_escape_l - 1)
                gsi_remove (&gsi, true);
            }
          free (acc_info);
          continue;
        }
      code = gimple_assign_rhs_code (acc_info->stmt);
      if (code == INDIRECT_REF
          && acc_info->level < min_escape_l - 1)
        {
          /* Replace the INDIRECT_REF with a NOP (cast); usually we are
             casting from "pointer to type" to "type".  */
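          /* For illustration: an intermediate load such as  t2 = *t1
             becomes the cast  t2 = (type) t1  once the pointer level it
             dereferenced has been flattened away.  */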
          tree t =
            build1 (NOP_EXPR, TREE_TYPE (gimple_assign_rhs1 (acc_info->stmt)),
                    TREE_OPERAND (gimple_assign_rhs1 (acc_info->stmt), 0));
          gimple_assign_set_rhs_code (acc_info->stmt, NOP_EXPR);
          gimple_assign_set_rhs1 (acc_info->stmt, t);
        }
      else if (code == POINTER_PLUS_EXPR
               && acc_info->level < (min_escape_l))
        {
          imm_use_iterator imm_iter;
          use_operand_p use_p;

          tree offset;
          int k = acc_info->level;
          tree num_elements, total_elements;
          tree tmp1;
          tree d_size = mi->dimension_size[k];

          /* We already made sure in the analysis that the first operand
             is the base and the second is the offset.  */
          offset = acc_info->offset;
          if (mi->dim_map[k] == min_escape_l - 1)
            {
              if (!check_transpose_p || mi->is_transposed_p == false)
                tmp1 = offset;
              else
                {
                  tree new_offset;
                  tree d_type_size, d_type_size_k;

                  d_type_size = size_int (mi->dimension_type_size[min_escape_l]);
                  d_type_size_k = size_int (mi->dimension_type_size[k + 1]);

                  new_offset =
                    compute_offset (mi->dimension_type_size[min_escape_l],
                                    mi->dimension_type_size[k + 1], offset);

                  total_elements = new_offset;
                  if (new_offset != offset)
                    {
                      gsi = gsi_for_stmt (acc_info->stmt);
                      tmp1 = force_gimple_operand_gsi (&gsi, total_elements,
                                                       true, NULL,
                                                       true, GSI_SAME_STMT);
                    }
                  else
                    tmp1 = offset;
                }
            }
          else
            {
              d_size = mi->dimension_size[mi->dim_map[k] + 1];
              num_elements =
                fold_build2 (MULT_EXPR, sizetype,
                             fold_convert (sizetype, acc_info->index),
                             fold_convert (sizetype, d_size));
              add_referenced_var (d_size);
              gsi = gsi_for_stmt (acc_info->stmt);
              tmp1 = force_gimple_operand_gsi (&gsi, num_elements, true,
                                               NULL, true, GSI_SAME_STMT);
            }
          /* Replace the offset if needed.  */
          if (tmp1 != offset)
            {
              if (TREE_CODE (offset) == SSA_NAME)
                {
                  gimple use_stmt;

                  FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, offset)
                    FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
                      if (use_stmt == acc_info->stmt)
                        SET_USE (use_p, tmp1);
                }
              else
                {
                  gcc_assert (TREE_CODE (offset) == INTEGER_CST);
                  gimple_assign_set_rhs2 (acc_info->stmt, tmp1);
                  update_stmt (acc_info->stmt);
                }
            }
        }
      /* ??? Meanwhile this happens because we record the same access
         site more than once; we should be using a hash table to
         avoid this and insert the STMT of the access site only
         once.
         else
           gcc_unreachable (); */
      free (acc_info);
    }
  VEC_free (access_site_info_p, heap, mi->access_l);

  update_ssa (TODO_update_ssa);
#ifdef ENABLE_CHECKING
  verify_ssa (true);
#endif
  return 1;
}
/* Sort the array A of counts in ascending order.  Rearrange DIM_MAP to
   reflect the new order.  */
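/* For illustration: with counts A == {50, 10, 30} and
   DIM_MAP == {0, 1, 2}, the bubble sort below yields A == {10, 30, 50}
   and DIM_MAP == {1, 2, 0}, so the most frequently accessed dimension
   ends up last (innermost).  */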
static void
sort_dim_hot_level (gcov_type * a, int *dim_map, int n)
{
  int i, j, tmp1;
  gcov_type tmp;

  for (i = 0; i < n - 1; i++)
    {
      for (j = 0; j < n - 1 - i; j++)
        {
          if (a[j + 1] < a[j])
            {
              tmp = a[j];       /* Swap a[j] and a[j+1].  */
              a[j] = a[j + 1];
              a[j + 1] = tmp;
              tmp1 = dim_map[j];
              dim_map[j] = dim_map[j + 1];
              dim_map[j + 1] = tmp1;
            }
        }
    }
}
/* Replace the multiple mallocs (one for each dimension) with one malloc
   whose size is DIM1 * DIM2 * ... * DIMN * size_of_element.
   Make sure that we hold the size used at the malloc site in a
   new global variable; this way we ensure that the size doesn't
   change and that it is accessible from all the other functions that
   use the matrix.  Also, the original calls to free are deleted,
   and replaced by a new call to free the flattened matrix.  */
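/* For illustration: for a two-level matrix the per-row calls in

     for (i = 0; i < dim1; i++)
       free (a[i]);
     free (a);

   are deleted, and only the single free of the flattened buffer
   remains.  */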
static int
transform_allocation_sites (void **slot, void *data ATTRIBUTE_UNUSED)
{
  int i;
  struct matrix_info *mi;
  tree type, oldfn, prev_dim_size;
  gimple call_stmt_0, use_stmt;
  struct cgraph_node *c_node;
  struct cgraph_edge *e;
  gimple_stmt_iterator gsi;
  struct malloc_call_data mcd = {NULL, NULL_TREE, NULL_TREE};
  HOST_WIDE_INT element_size;

  imm_use_iterator imm_iter;
  use_operand_p use_p;
  tree old_size_0, tmp;
  int min_escape_l;
  int id;

  mi = (struct matrix_info *) *slot;

  min_escape_l = mi->min_indirect_level_escape;

  if (!mi->malloc_for_level)
    mi->min_indirect_level_escape = 0;

  if (mi->min_indirect_level_escape < 2)
    return 1;

  mi->dim_map = (int *) xcalloc (mi->min_indirect_level_escape, sizeof (int));
  for (i = 0; i < mi->min_indirect_level_escape; i++)
    mi->dim_map[i] = i;
  if (check_transpose_p)
    {
      int i;

      if (dump_file)
        {
          fprintf (dump_file, "Matrix %s:\n", get_name (mi->decl));
          for (i = 0; i < min_escape_l; i++)
            {
              fprintf (dump_file, "dim %d before sort ", i);
              if (mi->dim_hot_level)
                fprintf (dump_file,
                         "count is " HOST_WIDEST_INT_PRINT_DEC " \n",
                         mi->dim_hot_level[i]);
            }
        }
      sort_dim_hot_level (mi->dim_hot_level, mi->dim_map,
                          mi->min_indirect_level_escape);
      if (dump_file)
        for (i = 0; i < min_escape_l; i++)
          {
            fprintf (dump_file, "dim %d after sort\n", i);
            if (mi->dim_hot_level)
              fprintf (dump_file, "count is " HOST_WIDE_INT_PRINT_DEC
                       " \n", (HOST_WIDE_INT) mi->dim_hot_level[i]);
          }
      for (i = 0; i < mi->min_indirect_level_escape; i++)
        {
          if (dump_file)
            fprintf (dump_file, "dim_map[%d] after sort %d\n", i,
                     mi->dim_map[i]);
          if (mi->dim_map[i] != i)
            {
              if (dump_file)
                fprintf (dump_file,
                         "Transposed dimensions: dim %d is now dim %d\n",
                         mi->dim_map[i], i);
              mi->is_transposed_p = true;
            }
        }
    }
  else
    {
      for (i = 0; i < mi->min_indirect_level_escape; i++)
        mi->dim_map[i] = i;
    }

  /* The call statement of the allocation site of level 0.  */
  call_stmt_0 = mi->malloc_for_level[0];

  /* Find the correct malloc information.  */
  collect_data_for_malloc_call (call_stmt_0, &mcd);

  mi->dimension_size[0] = mcd.size_var;
  mi->dimension_size_orig[0] = mcd.size_var;
  /* Make sure that the variables in the size expression for
     all the dimensions (above level 0) aren't modified in
     the allocation function.  */
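  /* For illustration: if the allocation function had modified a size
     variable after using it, e.g.

       sz = dim2 * sizeof (int);
       a[i] = malloc (sz);
       sz = sz * 2;

     the recorded size expression would be unreliable, and the escape
     level is lowered accordingly in the loop below.  */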
  for (i = 1; i < mi->min_indirect_level_escape; i++)
    {
      tree t;
      check_var_data data;

      /* mi->dimension_size must contain the expression of the size calculated
         in check_allocation_function.  */
      gcc_assert (mi->dimension_size[i]);

      data.fn = mi->allocation_function_decl;
      data.stmt = NULL;
      t = walk_tree_without_duplicates (&(mi->dimension_size[i]),
                                        check_var_notmodified_p,
                                        &data);
      if (t != NULL_TREE)
        {
          mark_min_matrix_escape_level (mi, i, data.stmt);
          break;
        }
    }

  if (mi->min_indirect_level_escape < 2)
    return 1;
  /* Make sure that the size expression is available right before
     the call to the malloc of level 0.  */
  gsi = gsi_for_stmt (call_stmt_0);

  /* Find out the size of each dimension by looking at the malloc
     sites and create a global variable to hold it.
     We add the assignment to the global before the malloc of level 0.  */

  /* To be able to produce gimple temporaries.  */
  oldfn = current_function_decl;
  current_function_decl = mi->allocation_function_decl;
  push_cfun (DECL_STRUCT_FUNCTION (mi->allocation_function_decl));

  /* Set the dimension sizes as follows:
     DIM_SIZE[i] = DIM_SIZE[n] * ... * DIM_SIZE[i]
     where n is the maximum non escaping level.  */
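  /* For illustration: with three flattened dimensions holding D1, D2
     and D3 elements and an element size of S bytes, the loop below
     computes, innermost first,
       DIM_SIZE[2] = D3 * S
       DIM_SIZE[1] = D2 * D3 * S
       DIM_SIZE[0] = D1 * D2 * D3 * S
     and DIM_SIZE[0] becomes the argument of the single remaining
     malloc.  */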
  element_size = mi->dimension_type_size[mi->min_indirect_level_escape];
  prev_dim_size = NULL_TREE;

  for (i = mi->min_indirect_level_escape - 1; i >= 0; i--)
    {
      tree dim_size, dim_var;
      gimple stmt;
      tree d_type_size;

      /* Now put the size expression in a global variable and initialize it to
         the size expression before the malloc of level 0.  */
      dim_var =
        add_new_static_var (TREE_TYPE
                            (mi->dimension_size_orig[mi->dim_map[i]]));
      type = TREE_TYPE (mi->dimension_size_orig[mi->dim_map[i]]);

      /* DIM_SIZE = MALLOC_SIZE_PARAM / TYPE_SIZE.  */
      /* Find which dim ID becomes dim I.  */
      for (id = 0; id < mi->min_indirect_level_escape; id++)
        if (mi->dim_map[id] == i)
          break;
      d_type_size =
        build_int_cst (type, mi->dimension_type_size[id + 1]);
      if (!prev_dim_size)
        prev_dim_size = build_int_cst (type, element_size);
      if (!check_transpose_p && i == mi->min_indirect_level_escape - 1)
        {
          dim_size = mi->dimension_size_orig[id];
        }
      else
        {
          dim_size =
            fold_build2 (TRUNC_DIV_EXPR, type, mi->dimension_size_orig[id],
                         d_type_size);

          dim_size = fold_build2 (MULT_EXPR, type, dim_size, prev_dim_size);
        }
      dim_size = force_gimple_operand_gsi (&gsi, dim_size, true, NULL,
                                           true, GSI_SAME_STMT);
      /* GLOBAL_HOLDING_THE_SIZE = DIM_SIZE.  */
      stmt = gimple_build_assign (dim_var, dim_size);
      mark_symbols_for_renaming (stmt);
      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);

      prev_dim_size = mi->dimension_size[i] = dim_var;
    }
  update_ssa (TODO_update_ssa);
  /* Replace the malloc size argument in the malloc of level 0 with
     the size of all the dimensions.  */
  c_node = cgraph_node (mi->allocation_function_decl);
  old_size_0 = gimple_call_arg (call_stmt_0, 0);
  tmp = force_gimple_operand_gsi (&gsi, mi->dimension_size[0], true,
                                  NULL, true, GSI_SAME_STMT);
  if (TREE_CODE (old_size_0) == SSA_NAME)
    {
      FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, old_size_0)
        FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
          if (use_stmt == call_stmt_0)
            SET_USE (use_p, tmp);
    }
  /* When deleting the calls to malloc we also need to remove the edge
     from the call graph to keep it consistent.  Notice that cgraph_edge
     may create a new node in the call graph if there is no node for the
     given declaration; this shouldn't be the case but currently there is
     no way to check this outside of "cgraph.c".  */
  for (i = 1; i < mi->min_indirect_level_escape; i++)
    {
      gimple_stmt_iterator gsi;
      gimple use_stmt1 = NULL;

      gimple call_stmt = mi->malloc_for_level[i];
      gcc_assert (is_gimple_call (call_stmt));
      e = cgraph_edge (c_node, call_stmt);
      gcc_assert (e);
      cgraph_remove_edge (e);
      gsi = gsi_for_stmt (call_stmt);
      /* Remove the call stmt.  */
      gsi_remove (&gsi, true);
      /* Remove the type cast stmt.  */
      FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter,
                             gimple_call_lhs (call_stmt))
        {
          use_stmt1 = use_stmt;
          gsi = gsi_for_stmt (use_stmt);
          gsi_remove (&gsi, true);
        }
      /* Remove the assignment of the allocated area.  */
      FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter,
                             gimple_get_lhs (use_stmt1))
        {
          gsi = gsi_for_stmt (use_stmt);
          gsi_remove (&gsi, true);
        }
    }
  update_ssa (TODO_update_ssa);
#ifdef ENABLE_CHECKING
  verify_ssa (true);
#endif
  /* Delete the calls to free.  */
  for (i = 1; i < mi->min_indirect_level_escape; i++)
    {
      gimple_stmt_iterator gsi;

      /* ??? Wonder why this case is possible, but we failed on it once.  */
      if (!mi->free_stmts[i].stmt)
        continue;

      c_node = cgraph_node (mi->free_stmts[i].func);
      gcc_assert (is_gimple_call (mi->free_stmts[i].stmt));
      e = cgraph_edge (c_node, mi->free_stmts[i].stmt);
      gcc_assert (e);
      cgraph_remove_edge (e);
      current_function_decl = mi->free_stmts[i].func;
      set_cfun (DECL_STRUCT_FUNCTION (mi->free_stmts[i].func));
      gsi = gsi_for_stmt (mi->free_stmts[i].stmt);
      gsi_remove (&gsi, true);
    }
  /* Return to the previous situation.  */
  current_function_decl = oldfn;
  pop_cfun ();
  return 1;
}
/* Print out the results of the escape analysis.  */
static int
dump_matrix_reorg_analysis (void **slot, void *data ATTRIBUTE_UNUSED)
{
  struct matrix_info *mi = (struct matrix_info *) *slot;

  if (!dump_file)
    return 1;
  fprintf (dump_file, "Matrix \"%s\"; Escaping Level: %d, Num Dims: %d,",
           get_name (mi->decl), mi->min_indirect_level_escape, mi->num_dims);
  fprintf (dump_file, " Malloc Dims: %d, ", mi->max_malloced_level);
  fprintf (dump_file, "\n");
  if (mi->min_indirect_level_escape >= 2)
    fprintf (dump_file, "Flattened %d dimensions \n",
             mi->min_indirect_level_escape);
  return 1;
}
/* Perform matrix flattening.  */

static unsigned int
matrix_reorg (void)
{
  struct cgraph_node *node;

  if (profile_info)
    check_transpose_p = true;
  else
    check_transpose_p = false;
  /* If there are hand-written vectors, we skip this optimization.  */
  for (node = cgraph_nodes; node; node = node->next)
    if (!may_flatten_matrices (node))
      return 0;
  matrices_to_reorg = htab_create (37, mtt_info_hash, mtt_info_eq, mat_free);
  /* Find and record all potential matrices in the program.  */
  find_matrices_decl ();
  /* Analyze the accesses of the matrices (escaping analysis).  */
  for (node = cgraph_nodes; node; node = node->next)
    if (node->analyzed)
      {
        tree temp_fn;

        temp_fn = current_function_decl;
        current_function_decl = node->decl;
        push_cfun (DECL_STRUCT_FUNCTION (node->decl));
        bitmap_obstack_initialize (NULL);
        gimple_register_cfg_hooks ();

        if (!gimple_in_ssa_p (cfun))
          {
            free_dominance_info (CDI_DOMINATORS);
            free_dominance_info (CDI_POST_DOMINATORS);
            pop_cfun ();
            current_function_decl = temp_fn;
            bitmap_obstack_release (NULL);

            return 0;
          }

#ifdef ENABLE_CHECKING
        verify_flow_info ();
#endif

        if (!matrices_to_reorg)
          {
            free_dominance_info (CDI_DOMINATORS);
            free_dominance_info (CDI_POST_DOMINATORS);
            pop_cfun ();
            current_function_decl = temp_fn;
            bitmap_obstack_release (NULL);

            return 0;
          }

        /* Create the htab for phi nodes.  */
        htab_mat_acc_phi_nodes = htab_create (37, mat_acc_phi_hash,
                                              mat_acc_phi_eq, free);
        if (!check_transpose_p)
          find_sites_in_func (false);
        else
          {
            find_sites_in_func (true);
            loop_optimizer_init (LOOPS_NORMAL);
            if (current_loops)
              scev_initialize ();
            htab_traverse (matrices_to_reorg, analyze_transpose, NULL);
            if (current_loops)
              {
                scev_finalize ();
                loop_optimizer_finalize ();
                current_loops = NULL;
              }
          }
        /* If the current function is the allocation function for any of
           the matrices we check its allocation and the escaping level.  */
        htab_traverse (matrices_to_reorg, check_allocation_function, NULL);
        free_dominance_info (CDI_DOMINATORS);
        free_dominance_info (CDI_POST_DOMINATORS);
        pop_cfun ();
        current_function_decl = temp_fn;
        bitmap_obstack_release (NULL);
      }
  htab_traverse (matrices_to_reorg, transform_allocation_sites, NULL);
  /* Now transform the accesses.  */
  for (node = cgraph_nodes; node; node = node->next)
    if (node->analyzed)
      {
        /* Remember that allocation sites have been handled.  */
        tree temp_fn;

        temp_fn = current_function_decl;
        current_function_decl = node->decl;
        push_cfun (DECL_STRUCT_FUNCTION (node->decl));
        bitmap_obstack_initialize (NULL);
        gimple_register_cfg_hooks ();
        record_all_accesses_in_func ();
        htab_traverse (matrices_to_reorg, transform_access_sites, NULL);
        free_dominance_info (CDI_DOMINATORS);
        free_dominance_info (CDI_POST_DOMINATORS);
        pop_cfun ();
        current_function_decl = temp_fn;
        bitmap_obstack_release (NULL);
      }
  htab_traverse (matrices_to_reorg, dump_matrix_reorg_analysis, NULL);

  current_function_decl = NULL;
  set_cfun (NULL);
  matrices_to_reorg = NULL;
  return 0;
}
/* The condition for matrix flattening to be performed.  */

static bool
gate_matrix_reorg (void)
{
  return flag_ipa_matrix_reorg && flag_whole_program;
}
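/* For illustration: with the usual option spelling this pass runs
   only when both -fipa-matrix-reorg and -fwhole-program are given,
   e.g. "gcc -O2 -fipa-matrix-reorg -fwhole-program prog.c".  */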
struct simple_ipa_opt_pass pass_ipa_matrix_reorg =
{
 {
  SIMPLE_IPA_PASS,
  "matrix-reorg",               /* name */
  gate_matrix_reorg,            /* gate */
  matrix_reorg,                 /* execute */
  NULL,                         /* sub */
  NULL,                         /* next */
  0,                            /* static_pass_number */
  TV_NONE,                      /* tv_id */
  0,                            /* properties_required */
  0,                            /* properties_provided */
  0,                            /* properties_destroyed */
  0,                            /* todo_flags_start */
  TODO_dump_cgraph | TODO_dump_func     /* todo_flags_finish */
 }
};