/* Matrix layout transformations.
   Copyright (C) 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
   Contributed by Razya Ladelsky <razya@il.ibm.com>
   Originally written by Revital Eres and Mustafa Hagog.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/*
   Matrix flattening optimization tries to replace an N-dimensional
   matrix with its equivalent M-dimensional matrix, where M < N.
   This first implementation focuses on global matrices defined dynamically.

   When M == 1, we actually flatten the whole matrix.
   For instance consider a two-dimensional array a [dim1] [dim2].
   The code for allocating space for it usually looks like:

     a = (int **) malloc (dim1 * sizeof (int *));
     for (i = 0; i < dim1; i++)
       a[i] = (int *) malloc (dim2 * sizeof (int));

   If the array "a" is found suitable for this optimization,
   its allocation is replaced by:

     a = (int *) malloc (dim1 * dim2 * sizeof (int));

   and all the references to a[i][j] are replaced by a[i * dim2 + j].
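
   For instance, an access loop such as:

     for (i = 0; i < dim1; i++)
       for (j = 0; j < dim2; j++)
         sum += a[i][j];

   is rewritten to:

     for (i = 0; i < dim1; i++)
       for (j = 0; j < dim2; j++)
         sum += a[i * dim2 + j];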

   The two main phases of the optimization are the analysis
   and the transformation.
   The driver of the optimization is matrix_reorg ().

   Analysis phase:
   ===============

   We'll number the dimensions outside-in, meaning the most external
   is 0, then 1, and so on.
   The analysis part of the optimization determines K, the escape
   level of an N-dimensional matrix (K <= N), that allows flattening of
   the external dimensions 0, 1, ..., K-1.  Escape level 0 means that the
   whole matrix escapes and no flattening is possible.

   The analysis part is implemented in analyze_matrix_allocation_site ()
   and analyze_matrix_accesses ().

   Transformation phase:
   =====================
   In this phase we define the new flattened matrices that replace the
   original matrices in the code.
   Implemented in transform_allocation_sites (),
   transform_access_sites ().

   Matrix Transposing
   ==================
   The idea of Matrix Transposing is organizing the matrix in a different
   layout such that the dimensions are reordered.
   This could produce better cache behavior in some cases.

   For example, let's look at the matrix accesses in the following loop:

     for (i = 0; i < N; i++)
       for (j = 0; j < M; j++)
         access to a[i][j]

   This loop can produce good cache behavior because the elements of
   the inner dimension are accessed sequentially.

   However, if the accesses of the matrix were of the following form:

     for (i = 0; i < N; i++)
       for (j = 0; j < M; j++)
         access to a[j][i]

   this loop would iterate over the columns and not the rows.
   Therefore, swapping the rows and columns of the matrix
   would yield a layout with better (cache) locality.
   Swapping the dimensions of the matrix is called matrix transposing.

   This example, of course, could be extended to matrices of more
   dimensions as well.
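
   For illustration, with the row-major layout produced by flattening,
   the access a[j][i] above becomes a[j * N + i], so the inner loop
   strides by N elements on every iteration.  After transposing the two
   dimensions, the same element is accessed as a[i * M + j], and the
   inner loop touches consecutive elements:

     for (i = 0; i < N; i++)
       for (j = 0; j < M; j++)
         access to a[i * M + j]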

   Since a program could include all kinds of accesses, there is a
   decision mechanism, implemented in analyze_transpose (), which
   implements a heuristic that tries to determine whether to transpose
   the matrix or not, according to the form of the most dominant
   accesses.
   This decision is passed on to the flattening mechanism; whether or
   not the matrix was transposed, the matrix is flattened (if possible).

   This decision making is based on profiling information and loop
   information.  If profiling information is available, the decision
   making mechanism is activated; otherwise the matrix is only
   flattened (if possible).

   Both optimizations are described in the paper "Matrix flattening and
   transposing in GCC" which was presented at the GCC Summit 2006.
   http://www.gccsummit.org/2006/2006-GCC-Summit-Proceedings.pdf.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "rtl.h"
#include "tree-inline.h"
#include "tree-flow.h"
#include "tree-flow-inline.h"
#include "langhooks.h"
#include "hashtab.h"
#include "toplev.h"
#include "flags.h"
#include "ggc.h"
#include "debug.h"
#include "target.h"
#include "cgraph.h"
#include "diagnostic-core.h"
#include "timevar.h"
#include "params.h"
#include "fibheap.h"
#include "intl.h"
#include "function.h"
#include "basic-block.h"
#include "cfgloop.h"
#include "tree-iterator.h"
#include "tree-pass.h"
#include "opts.h"
#include "tree-data-ref.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "tree-ssa-sccvn.h"

/* We need to collect a lot of data from the original malloc,
   particularly as the gimplifier has converted:

   orig_var = (struct_type *) malloc (x * sizeof (struct_type *));

   into

   T3 = <constant> ;  ** <constant> is amount to malloc; precomputed **
   T4 = malloc (T3);
   T5 = (struct_type *) T4;
   orig_var = T5;

   The following struct fields allow us to collect all the necessary data
   from the gimplified program.  The comments in the struct below are all
   based on the gimple example above.  */

struct malloc_call_data
{
  gimple call_stmt;        /* Tree for "T4 = malloc (T3);".  */
  tree size_var;           /* Var decl for T3.  */
  tree malloc_size;        /* Tree for "<constant>", the rhs assigned to T3.  */
};

static tree can_calculate_expr_before_stmt (tree, sbitmap);
static tree can_calculate_stmt_before_stmt (gimple, sbitmap);

/* The front end of the compiler, when parsing statements of the form:

   var = (type_cast) malloc (sizeof (type));

   always converts this single statement into the following statements
   (GIMPLE form):

   T.1 = sizeof (type);
   T.2 = malloc (T.1);
   T.3 = (type_cast) T.2;
   var = T.3;

   Since we need to create new malloc statements and modify the original
   statements somewhat, we need to find all four of the above statements.
   Currently record_call_1 (called for building cgraph edges) finds and
   records the statements containing the actual call to malloc, but we
   need to find the rest of the variables/statements on our own.  That
   is what the following function does.  */
static void
collect_data_for_malloc_call (gimple stmt, struct malloc_call_data *m_data)
{
  tree size_var = NULL;
  tree malloc_fn_decl;
  tree arg1;

  gcc_assert (is_gimple_call (stmt));

  malloc_fn_decl = gimple_call_fndecl (stmt);
  if (malloc_fn_decl == NULL
      || DECL_FUNCTION_CODE (malloc_fn_decl) != BUILT_IN_MALLOC)
    return;

  arg1 = gimple_call_arg (stmt, 0);
  size_var = arg1;

  m_data->call_stmt = stmt;
  m_data->size_var = size_var;
  if (TREE_CODE (size_var) != VAR_DECL)
    m_data->malloc_size = size_var;
  else
    m_data->malloc_size = NULL_TREE;
}

/* Information about a matrix access site.
   For example: if an access site of matrix arr is arr[i][j]
   the ACCESS_SITE_INFO structure will have the address
   of arr as its stmt.  The INDEX_INFO will hold information about the
   initial address and index of each dimension.  */
struct access_site_info
{
  /* The statement (MEM_REF or POINTER_PLUS_EXPR).  */
  gimple stmt;

  /* In case of POINTER_PLUS_EXPR, what is the offset.  */
  tree offset;

  /* The index which created the offset.  */
  tree index;

  /* The indirection level of this statement.  */
  int level;

  /* TRUE for allocation site, FALSE for access site.  */
  bool is_alloc;

  /* The function containing the access site.  */
  tree function_decl;

  /* This access is iterated in the innermost loop.  */
  bool iterated_by_inner_most_loop_p;
};

typedef struct access_site_info *access_site_info_p;
DEF_VEC_P (access_site_info_p);
DEF_VEC_ALLOC_P (access_site_info_p, heap);

/* Calls to free when flattening a matrix.  */

struct free_info
{
  gimple stmt;
  tree func;
};

/* Information about a matrix to flatten.  */
struct matrix_info
{
  /* Decl tree of this matrix.  */
  tree decl;
  /* Number of dimensions; number
     of "*" in the type declaration.  */
  int num_dims;

  /* Minimum indirection level that escapes, 0 means that
     the whole matrix escapes, k means that dimensions
     0 to ACTUAL_DIM - k escape.  */
  int min_indirect_level_escape;

  gimple min_indirect_level_escape_stmt;

  /* Hold the allocation site for each level (dimension).
     We can use NUM_DIMS as the upper bound and allocate the array
     once with this number of elements, with no need to use realloc and
     MAX_MALLOCED_LEVEL.  */
  gimple *malloc_for_level;

  int max_malloced_level;

  /* Is the matrix transposed.  */
  bool is_transposed_p;

  /* The location of the allocation sites (they must be in one
     function).  */
  tree allocation_function_decl;

  /* The calls to free for each level of indirection.  */
  struct free_info *free_stmts;

  /* An array which holds for each dimension its size, where
     dimension 0 is the outermost (the one that contains all the
     others).  */
  tree *dimension_size;

  /* An array which holds for each dimension its original size
     (before transposing and flattening take place).  */
  tree *dimension_size_orig;

  /* An array which holds for each dimension the size of the type of
     the elements accessed in that level (in bytes).  */
  HOST_WIDE_INT *dimension_type_size;

  int dimension_type_size_len;

  /* An array collecting the count of accesses for each dimension.  */
  gcov_type *dim_hot_level;

  /* An array of the accesses to be flattened.
     Elements are of type "struct access_site_info *".  */
  VEC (access_site_info_p, heap) * access_l;

  /* A map of how the dimensions will be organized at the end of
     the analyses.  */
  int *dim_map;
};

/* In each phi node we want to record the indirection level we have when we
   get to the phi node.  Usually we will have phi nodes with more than two
   arguments, then we must assure that all of them get to the phi node with
   the same indirection level, otherwise it's not safe to do the flattening.
   So we record the information regarding the indirection level each time we
   get to the phi node in this hash table.  */

struct matrix_access_phi_node
{
  gimple phi;
  int indirection_level;
};

/* We use this structure to find if the SSA variable is accessed inside the
   tree and record the tree containing it.  */

struct ssa_acc_in_tree
{
  /* The variable whose accesses in the tree we are looking for.  */
  tree ssa_var;
  /* The tree, and the code of the tree, in which ssa_var is accessed;
     currently it can be a MEM_REF or CALL_EXPR.  */
  enum tree_code t_code;
  tree t_tree;
  /* The place in the containing tree.  */
  tree *tp;
  tree second_op;
  bool var_found;
};

static void analyze_matrix_accesses (struct matrix_info *, tree, int, bool,
                                     sbitmap, bool);
static int transform_allocation_sites (void **, void *);
static int transform_access_sites (void **, void *);
static int analyze_transpose (void **, void *);
static int dump_matrix_reorg_analysis (void **, void *);

static bool check_transpose_p;

/* Hash function used for the phi nodes.  */

static hashval_t
mat_acc_phi_hash (const void *p)
{
  const struct matrix_access_phi_node *const ma_phi =
    (const struct matrix_access_phi_node *) p;

  return htab_hash_pointer (ma_phi->phi);
}

/* Equality means phi node pointers are the same.  */

static int
mat_acc_phi_eq (const void *p1, const void *p2)
{
  const struct matrix_access_phi_node *const phi1 =
    (const struct matrix_access_phi_node *) p1;
  const struct matrix_access_phi_node *const phi2 =
    (const struct matrix_access_phi_node *) p2;

  if (phi1->phi == phi2->phi)
    return 1;

  return 0;
}

/* Hold the PHI nodes we visit during the traversal for escaping
   analysis.  */
static htab_t htab_mat_acc_phi_nodes = NULL;

/* This hash-table holds the information about the matrices we are
   going to handle.  */
static htab_t matrices_to_reorg = NULL;

/* Return a hash for MTT, which is really a "matrix_info *".  */
static hashval_t
mtt_info_hash (const void *mtt)
{
  return htab_hash_pointer (((const struct matrix_info *) mtt)->decl);
}

/* Return true if MTT1 and MTT2 (which are really both of type
   "matrix_info *") refer to the same decl.  */
static int
mtt_info_eq (const void *mtt1, const void *mtt2)
{
  const struct matrix_info *const i1 = (const struct matrix_info *) mtt1;
  const struct matrix_info *const i2 = (const struct matrix_info *) mtt2;

  if (i1->decl == i2->decl)
    return true;

  return false;
}

/* Return false if STMT may contain a vector expression.
   In that case no matrices should be flattened.  */
static bool
may_flatten_matrices_1 (gimple stmt)
{
  switch (gimple_code (stmt))
    {
    case GIMPLE_ASSIGN:
    case GIMPLE_CALL:
      if (!gimple_has_lhs (stmt))
        return true;
      if (TREE_CODE (TREE_TYPE (gimple_get_lhs (stmt))) == VECTOR_TYPE)
        {
          if (dump_file)
            fprintf (dump_file,
                     "Found vector type, don't flatten matrix\n");
          return false;
        }
      break;
    case GIMPLE_ASM:
      /* Asm code could contain vector operations.  */
      return false;
      break;
    default:
      break;
    }
  return true;
}

/* Return false if there are hand-written vectors in the program.
   We disable the flattening in such a case.  */
static bool
may_flatten_matrices (struct cgraph_node *node)
{
  tree decl;
  struct function *func;
  basic_block bb;
  gimple_stmt_iterator gsi;

  decl = node->decl;
  if (node->analyzed)
    {
      func = DECL_STRUCT_FUNCTION (decl);
      FOR_EACH_BB_FN (bb, func)
        for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
          if (!may_flatten_matrices_1 (gsi_stmt (gsi)))
            return false;
    }
  return true;
}

/* Given a VAR_DECL, check its type to determine whether it is
   a definition of a dynamically allocated matrix and therefore is
   a suitable candidate for the matrix flattening optimization.
   Return NULL if VAR_DECL is not such a decl.  Otherwise, allocate
   a MATRIX_INFO structure, fill it with the relevant information
   and return a pointer to it.
   TODO: handle also statically defined arrays.  */
static struct matrix_info *
analyze_matrix_decl (tree var_decl)
{
  struct matrix_info *m_node, tmpmi, *mi;
  tree var_type;
  int dim_num = 0;

  gcc_assert (matrices_to_reorg);

  if (TREE_CODE (var_decl) == PARM_DECL)
    var_type = DECL_ARG_TYPE (var_decl);
  else if (TREE_CODE (var_decl) == VAR_DECL)
    var_type = TREE_TYPE (var_decl);
  else
    return NULL;

  if (!POINTER_TYPE_P (var_type))
    return NULL;

  while (POINTER_TYPE_P (var_type))
    {
      var_type = TREE_TYPE (var_type);
      dim_num++;
    }

  if (dim_num <= 1)
    return NULL;

  if (!COMPLETE_TYPE_P (var_type)
      || TREE_CODE (TYPE_SIZE_UNIT (var_type)) != INTEGER_CST)
    return NULL;

  /* Check to see if this pointer is already in there.  */
  tmpmi.decl = var_decl;
  mi = (struct matrix_info *) htab_find (matrices_to_reorg, &tmpmi);

  if (mi)
    return NULL;

  /* Record the matrix.  */

  m_node = (struct matrix_info *) xcalloc (1, sizeof (struct matrix_info));
  m_node->decl = var_decl;
  m_node->num_dims = dim_num;
  m_node->free_stmts
    = (struct free_info *) xcalloc (dim_num, sizeof (struct free_info));

  /* Init min_indirect_level_escape to -1 to indicate that no escape
     analysis has been done yet.  */
  m_node->min_indirect_level_escape = -1;
  m_node->is_transposed_p = false;

  return m_node;
}

/* Free matrix E.  */
static void
mat_free (void *e)
{
  struct matrix_info *mat = (struct matrix_info *) e;

  if (!mat)
    return;

  if (mat->free_stmts)
    free (mat->free_stmts);
  if (mat->dim_hot_level)
    free (mat->dim_hot_level);
  if (mat->malloc_for_level)
    free (mat->malloc_for_level);
}

/* Find all potential matrices.
   TODO: currently we handle only multidimensional
   dynamically allocated arrays.  */
static void
find_matrices_decl (void)
{
  struct matrix_info *tmp;
  PTR *slot;
  struct varpool_node *vnode;

  gcc_assert (matrices_to_reorg);

  /* For every global variable in the program:
     Check to see if it's of a candidate type and record it.  */
  for (vnode = varpool_nodes_queue; vnode; vnode = vnode->next_needed)
    {
      tree var_decl = vnode->decl;

      if (!var_decl || TREE_CODE (var_decl) != VAR_DECL)
        continue;

      if (matrices_to_reorg)
        if ((tmp = analyze_matrix_decl (var_decl)))
          {
            if (!TREE_ADDRESSABLE (var_decl))
              {
                slot = htab_find_slot (matrices_to_reorg, tmp, INSERT);
                *slot = tmp;
              }
          }
    }
  return;
}

/* Mark that the matrix MI escapes at level L.  */
static void
mark_min_matrix_escape_level (struct matrix_info *mi, int l, gimple s)
{
  if (mi->min_indirect_level_escape == -1
      || (mi->min_indirect_level_escape > l))
    {
      mi->min_indirect_level_escape = l;
      mi->min_indirect_level_escape_stmt = s;
    }
}

/* Find if the SSA variable is accessed inside the
   tree and record the tree containing it.
   The only relevant uses are the case of SSA_NAME, or SSA inside
   MEM_REF, PLUS_EXPR, POINTER_PLUS_EXPR, MULT_EXPR.  */
static void
ssa_accessed_in_tree (tree t, struct ssa_acc_in_tree *a)
{
  a->t_code = TREE_CODE (t);
  switch (a->t_code)
    {
    case SSA_NAME:
      if (t == a->ssa_var)
        a->var_found = true;
      break;
    case MEM_REF:
      if (SSA_VAR_P (TREE_OPERAND (t, 0))
          && TREE_OPERAND (t, 0) == a->ssa_var)
        a->var_found = true;
      break;
    default:
      break;
    }
}

/* Find if the SSA variable is accessed on the right hand side of
   gimple call STMT.  */

static void
ssa_accessed_in_call_rhs (gimple stmt, struct ssa_acc_in_tree *a)
{
  tree decl;
  tree arg;
  size_t i;

  a->t_code = CALL_EXPR;
  for (i = 0; i < gimple_call_num_args (stmt); i++)
    {
      arg = gimple_call_arg (stmt, i);
      if (arg == a->ssa_var)
        {
          a->var_found = true;
          decl = gimple_call_fndecl (stmt);
          a->t_tree = decl;
          break;
        }
    }
}

/* Find if the SSA variable is accessed on the right hand side of
   gimple assign STMT.  */

static void
ssa_accessed_in_assign_rhs (gimple stmt, struct ssa_acc_in_tree *a)
{
  a->t_code = gimple_assign_rhs_code (stmt);
  switch (a->t_code)
    {
    tree op1, op2;

    case SSA_NAME:
    case MEM_REF:
    CASE_CONVERT:
    case VIEW_CONVERT_EXPR:
      ssa_accessed_in_tree (gimple_assign_rhs1 (stmt), a);
      break;
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MULT_EXPR:
      op1 = gimple_assign_rhs1 (stmt);
      op2 = gimple_assign_rhs2 (stmt);

      if (op1 == a->ssa_var)
        {
          a->var_found = true;
          a->second_op = op2;
        }
      else if (op2 == a->ssa_var)
        {
          a->var_found = true;
          a->second_op = op1;
        }
      break;
    default:
      break;
    }
}

/* Record the access/allocation site information for matrix MI so we can
   handle it later in transformation.  */
static void
record_access_alloc_site_info (struct matrix_info *mi, gimple stmt, tree offset,
                               tree index, int level, bool is_alloc)
{
  struct access_site_info *acc_info;

  if (!mi->access_l)
    mi->access_l = VEC_alloc (access_site_info_p, heap, 100);

  acc_info
    = (struct access_site_info *)
    xcalloc (1, sizeof (struct access_site_info));
  acc_info->stmt = stmt;
  acc_info->offset = offset;
  acc_info->index = index;
  acc_info->function_decl = current_function_decl;
  acc_info->level = level;
  acc_info->is_alloc = is_alloc;

  VEC_safe_push (access_site_info_p, heap, mi->access_l, acc_info);
}

/* Record the malloc as the allocation site of the given LEVEL.  But
   first make sure that all the size parameters passed to malloc in
   all the allocation sites could be pre-calculated before the call to
   the malloc of level 0 (the main malloc call).  */
static void
add_allocation_site (struct matrix_info *mi, gimple stmt, int level)
{
  struct malloc_call_data mcd;

  /* Make sure that the allocation sites are in the same function.  */
  if (!mi->allocation_function_decl)
    mi->allocation_function_decl = current_function_decl;
  else if (mi->allocation_function_decl != current_function_decl)
    {
      int min_malloc_level;

      gcc_assert (mi->malloc_for_level);

      /* Find the minimum malloc level that has already been seen; we
         know its allocation function must be
         MI->allocation_function_decl since it differs from
         CURRENT_FUNCTION_DECL.  The escaping level should then be
         MIN (LEVEL, MIN_MALLOC_LEVEL) - 1, and the allocation function
         must be set accordingly.  */
      for (min_malloc_level = 0;
           min_malloc_level < mi->max_malloced_level
           && mi->malloc_for_level[min_malloc_level]; min_malloc_level++);
      if (level < min_malloc_level)
        {
          mi->allocation_function_decl = current_function_decl;
          mark_min_matrix_escape_level (mi, min_malloc_level, stmt);
        }
      else
        {
          mark_min_matrix_escape_level (mi, level, stmt);
          /* It cannot be that level == min_malloc_level;
             we would have returned earlier.  */
          return;
        }
    }

  /* Find the correct malloc information.  */
  collect_data_for_malloc_call (stmt, &mcd);

  /* We accept only calls to malloc function; we do not accept
     calls like calloc and realloc.  */
  if (!mi->malloc_for_level)
    {
      mi->malloc_for_level = XCNEWVEC (gimple, level + 1);
      mi->max_malloced_level = level + 1;
    }
  else if (mi->max_malloced_level <= level)
    {
      mi->malloc_for_level
        = XRESIZEVEC (gimple, mi->malloc_for_level, level + 1);

      /* Zero the newly allocated items.  */
      memset (&(mi->malloc_for_level[mi->max_malloced_level]),
              0, (level + 1 - mi->max_malloced_level) * sizeof (gimple));

      mi->max_malloced_level = level + 1;
    }
  mi->malloc_for_level[level] = stmt;
}

/* Given an assignment statement STMT whose left-hand side we know to
   be the matrix MI variable, we traverse the immediate
   uses backwards until we get to a malloc site.  We make sure that
   there is one and only one malloc site that sets this variable.  When
   we are performing the flattening we generate a new variable that
   will hold the size for each dimension; each malloc that allocates a
   dimension has the size parameter; we use that parameter to
   initialize the dimension size variable so we can use it later in
   the address calculations.  LEVEL is the dimension we're inspecting.
   Return after checking whether STMT is related to an allocation site.  */

static void
analyze_matrix_allocation_site (struct matrix_info *mi, gimple stmt,
                                int level, sbitmap visited)
{
  if (gimple_assign_copy_p (stmt) || gimple_assign_cast_p (stmt))
    {
      tree rhs = gimple_assign_rhs1 (stmt);

      if (TREE_CODE (rhs) == SSA_NAME)
        {
          gimple def = SSA_NAME_DEF_STMT (rhs);

          analyze_matrix_allocation_site (mi, def, level, visited);
          return;
        }
      /* If we are back to the original matrix variable then we
         are sure that this is analyzed as an access site.  */
      else if (rhs == mi->decl)
        return;
    }
  /* A result of call to malloc.  */
  else if (is_gimple_call (stmt))
    {
      int call_flags = gimple_call_flags (stmt);

      if (!(call_flags & ECF_MALLOC))
        {
          mark_min_matrix_escape_level (mi, level, stmt);
          return;
        }
      else
        {
          tree malloc_fn_decl;

          malloc_fn_decl = gimple_call_fndecl (stmt);
          if (malloc_fn_decl == NULL_TREE)
            {
              mark_min_matrix_escape_level (mi, level, stmt);
              return;
            }
          if (DECL_FUNCTION_CODE (malloc_fn_decl) != BUILT_IN_MALLOC)
            {
              if (dump_file)
                fprintf (dump_file,
                         "Matrix %s is an argument to function %s\n",
                         get_name (mi->decl), get_name (malloc_fn_decl));
              mark_min_matrix_escape_level (mi, level, stmt);
              return;
            }
        }
      /* This is a call to malloc of level 'level'.
         mi->max_malloced_level - 1 == level means that we've
         seen a malloc statement of level 'level' before.
         If the statement is not the same one that we've
         seen before, then there's another malloc statement
         for the same level, which means that we need to mark
         it escaping.  */
      if (mi->malloc_for_level
          && mi->max_malloced_level - 1 == level
          && mi->malloc_for_level[level] != stmt)
        {
          mark_min_matrix_escape_level (mi, level, stmt);
          return;
        }
      else
        add_allocation_site (mi, stmt, level);
      return;
    }
  /* Looks like we don't know what is happening in this
     statement, so be on the safe side and mark it as escaping.  */
  mark_min_matrix_escape_level (mi, level, stmt);
}

/* The transposing decision making.
   In order to calculate the profitability of transposing, we collect two
   types of information regarding the accesses:
   1. profiling information used to express the hotness of an access, that
      is how often the matrix is accessed by this access site (count of the
      access site).
   2. which dimension in the access site is iterated by the innermost
      loop containing this access.

   The matrix will have a calculated value of weighted hotness for each
   dimension.
   Intuitively the hotness level of a dimension is a function of how
   many times it was the most frequently accessed dimension in the
   highly executed access sites of this matrix.

   It is computed by the following equation, where n is the number of
   dims and m is the number of the matrix access sites:

     for i = 1..n, j = 1..m:
       dim_hot_level[i] += acc[j]->dim[i]->iter_by_inner_loop * count(j)

   acc[j]->dim[i]->iter_by_inner_loop is 1 if acc[j] iterates over
   dim[i] in the innermost loop, and is 0 otherwise.

   The organization of the new matrix should be according to the
   hotness of each dimension.  The hotness of a dimension implies
   the locality of its elements.  */
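
/* For example, if a matrix has two POINTER_PLUS_EXPR access sites, one
   stepping through dimension 0 in its innermost loop from a basic block
   with count 1000 and one stepping through dimension 1 from a block
   with count 10, then dim_hot_level[0] = 1000 and dim_hot_level[1] = 10,
   so the heuristic prefers a layout in which dimension 0 becomes the
   fastest-varying (contiguous) dimension.  */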
static int
analyze_transpose (void **slot, void *data ATTRIBUTE_UNUSED)
{
  struct matrix_info *mi = (struct matrix_info *) *slot;
  int min_escape_l = mi->min_indirect_level_escape;
  struct loop *loop;
  affine_iv iv;
  struct access_site_info *acc_info;
  int i;

  if (min_escape_l < 2 || !mi->access_l)
    {
      if (mi->access_l)
        {
          for (i = 0;
               VEC_iterate (access_site_info_p, mi->access_l, i, acc_info);
               i++)
            free (acc_info);
          VEC_free (access_site_info_p, heap, mi->access_l);
        }
      return 1;
    }
  if (!mi->dim_hot_level)
    mi->dim_hot_level =
      (gcov_type *) xcalloc (min_escape_l, sizeof (gcov_type));

  for (i = 0; VEC_iterate (access_site_info_p, mi->access_l, i, acc_info);
       i++)
    {
      if (gimple_assign_rhs_code (acc_info->stmt) == POINTER_PLUS_EXPR
          && acc_info->level < min_escape_l)
        {
          loop = loop_containing_stmt (acc_info->stmt);
          if (!loop || loop->inner)
            {
              free (acc_info);
              continue;
            }
          if (simple_iv (loop, loop, acc_info->offset, &iv, true))
            {
              if (iv.step != NULL)
                {
                  HOST_WIDE_INT istep;

                  istep = int_cst_value (iv.step);
                  if (istep != 0)
                    {
                      acc_info->iterated_by_inner_most_loop_p = 1;
                      mi->dim_hot_level[acc_info->level] +=
                        gimple_bb (acc_info->stmt)->count;
                    }
                }
            }
        }
      free (acc_info);
    }
  VEC_free (access_site_info_p, heap, mi->access_l);

  return 1;
}

/* Find the index which defines the OFFSET from base.
   We walk from use to def until we find how the offset was defined.  */
static tree
get_index_from_offset (tree offset, gimple def_stmt)
{
  tree op1, op2, index;

  if (gimple_code (def_stmt) == GIMPLE_PHI)
    return NULL;
  if ((gimple_assign_copy_p (def_stmt) || gimple_assign_cast_p (def_stmt))
      && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME)
    return get_index_from_offset (offset,
                                  SSA_NAME_DEF_STMT (gimple_assign_rhs1
                                                     (def_stmt)));
  else if (is_gimple_assign (def_stmt)
           && gimple_assign_rhs_code (def_stmt) == MULT_EXPR)
    {
      op1 = gimple_assign_rhs1 (def_stmt);
      op2 = gimple_assign_rhs2 (def_stmt);
      if (TREE_CODE (op1) != INTEGER_CST && TREE_CODE (op2) != INTEGER_CST)
        return NULL;
      index = (TREE_CODE (op1) == INTEGER_CST) ? op2 : op1;
      return index;
    }
  else
    return NULL_TREE;
}

/* Update MI->dimension_type_size[CURRENT_INDIRECT_LEVEL] with the size
   of the type related to the SSA_VAR, or the type related to the
   lhs of STMT, in the case that it is a MEM_REF.  */
static void
update_type_size (struct matrix_info *mi, gimple stmt, tree ssa_var,
                  int current_indirect_level)
{
  tree lhs;
  HOST_WIDE_INT type_size;

  /* Update type according to the type of the MEM_REF expr.  */
  if (is_gimple_assign (stmt)
      && TREE_CODE (gimple_assign_lhs (stmt)) == MEM_REF)
    {
      lhs = gimple_assign_lhs (stmt);
      gcc_assert (POINTER_TYPE_P
                  (TREE_TYPE (SSA_NAME_VAR (TREE_OPERAND (lhs, 0)))));
      type_size =
        int_size_in_bytes (TREE_TYPE
                           (TREE_TYPE
                            (SSA_NAME_VAR (TREE_OPERAND (lhs, 0)))));
    }
  else
    type_size = int_size_in_bytes (TREE_TYPE (ssa_var));

  /* Record the size of elements accessed (as a whole)
     in the current indirection level (dimension).  If the size of
     elements is not known at compile time, mark it as escaping.  */
  if (type_size <= 0)
    mark_min_matrix_escape_level (mi, current_indirect_level, stmt);
  else
    {
      int l = current_indirect_level;

      if (!mi->dimension_type_size)
        {
          mi->dimension_type_size
            = (HOST_WIDE_INT *) xcalloc (l + 1, sizeof (HOST_WIDE_INT));
          mi->dimension_type_size_len = l + 1;
        }
      else if (mi->dimension_type_size_len < l + 1)
        {
          mi->dimension_type_size
            = (HOST_WIDE_INT *) xrealloc (mi->dimension_type_size,
                                          (l + 1) * sizeof (HOST_WIDE_INT));
          memset (&mi->dimension_type_size[mi->dimension_type_size_len],
                  0, (l + 1 - mi->dimension_type_size_len)
                  * sizeof (HOST_WIDE_INT));
          mi->dimension_type_size_len = l + 1;
        }
      /* Make sure all the accesses in the same level have the same size
         of the type.  */
      if (!mi->dimension_type_size[l])
        mi->dimension_type_size[l] = type_size;
      else if (mi->dimension_type_size[l] != type_size)
        mark_min_matrix_escape_level (mi, l, stmt);
    }
}

/* USE_STMT represents a GIMPLE_CALL, where one of the arguments is the
   ssa var that we want to check because it came from some use of matrix
   MI.  CURRENT_INDIRECT_LEVEL is the indirection level we reached so
   far.  */

static int
analyze_accesses_for_call_stmt (struct matrix_info *mi, tree ssa_var,
                                gimple use_stmt, int current_indirect_level)
{
  tree fndecl = gimple_call_fndecl (use_stmt);

  if (gimple_call_lhs (use_stmt))
    {
      tree lhs = gimple_call_lhs (use_stmt);
      struct ssa_acc_in_tree lhs_acc, rhs_acc;

      memset (&lhs_acc, 0, sizeof (lhs_acc));
      memset (&rhs_acc, 0, sizeof (rhs_acc));

      lhs_acc.ssa_var = ssa_var;
      lhs_acc.t_code = ERROR_MARK;
      ssa_accessed_in_tree (lhs, &lhs_acc);
      rhs_acc.ssa_var = ssa_var;
      rhs_acc.t_code = ERROR_MARK;
      ssa_accessed_in_call_rhs (use_stmt, &rhs_acc);

      /* The SSA must be either in the left side or in the right side,
         to understand what is happening.
         In case the SSA_NAME is found in both sides we should be escaping
         at this level because in this case we cannot calculate the
         address correctly.  */
      if ((lhs_acc.var_found && rhs_acc.var_found
           && lhs_acc.t_code == MEM_REF)
          || (!rhs_acc.var_found && !lhs_acc.var_found))
        {
          mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
          return current_indirect_level;
        }
      gcc_assert (!rhs_acc.var_found || !lhs_acc.var_found);

      /* If we are storing to the matrix at some level, then mark it as
         escaping at that level.  */
      if (lhs_acc.var_found)
        {
          int l = current_indirect_level + 1;

          gcc_assert (lhs_acc.t_code == MEM_REF);
          mark_min_matrix_escape_level (mi, l, use_stmt);
          return current_indirect_level;
        }
    }

  if (fndecl)
    {
      if (DECL_FUNCTION_CODE (fndecl) != BUILT_IN_FREE)
        {
          if (dump_file)
            fprintf (dump_file,
                     "Matrix %s: Function call %s, level %d escapes.\n",
                     get_name (mi->decl), get_name (fndecl),
                     current_indirect_level);
          mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
        }
      else if (mi->free_stmts[current_indirect_level].stmt != NULL
               && mi->free_stmts[current_indirect_level].stmt != use_stmt)
        mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
      else
        {
          /* Record the free statements so we can delete them
             later.  */
          int l = current_indirect_level;

          mi->free_stmts[l].stmt = use_stmt;
          mi->free_stmts[l].func = current_function_decl;
        }
    }
  return current_indirect_level;
}

/* USE_STMT represents a phi node of the ssa var that we want to
   check because it came from some use of matrix MI.
   We check all the escaping levels that get to the PHI node
   and make sure they are all the same escaping;
   if not (which is rare) we let the escaping level be the
   minimum level that gets into that PHI because starting from
   that level we cannot expect the behavior of the indirections.
   CURRENT_INDIRECT_LEVEL is the indirection level we reached so far.  */

static void
analyze_accesses_for_phi_node (struct matrix_info *mi, gimple use_stmt,
                               int current_indirect_level, sbitmap visited,
                               bool record_accesses)
{
  struct matrix_access_phi_node tmp_maphi, *maphi, **pmaphi;

  tmp_maphi.phi = use_stmt;
  if ((maphi = (struct matrix_access_phi_node *)
       htab_find (htab_mat_acc_phi_nodes, &tmp_maphi)))
    {
      if (maphi->indirection_level == current_indirect_level)
        return;
      else
        {
          int level = MIN (maphi->indirection_level,
                           current_indirect_level);
          size_t j;
          gimple stmt = NULL;

          maphi->indirection_level = level;
          for (j = 0; j < gimple_phi_num_args (use_stmt); j++)
            {
              tree def = PHI_ARG_DEF (use_stmt, j);

              if (gimple_code (SSA_NAME_DEF_STMT (def)) != GIMPLE_PHI)
                stmt = SSA_NAME_DEF_STMT (def);
            }
          mark_min_matrix_escape_level (mi, level, stmt);
        }
      return;
    }
  maphi = (struct matrix_access_phi_node *)
    xcalloc (1, sizeof (struct matrix_access_phi_node));
  maphi->phi = use_stmt;
  maphi->indirection_level = current_indirect_level;

  /* Insert to hash table.  */
  pmaphi = (struct matrix_access_phi_node **)
    htab_find_slot (htab_mat_acc_phi_nodes, maphi, INSERT);
  gcc_assert (pmaphi);
  *pmaphi = maphi;

  if (!TEST_BIT (visited, SSA_NAME_VERSION (PHI_RESULT (use_stmt))))
    {
      SET_BIT (visited, SSA_NAME_VERSION (PHI_RESULT (use_stmt)));
      analyze_matrix_accesses (mi, PHI_RESULT (use_stmt),
                               current_indirect_level, false, visited,
                               record_accesses);
      RESET_BIT (visited, SSA_NAME_VERSION (PHI_RESULT (use_stmt)));
    }
}

/* USE_STMT represents an assign statement (whose rhs or lhs includes
   the ssa var that we want to check because it came from some use of
   matrix MI).  CURRENT_INDIRECT_LEVEL is the indirection level we
   reached so far.  */

static int
analyze_accesses_for_assign_stmt (struct matrix_info *mi, tree ssa_var,
                                  gimple use_stmt, int current_indirect_level,
                                  bool last_op, sbitmap visited,
                                  bool record_accesses)
{
  tree lhs = gimple_get_lhs (use_stmt);
  struct ssa_acc_in_tree lhs_acc, rhs_acc;

  memset (&lhs_acc, 0, sizeof (lhs_acc));
  memset (&rhs_acc, 0, sizeof (rhs_acc));

  lhs_acc.ssa_var = ssa_var;
  lhs_acc.t_code = ERROR_MARK;
  ssa_accessed_in_tree (lhs, &lhs_acc);
  rhs_acc.ssa_var = ssa_var;
  rhs_acc.t_code = ERROR_MARK;
  ssa_accessed_in_assign_rhs (use_stmt, &rhs_acc);

  /* The SSA must be either in the left side or in the right side,
     to understand what is happening.
     In case the SSA_NAME is found in both sides we should be escaping
     at this level because in this case we cannot calculate the
     address correctly.  */
  if ((lhs_acc.var_found && rhs_acc.var_found
       && lhs_acc.t_code == MEM_REF)
      || (!rhs_acc.var_found && !lhs_acc.var_found))
    {
      mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
      return current_indirect_level;
    }
  gcc_assert (!rhs_acc.var_found || !lhs_acc.var_found);

  /* If we are storing to the matrix at some level, then mark it as
     escaping at that level.  */
  if (lhs_acc.var_found)
    {
      int l = current_indirect_level + 1;

      gcc_assert (lhs_acc.t_code == MEM_REF);

      if (!(gimple_assign_copy_p (use_stmt)
            || gimple_assign_cast_p (use_stmt))
          || (TREE_CODE (gimple_assign_rhs1 (use_stmt)) != SSA_NAME))
        mark_min_matrix_escape_level (mi, l, use_stmt);
      else
        {
          gimple def_stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (use_stmt));
          analyze_matrix_allocation_site (mi, def_stmt, l, visited);
          if (record_accesses)
            record_access_alloc_site_info (mi, use_stmt, NULL_TREE,
                                           NULL_TREE, l, true);
          update_type_size (mi, use_stmt, NULL, l);
        }
      return current_indirect_level;
    }
  /* Now, check the right-hand-side, to see how the SSA variable
     is used.  */
  if (rhs_acc.var_found)
    {
      if (rhs_acc.t_code != MEM_REF
          && rhs_acc.t_code != POINTER_PLUS_EXPR && rhs_acc.t_code != SSA_NAME)
        {
          mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
          return current_indirect_level;
        }
      /* If the access in the RHS has an indirection increase the
         indirection level.  */
      if (rhs_acc.t_code == MEM_REF)
        {
          if (record_accesses)
            record_access_alloc_site_info (mi, use_stmt, NULL_TREE,
                                           NULL_TREE,
                                           current_indirect_level, true);
          current_indirect_level += 1;
        }
      else if (rhs_acc.t_code == POINTER_PLUS_EXPR)
        {
          gcc_assert (rhs_acc.second_op);
          if (last_op)
            /* Currently we support only one PLUS expression on the
               SSA_NAME that holds the base address of the current
               indirection level; to support the more general case there
               is a need to hold a stack of expressions and regenerate
               the calculation later.  */
            mark_min_matrix_escape_level (mi, current_indirect_level,
                                          use_stmt);
          else
            {
              tree index;
              tree op1, op2;

              op1 = gimple_assign_rhs1 (use_stmt);
              op2 = gimple_assign_rhs2 (use_stmt);

              op2 = (op1 == ssa_var) ? op2 : op1;
              if (TREE_CODE (op2) == INTEGER_CST)
                index =
                  build_int_cst (TREE_TYPE (op1),
                                 TREE_INT_CST_LOW (op2) /
                                 int_size_in_bytes (TREE_TYPE (op1)));
              else
                {
                  index =
                    get_index_from_offset (op2, SSA_NAME_DEF_STMT (op2));
                  if (index == NULL_TREE)
                    {
                      mark_min_matrix_escape_level (mi,
                                                    current_indirect_level,
                                                    use_stmt);
                      return current_indirect_level;
                    }
                }
              if (record_accesses)
                record_access_alloc_site_info (mi, use_stmt, op2,
                                               index,
                                               current_indirect_level, false);
            }
        }
      /* If we are storing this level of indirection mark it as
         escaping.  */
      if (lhs_acc.t_code == MEM_REF || TREE_CODE (lhs) != SSA_NAME)
        {
          int l = current_indirect_level;

          /* One exception is when we are storing to the matrix
             variable itself; this is the case of malloc, we must make
             sure that it's the one and only one call to malloc so
             we call analyze_matrix_allocation_site to check
             this out.  */
          if (TREE_CODE (lhs) != VAR_DECL || lhs != mi->decl)
            mark_min_matrix_escape_level (mi, current_indirect_level,
                                          use_stmt);
          else
            {
              /* Also update the escaping level.  */
              analyze_matrix_allocation_site (mi, use_stmt, l, visited);
              if (record_accesses)
                record_access_alloc_site_info (mi, use_stmt, NULL_TREE,
                                               NULL_TREE, l, true);
            }
        }
      else
        {
          /* We are placing it in an SSA, follow that SSA.  */
          analyze_matrix_accesses (mi, lhs,
                                   current_indirect_level,
                                   rhs_acc.t_code == POINTER_PLUS_EXPR,
                                   visited, record_accesses);
        }
    }
  return current_indirect_level;
}

/* Given an SSA_VAR (coming from a use statement of the matrix MI),
   follow its uses and level of indirection and find out the minimum
   indirection level it escapes in (the highest dimension) and the maximum
   level it is accessed in (this will be the actual dimension of the
   matrix).  The information is accumulated in MI.
   We look at the immediate uses; if one escapes we finish, if not,
   we make a recursive call for each one of the immediate uses of the
   resulting SSA name.  */
static void
analyze_matrix_accesses (struct matrix_info *mi, tree ssa_var,
                         int current_indirect_level, bool last_op,
                         sbitmap visited, bool record_accesses)
{
  imm_use_iterator imm_iter;
  use_operand_p use_p;

  update_type_size (mi, SSA_NAME_DEF_STMT (ssa_var), ssa_var,
                    current_indirect_level);

  /* We don't go beyond the escaping level when we are performing the
     flattening.  NOTE: we keep the last indirection level that doesn't
     escape.  */
  if (mi->min_indirect_level_escape > -1
      && mi->min_indirect_level_escape <= current_indirect_level)
    return;

  /* Now go over the uses of the SSA_NAME and check how it is used in
     each one of them.  We are mainly looking for the pattern MEM_REF,
     then a POINTER_PLUS_EXPR, then MEM_REF etc., while in between there
     could be any number of copies and casts.  */
  gcc_assert (TREE_CODE (ssa_var) == SSA_NAME);

  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, ssa_var)
    {
      gimple use_stmt = USE_STMT (use_p);
      if (gimple_code (use_stmt) == GIMPLE_PHI)
        /* We check all the escaping levels that get to the PHI node
           and make sure they are all the same escaping;
           if not (which is rare) we let the escaping level be the
           minimum level that gets into that PHI because starting from
           that level we cannot expect the behavior of the indirections.  */

        analyze_accesses_for_phi_node (mi, use_stmt, current_indirect_level,
                                       visited, record_accesses);

      else if (is_gimple_call (use_stmt))
        analyze_accesses_for_call_stmt (mi, ssa_var, use_stmt,
                                        current_indirect_level);
      else if (is_gimple_assign (use_stmt))
        current_indirect_level =
          analyze_accesses_for_assign_stmt (mi, ssa_var, use_stmt,
                                            current_indirect_level, last_op,
                                            visited, record_accesses);
    }
}

typedef struct
{
  tree fn;
  gimple stmt;
} check_var_data;

/* A walk_tree function to go over the VAR_DECL, PARM_DECL nodes of
   the malloc size expression and check that those aren't changed
   over the function.  */
static tree
check_var_notmodified_p (tree * tp, int *walk_subtrees, void *data)
{
  basic_block bb;
  tree t = *tp;
  check_var_data *callback_data = (check_var_data*) data;
  tree fn = callback_data->fn;
  gimple_stmt_iterator gsi;
  gimple stmt;

  if (TREE_CODE (t) != VAR_DECL && TREE_CODE (t) != PARM_DECL)
    return NULL_TREE;

  FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (fn))
    {
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
        {
          stmt = gsi_stmt (gsi);
          if (!is_gimple_assign (stmt) && !is_gimple_call (stmt))
            continue;
          if (gimple_get_lhs (stmt) == t)
            {
              callback_data->stmt = stmt;
              return t;
            }
        }
    }
  *walk_subtrees = 1;
  return NULL_TREE;
}

/* Go backwards in the use-def chains and find out the expression
   represented by the possible SSA name in STMT, until it is composed
   of only VAR_DECL, PARM_DECL and INT_CST.  In case of phi nodes
   we make sure that all the arguments represent the same subexpression,
   otherwise we fail.  */

static tree
can_calculate_stmt_before_stmt (gimple stmt, sbitmap visited)
{
  tree op1, op2, res;
  enum tree_code code;

  switch (gimple_code (stmt))
    {
    case GIMPLE_ASSIGN:
      code = gimple_assign_rhs_code (stmt);
      op1 = gimple_assign_rhs1 (stmt);

      switch (code)
        {
        case POINTER_PLUS_EXPR:
        case PLUS_EXPR:
        case MINUS_EXPR:
        case MULT_EXPR:

          op2 = gimple_assign_rhs2 (stmt);
          op1 = can_calculate_expr_before_stmt (op1, visited);
          if (!op1)
            return NULL_TREE;
          op2 = can_calculate_expr_before_stmt (op2, visited);
          if (op2)
            return fold_build2 (code, gimple_expr_type (stmt), op1, op2);
          return NULL_TREE;

        CASE_CONVERT:
          res = can_calculate_expr_before_stmt (op1, visited);
          if (res != NULL_TREE)
            return build1 (code, gimple_expr_type (stmt), res);
          else
            return NULL_TREE;

        default:
          if (gimple_assign_single_p (stmt))
            return can_calculate_expr_before_stmt (op1, visited);
          else
            return NULL_TREE;
        }

    case GIMPLE_PHI:
      {
        size_t j;

        res = NULL_TREE;
        /* Make sure all the arguments represent the same value.  */
        for (j = 0; j < gimple_phi_num_args (stmt); j++)
          {
            tree new_res;
            tree def = PHI_ARG_DEF (stmt, j);

            new_res = can_calculate_expr_before_stmt (def, visited);
            if (res == NULL_TREE)
              res = new_res;
            else if (!new_res || !expressions_equal_p (res, new_res))
              return NULL_TREE;
          }
        return res;
      }

    default:
      return NULL_TREE;
    }
}

/* Go backwards in the use-def chains and find out the expression
   represented by the possible SSA name in EXPR, until it is composed
   of only VAR_DECL, PARM_DECL and INT_CST.  In case of phi nodes
   we make sure that all the arguments represent the same subexpression,
   otherwise we fail.  */
static tree
can_calculate_expr_before_stmt (tree expr, sbitmap visited)
{
  gimple def_stmt;
  tree res;

  switch (TREE_CODE (expr))
    {
    case SSA_NAME:
      /* In the case of a loop, we don't know how to represent this
         expression.  */
      if (TEST_BIT (visited, SSA_NAME_VERSION (expr)))
        return NULL_TREE;

      SET_BIT (visited, SSA_NAME_VERSION (expr));
      def_stmt = SSA_NAME_DEF_STMT (expr);
      res = can_calculate_stmt_before_stmt (def_stmt, visited);
      RESET_BIT (visited, SSA_NAME_VERSION (expr));
      return res;
    case VAR_DECL:
    case PARM_DECL:
    case INTEGER_CST:
      return expr;

    default:
      return NULL_TREE;
    }
}

/* There should be only one allocation function for the dimensions
   that don't escape.  Here we check the allocation sites in this
   function.  We must make sure that all the dimensions are allocated
   using malloc and that the malloc size parameter expression could be
   pre-calculated before the call to the malloc of dimension 0.

   Given a candidate matrix for flattening -- MI -- check if it's
   appropriate for flattening -- we analyze the allocation
   sites that we recorded in the previous analysis.  The result of the
   analysis is a level of indirection (matrix dimension) in which the
   flattening is safe.  We check the following conditions:
   1. There is only one allocation site for each dimension.
   2. The allocation sites of all the dimensions are in the same
      function.
   (The above two are being taken care of during the analysis when
   we check the allocation site.)
   3. All the dimensions that we flatten are allocated at once; thus
      the total size must be known before the allocation of the
      dimension 0 (top level) -- we must make sure we represent the
      size of the allocation as an expression of global parameters or
      constants and that those don't change over the function.  */

static int
check_allocation_function (void **slot, void *data ATTRIBUTE_UNUSED)
{
  int level;
  struct matrix_info *mi = (struct matrix_info *) *slot;
  sbitmap visited;

  if (!mi->malloc_for_level)
    return 1;

  visited = sbitmap_alloc (num_ssa_names);

  /* Do nothing if the current function is not the allocation
     function of MI.  */
  if (mi->allocation_function_decl != current_function_decl
      /* We aren't in the main allocation function yet.  */
      || !mi->malloc_for_level[0])
    return 1;

  for (level = 1; level < mi->max_malloced_level; level++)
    if (!mi->malloc_for_level[level])
      break;

  mark_min_matrix_escape_level (mi, level, NULL);

  /* Check if the expression of the size passed to malloc could be
     pre-calculated before the malloc of level 0.  */
  for (level = 1; level < mi->min_indirect_level_escape; level++)
    {
      gimple call_stmt;
      tree size;
      struct malloc_call_data mcd = {NULL, NULL_TREE, NULL_TREE};

      call_stmt = mi->malloc_for_level[level];

      /* Find the correct malloc information.  */
      collect_data_for_malloc_call (call_stmt, &mcd);

      /* No need to check anticipation for constants.  */
      if (TREE_CODE (mcd.size_var) == INTEGER_CST)
        {
          if (!mi->dimension_size)
            {
              mi->dimension_size =
                (tree *) xcalloc (mi->min_indirect_level_escape,
                                  sizeof (tree));
              mi->dimension_size_orig =
                (tree *) xcalloc (mi->min_indirect_level_escape,
                                  sizeof (tree));
            }
          mi->dimension_size[level] = mcd.size_var;
          mi->dimension_size_orig[level] = mcd.size_var;
          continue;
        }
      /* ??? Here we should also add the way to calculate the size
         expression, not only know that it is anticipated.  */
      sbitmap_zero (visited);
      size = can_calculate_expr_before_stmt (mcd.size_var, visited);
      if (size == NULL_TREE)
        {
          mark_min_matrix_escape_level (mi, level, call_stmt);
          if (dump_file)
            fprintf (dump_file,
                     "Matrix %s: Cannot calculate the size of allocation, escaping at level %d\n",
                     get_name (mi->decl), level);
          break;
        }
      if (!mi->dimension_size)
        {
          mi->dimension_size =
            (tree *) xcalloc (mi->min_indirect_level_escape, sizeof (tree));
          mi->dimension_size_orig =
            (tree *) xcalloc (mi->min_indirect_level_escape, sizeof (tree));
        }
      mi->dimension_size[level] = size;
      mi->dimension_size_orig[level] = size;
    }

  /* We don't need those anymore.  */
  for (level = mi->min_indirect_level_escape;
       level < mi->max_malloced_level; level++)
    mi->malloc_for_level[level] = NULL;
  return 1;
}

/* Track all access and allocation sites.  */
static void
find_sites_in_func (bool record)
{
  sbitmap visited_stmts_1;

  gimple_stmt_iterator gsi;
  gimple stmt;
  basic_block bb;
  struct matrix_info tmpmi, *mi;

  visited_stmts_1 = sbitmap_alloc (num_ssa_names);

  FOR_EACH_BB (bb)
    {
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
        {
          tree lhs;

          stmt = gsi_stmt (gsi);
          lhs = gimple_get_lhs (stmt);
          if (lhs != NULL_TREE
              && TREE_CODE (lhs) == VAR_DECL)
            {
              tmpmi.decl = lhs;
              if ((mi = (struct matrix_info *) htab_find (matrices_to_reorg,
                                                          &tmpmi)))
                {
                  sbitmap_zero (visited_stmts_1);
                  analyze_matrix_allocation_site (mi, stmt, 0, visited_stmts_1);
                }
            }
          if (is_gimple_assign (stmt)
              && gimple_assign_single_p (stmt)
              && TREE_CODE (lhs) == SSA_NAME
              && TREE_CODE (gimple_assign_rhs1 (stmt)) == VAR_DECL)
            {
              tmpmi.decl = gimple_assign_rhs1 (stmt);
              if ((mi = (struct matrix_info *) htab_find (matrices_to_reorg,
                                                          &tmpmi)))
                {
                  sbitmap_zero (visited_stmts_1);
                  analyze_matrix_accesses (mi, lhs, 0,
                                           false, visited_stmts_1, record);
                }
            }
        }
    }
  sbitmap_free (visited_stmts_1);
}

/* Traverse the use-def chains to see if there are matrices that
   are passed through pointers and we cannot know how they are accessed.
   For each SSA-name defined by a global variable of our interest,
   we traverse the use-def chains of the SSA and follow the indirections,
   and record in what level of indirection the use of the variable
   escapes.  A use of a pointer escapes when it is passed to a function,
   stored into memory or assigned (except in malloc and free calls).  */

static void
record_all_accesses_in_func (void)
{
  unsigned i;
  sbitmap visited_stmts_1;

  visited_stmts_1 = sbitmap_alloc (num_ssa_names);

  for (i = 0; i < num_ssa_names; i++)
    {
      struct matrix_info tmpmi, *mi;
      tree ssa_var = ssa_name (i);
      tree rhs, lhs;

      if (!ssa_var
          || !is_gimple_assign (SSA_NAME_DEF_STMT (ssa_var))
          || !gimple_assign_single_p (SSA_NAME_DEF_STMT (ssa_var)))
        continue;
      rhs = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (ssa_var));
      lhs = gimple_assign_lhs (SSA_NAME_DEF_STMT (ssa_var));
      if (TREE_CODE (rhs) != VAR_DECL && TREE_CODE (lhs) != VAR_DECL)
        continue;

      /* If the RHS is a matrix that we want to analyze, follow the def-use
         chain for this SSA_VAR and check for escapes or apply the
         flattening.  */
      tmpmi.decl = rhs;
      if ((mi = (struct matrix_info *) htab_find (matrices_to_reorg, &tmpmi)))
        {
          /* This variable will track the visited PHI nodes, so we can limit
             its size to the maximum number of SSA names.  */
          sbitmap_zero (visited_stmts_1);
          analyze_matrix_accesses (mi, ssa_var,
                                   0, false, visited_stmts_1, true);
        }
    }
  sbitmap_free (visited_stmts_1);
}

/* Used when we want to convert the expression: RESULT = something *
   ORIG to RESULT = something * NEW_VAL.  If ORIG and NEW_VAL are both
   powers of 2, shift operations can be used; otherwise division and
   multiplication are needed.  */

static tree
compute_offset (HOST_WIDE_INT orig, HOST_WIDE_INT new_val, tree result)
{

  int x, y;
  tree result1, ratio, log, orig_tree, new_tree;

  x = exact_log2 (orig);
  y = exact_log2 (new_val);

  if (x != -1 && y != -1)
    {
      if (x == y)
        return result;
      else if (x > y)
        {
          /* RESULT * NEW_VAL / ORIG == RESULT >> (x - y).  */
          log = build_int_cst (TREE_TYPE (result), x - y);
          result1 =
            fold_build2 (RSHIFT_EXPR, TREE_TYPE (result), result, log);
          return result1;
        }
      /* RESULT * NEW_VAL / ORIG == RESULT << (y - x).  */
      log = build_int_cst (TREE_TYPE (result), y - x);
      result1 = fold_build2 (LSHIFT_EXPR, TREE_TYPE (result), result, log);

      return result1;
    }
  orig_tree = build_int_cst (TREE_TYPE (result), orig);
  new_tree = build_int_cst (TREE_TYPE (result), new_val);
  ratio = fold_build2 (TRUNC_DIV_EXPR, TREE_TYPE (result), result, orig_tree);
  result1 = fold_build2 (MULT_EXPR, TREE_TYPE (result), ratio, new_tree);

  return result1;
}
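
/* For example, rescaling an offset computed against 8-byte elements to
   2-byte elements is compute_offset (8, 2, offset), which per the
   equation above yields offset * 2 / 8, i.e. offset >> 2 in the
   power-of-2 fast path.  */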

/* We know that we are allowed to perform matrix flattening (according to the
   escape analysis), so we traverse the use-def chains of the SSA vars
   defined by the global variables pointing to the matrices of our interest.
   In each use of the SSA we calculate the offset from the base address
   according to the following equation:

     a[I1][I2]...[Ik], where D1..Dk is the length of each dimension and the
     escaping level is m <= k, and a' is the new allocated matrix,
     will be translated to:

       b[I(m+1)]...[Ik]

     where
       b = a' + I1*D2*...*Dm + I2*D3*...*Dm + ... + Im
 */
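
/* For example, for a fully flattened three-dimensional matrix
   a[D1][D2][D3] (escaping level m == k == 3), the access a[I1][I2][I3]
   becomes a'[I1*D2*D3 + I2*D3 + I3]; with D2 == 4 and D3 == 5, element
   a[2][3][1] lives at element offset 2*4*5 + 3*5 + 1 == 56.  */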
static int
transform_access_sites (void **slot, void *data ATTRIBUTE_UNUSED)
{
  gimple_stmt_iterator gsi;
  struct matrix_info *mi = (struct matrix_info *) *slot;
  int min_escape_l = mi->min_indirect_level_escape;
  struct access_site_info *acc_info;
  enum tree_code code;
  int i;

  if (min_escape_l < 2 || !mi->access_l)
    return 1;
  for (i = 0; VEC_iterate (access_site_info_p, mi->access_l, i, acc_info);
       i++)
    {
      /* This is possible because we collect the access sites before
         we determine the final minimum indirection level.  */
      if (acc_info->level >= min_escape_l)
        {
          free (acc_info);
          continue;
        }
      if (acc_info->is_alloc)
        {
          if (acc_info->level >= 0 && gimple_bb (acc_info->stmt))
            {
              ssa_op_iter iter;
              tree def;
              gimple stmt = acc_info->stmt;
              tree lhs;

              FOR_EACH_SSA_TREE_OPERAND (def, stmt, iter, SSA_OP_DEF)
                mark_sym_for_renaming (SSA_NAME_VAR (def));
              gsi = gsi_for_stmt (stmt);
              gcc_assert (is_gimple_assign (acc_info->stmt));
              lhs = gimple_assign_lhs (acc_info->stmt);
              if (TREE_CODE (lhs) == SSA_NAME
                  && acc_info->level < min_escape_l - 1)
                {
                  imm_use_iterator imm_iter;
                  use_operand_p use_p;
                  gimple use_stmt;

                  FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, lhs)
                    FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
                      {
                        tree rhs, tmp;
                        gimple new_stmt;

                        gcc_assert (gimple_assign_rhs_code (acc_info->stmt)
                                    == MEM_REF);
                        /* Emit a convert statement to convert to the type
                           of the use.  */
                        tmp = create_tmp_var (TREE_TYPE (lhs), "new");
                        add_referenced_var (tmp);
                        rhs = gimple_assign_rhs1 (acc_info->stmt);
                        rhs = fold_convert (TREE_TYPE (tmp),
                                            TREE_OPERAND (rhs, 0));
                        new_stmt = gimple_build_assign (tmp, rhs);
                        tmp = make_ssa_name (tmp, new_stmt);
                        gimple_assign_set_lhs (new_stmt, tmp);
                        gsi = gsi_for_stmt (acc_info->stmt);
                        gsi_insert_after (&gsi, new_stmt, GSI_SAME_STMT);
                        SET_USE (use_p, tmp);
                      }
                }
              if (acc_info->level < min_escape_l - 1)
                gsi_remove (&gsi, true);
            }
          free (acc_info);
          continue;
        }
      code = gimple_assign_rhs_code (acc_info->stmt);
      if (code == MEM_REF
          && acc_info->level < min_escape_l - 1)
        {
          /* Replace the MEM_REF with a NOP (cast); usually we are casting
             from "pointer to type" to "type".  */
          tree t =
            build1 (NOP_EXPR, TREE_TYPE (gimple_assign_rhs1 (acc_info->stmt)),
                    TREE_OPERAND (gimple_assign_rhs1 (acc_info->stmt), 0));
          gimple_assign_set_rhs_code (acc_info->stmt, NOP_EXPR);
          gimple_assign_set_rhs1 (acc_info->stmt, t);
        }
      else if (code == POINTER_PLUS_EXPR
               && acc_info->level < (min_escape_l))
        {
          imm_use_iterator imm_iter;
          use_operand_p use_p;

          tree offset;
          int k = acc_info->level;
          tree num_elements, total_elements;
          tree tmp1;
          tree d_size = mi->dimension_size[k];

          /* We have already made sure in the analysis that the first
             operand is the base and the second is the offset.  */
          offset = acc_info->offset;
          if (mi->dim_map[k] == min_escape_l - 1)
            {
              if (!check_transpose_p || mi->is_transposed_p == false)
                tmp1 = offset;
              else
                {
                  tree new_offset;

                  new_offset =
                    compute_offset (mi->dimension_type_size[min_escape_l],
                                    mi->dimension_type_size[k + 1], offset);

                  total_elements = new_offset;
                  if (new_offset != offset)
                    {
                      gsi = gsi_for_stmt (acc_info->stmt);
                      tmp1 = force_gimple_operand_gsi (&gsi, total_elements,
                                                       true, NULL,
                                                       true, GSI_SAME_STMT);
                    }
                  else
                    tmp1 = offset;
                }
            }
          else
            {
              d_size = mi->dimension_size[mi->dim_map[k] + 1];
              num_elements =
                fold_build2 (MULT_EXPR, sizetype,
                             fold_convert (sizetype, acc_info->index),
                             fold_convert (sizetype, d_size));
              add_referenced_var (d_size);
              gsi = gsi_for_stmt (acc_info->stmt);
              tmp1 = force_gimple_operand_gsi (&gsi, num_elements, true,
                                               NULL, true, GSI_SAME_STMT);
            }
          /* Replace the offset if needed.  */
          if (tmp1 != offset)
            {
              if (TREE_CODE (offset) == SSA_NAME)
                {
                  gimple use_stmt;

                  FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, offset)
                    FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
                      if (use_stmt == acc_info->stmt)
                        SET_USE (use_p, tmp1);
                }
              else
                {
                  gcc_assert (TREE_CODE (offset) == INTEGER_CST);
                  gimple_assign_set_rhs2 (acc_info->stmt, tmp1);
                  update_stmt (acc_info->stmt);
                }
            }
        }
      /* ??? meanwhile this happens because we record the same access
         site more than once; we should be using a hash table to
         avoid this and insert the STMT of the access site only
         once.
         else
           gcc_unreachable (); */
      free (acc_info);
    }
  VEC_free (access_site_info_p, heap, mi->access_l);

  update_ssa (TODO_update_ssa);
#ifdef ENABLE_CHECKING
  verify_ssa (true);
#endif
  return 1;
}

/* Sort the array A of counts; arrange DIM_MAP to reflect the new order.  */

static void
sort_dim_hot_level (gcov_type * a, int *dim_map, int n)
{
  int i, j, tmp1;
  gcov_type tmp;

  for (i = 0; i < n - 1; i++)
    for (j = 0; j < n - 1 - i; j++)
      if (a[j + 1] < a[j])
        {
          /* Swap a[j] and a[j+1], keeping dim_map in step.  */
          tmp = a[j];
          a[j] = a[j + 1];
          a[j + 1] = tmp;

          tmp1 = dim_map[j];
          dim_map[j] = dim_map[j + 1];
          dim_map[j + 1] = tmp1;
        }
}
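
/* A worked example (illustrative only): with access counts
   a = { 5, 2, 9 } and dim_map = { 0, 1, 2 } on entry, the bubble sort
   above leaves a = { 2, 5, 9 } and dim_map = { 1, 0, 2 }; the counts
   end up in ascending order and dim_map records which original
   dimension each sorted slot came from.  */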

/* Replace the multiple mallocs (one for each dimension) with one malloc
   of size DIM1*DIM2*...*DIMN*size_of_element.
   Make sure that we hold the size used at the malloc site inside a
   new global variable; this way we ensure that the size doesn't
   change and is accessible from all the other functions that
   use the matrix.  Also, the original calls to free are deleted,
   and replaced by a new call that frees the flattened matrix.  */
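
/* On the deallocation side the effect is the mirror image (a sketch,
   assuming a fully flattened two-level matrix):

     for (i = 0; i < dim1; i++)
       free (a[i]);
     free (a);

   collapses to the single

     free (a);

   because only the level-0 allocation remains.  */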
static int
transform_allocation_sites (void **slot, void *data ATTRIBUTE_UNUSED)
{
  int i;
  struct matrix_info *mi;
  tree type, oldfn, prev_dim_size;
  gimple call_stmt_0, use_stmt;
  struct cgraph_node *c_node;
  struct cgraph_edge *e;
  gimple_stmt_iterator gsi;
  struct malloc_call_data mcd = {NULL, NULL_TREE, NULL_TREE};
  HOST_WIDE_INT element_size;

  imm_use_iterator imm_iter;
  use_operand_p use_p;
  tree old_size_0, tmp;
  int min_escape_l;
  int id;

  mi = (struct matrix_info *) *slot;

  min_escape_l = mi->min_indirect_level_escape;

  if (!mi->malloc_for_level)
    mi->min_indirect_level_escape = 0;

  if (mi->min_indirect_level_escape < 2)
    return 1;

  mi->dim_map = (int *) xcalloc (mi->min_indirect_level_escape, sizeof (int));
  for (i = 0; i < mi->min_indirect_level_escape; i++)
    mi->dim_map[i] = i;
  if (check_transpose_p)
    {
      int i;

      if (dump_file)
        {
          fprintf (dump_file, "Matrix %s:\n", get_name (mi->decl));
          for (i = 0; i < min_escape_l; i++)
            {
              fprintf (dump_file, "dim %d before sort ", i);
              if (mi->dim_hot_level)
                fprintf (dump_file,
                         "count is " HOST_WIDEST_INT_PRINT_DEC " \n",
                         mi->dim_hot_level[i]);
            }
        }
      sort_dim_hot_level (mi->dim_hot_level, mi->dim_map,
                          mi->min_indirect_level_escape);
      if (dump_file)
        for (i = 0; i < min_escape_l; i++)
          {
            fprintf (dump_file, "dim %d after sort\n", i);
            if (mi->dim_hot_level)
              fprintf (dump_file, "count is " HOST_WIDE_INT_PRINT_DEC
                       " \n", (HOST_WIDE_INT) mi->dim_hot_level[i]);
          }
      for (i = 0; i < mi->min_indirect_level_escape; i++)
        {
          if (dump_file)
            fprintf (dump_file, "dim_map[%d] after sort %d\n", i,
                     mi->dim_map[i]);
          if (mi->dim_map[i] != i)
            {
              if (dump_file)
                fprintf (dump_file,
                         "Transposed dimensions: dim %d is now dim %d\n",
                         mi->dim_map[i], i);
              mi->is_transposed_p = true;
            }
        }
    }
  else
    {
      for (i = 0; i < mi->min_indirect_level_escape; i++)
        mi->dim_map[i] = i;
    }
  /* Call statement of allocation site of level 0.  */
  call_stmt_0 = mi->malloc_for_level[0];

  /* Find the correct malloc information.  */
  collect_data_for_malloc_call (call_stmt_0, &mcd);

  mi->dimension_size[0] = mcd.size_var;
  mi->dimension_size_orig[0] = mcd.size_var;
  /* Make sure that the variables in the size expression for
     all the dimensions (above level 0) aren't modified in
     the allocation function.  */
  for (i = 1; i < mi->min_indirect_level_escape; i++)
    {
      tree t;
      check_var_data data;

      /* mi->dimension_size must contain the expression of the size calculated
         in check_allocation_function.  */
      gcc_assert (mi->dimension_size[i]);

      data.fn = mi->allocation_function_decl;
      data.stmt = NULL;
      t = walk_tree_without_duplicates (&(mi->dimension_size[i]),
                                        check_var_notmodified_p,
                                        &data);
      if (t != NULL_TREE)
        {
          mark_min_matrix_escape_level (mi, i, data.stmt);
          break;
        }
    }

  if (mi->min_indirect_level_escape < 2)
    return 1;

  /* Make sure that the size expression is available before the call
     to malloc of level 0; the new statements are inserted there.  */
  gsi = gsi_for_stmt (call_stmt_0);

  /* Find out the size of each dimension by looking at the malloc
     sites and create a global variable to hold it.
     We add the assignment to the global before the malloc of level 0.  */

  /* To be able to produce gimple temporaries.  */
  oldfn = current_function_decl;
  current_function_decl = mi->allocation_function_decl;
  push_cfun (DECL_STRUCT_FUNCTION (mi->allocation_function_decl));

  /* Set the dimension sizes as follows:
     DIM_SIZE[i] = DIM_SIZE[n] * ... * DIM_SIZE[i]
     where n is the maximum non-escaping level.  */
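
  /* For example (a sketch, no transposing): for a fully flattened
     int a[dim1][dim2], where the original level-0 malloc argument is
     dim1 * sizeof (int *) and the level-1 argument is dim2 * sizeof (int),
     the loop below computes

       DIM_SIZE[1] = dim2 * sizeof (int)            (bytes in one row)
       DIM_SIZE[0] = dim1 * DIM_SIZE[1]
                   = dim1 * dim2 * sizeof (int)     (new malloc argument)

     and stores each in a fresh static variable.  */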
  element_size = mi->dimension_type_size[mi->min_indirect_level_escape];
  prev_dim_size = NULL_TREE;

  for (i = mi->min_indirect_level_escape - 1; i >= 0; i--)
    {
      tree dim_size, dim_var;
      gimple stmt;
      tree d_type_size;

      /* Now put the size expression in a global variable and initialize it to
         the size expression before the malloc of level 0.  */
      dim_var =
        add_new_static_var (TREE_TYPE
                            (mi->dimension_size_orig[mi->dim_map[i]]));
      type = TREE_TYPE (mi->dimension_size_orig[mi->dim_map[i]]);

      /* DIM_SIZE = MALLOC_SIZE_PARAM / TYPE_SIZE.  */
      /* Find which dim ID becomes dim I.  */
      for (id = 0; id < mi->min_indirect_level_escape; id++)
        if (mi->dim_map[id] == i)
          break;
      d_type_size =
        build_int_cst (type, mi->dimension_type_size[id + 1]);
      if (!prev_dim_size)
        prev_dim_size = build_int_cst (type, element_size);
      if (!check_transpose_p && i == mi->min_indirect_level_escape - 1)
        {
          dim_size = mi->dimension_size_orig[id];
        }
      else
        {
          dim_size =
            fold_build2 (TRUNC_DIV_EXPR, type, mi->dimension_size_orig[id],
                         d_type_size);

          dim_size = fold_build2 (MULT_EXPR, type, dim_size, prev_dim_size);
        }
      dim_size = force_gimple_operand_gsi (&gsi, dim_size, true, NULL,
                                           true, GSI_SAME_STMT);
      /* GLOBAL_HOLDING_THE_SIZE = DIM_SIZE.  */
      stmt = gimple_build_assign (dim_var, dim_size);
      mark_symbols_for_renaming (stmt);
      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);

      prev_dim_size = mi->dimension_size[i] = dim_var;
    }
  update_ssa (TODO_update_ssa);
  /* Replace the malloc size argument in the malloc of level 0 to be
     the size of all the dimensions.  */
  c_node = cgraph_node (mi->allocation_function_decl);
  old_size_0 = gimple_call_arg (call_stmt_0, 0);
  tmp = force_gimple_operand_gsi (&gsi, mi->dimension_size[0], true,
                                  NULL, true, GSI_SAME_STMT);
  if (TREE_CODE (old_size_0) == SSA_NAME)
    {
      FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, old_size_0)
        FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
          if (use_stmt == call_stmt_0)
            SET_USE (use_p, tmp);
    }
  /* When deleting the calls to malloc we need also to remove the edge from
     the call graph to keep it consistent.  Notice that cgraph_edge may
     create a new node in the call graph if there is no node for the given
     declaration; this shouldn't be the case but currently there is no way to
     check this outside of "cgraph.c".  */
  for (i = 1; i < mi->min_indirect_level_escape; i++)
    {
      gimple_stmt_iterator gsi;

      gimple call_stmt = mi->malloc_for_level[i];
      gcc_assert (is_gimple_call (call_stmt));
      e = cgraph_edge (c_node, call_stmt);
      gcc_assert (e);
      cgraph_remove_edge (e);
      gsi = gsi_for_stmt (call_stmt);
      /* Remove the call stmt.  */
      gsi_remove (&gsi, true);
      /* Remove the assignment of the allocated area.  */
      FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter,
                             gimple_call_lhs (call_stmt))
        {
          gsi = gsi_for_stmt (use_stmt);
          gsi_remove (&gsi, true);
        }
    }
  update_ssa (TODO_update_ssa);
#ifdef ENABLE_CHECKING
  verify_ssa (true);
#endif
  /* Delete the calls to free.  */
  for (i = 1; i < mi->min_indirect_level_escape; i++)
    {
      gimple_stmt_iterator gsi;

      /* ??? wonder why this case is possible but we failed on it once.  */
      if (!mi->free_stmts[i].stmt)
        continue;

      c_node = cgraph_node (mi->free_stmts[i].func);
      gcc_assert (is_gimple_call (mi->free_stmts[i].stmt));
      e = cgraph_edge (c_node, mi->free_stmts[i].stmt);
      gcc_assert (e);
      cgraph_remove_edge (e);
      current_function_decl = mi->free_stmts[i].func;
      set_cfun (DECL_STRUCT_FUNCTION (mi->free_stmts[i].func));
      gsi = gsi_for_stmt (mi->free_stmts[i].stmt);
      gsi_remove (&gsi, true);
    }
  /* Return to the previous situation.  */
  current_function_decl = oldfn;
  pop_cfun ();
  return 1;
}

/* Print out the results of the escape analysis.  */

static int
dump_matrix_reorg_analysis (void **slot, void *data ATTRIBUTE_UNUSED)
{
  struct matrix_info *mi = (struct matrix_info *) *slot;

  if (!dump_file)
    return 1;
  fprintf (dump_file, "Matrix \"%s\"; Escaping Level: %d, Num Dims: %d,",
           get_name (mi->decl), mi->min_indirect_level_escape, mi->num_dims);
  fprintf (dump_file, " Malloc Dims: %d, ", mi->max_malloced_level);
  fprintf (dump_file, "\n");
  if (mi->min_indirect_level_escape >= 2)
    fprintf (dump_file, "Flattened %d dimensions \n",
             mi->min_indirect_level_escape);
  return 1;
}
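
/* For a fully flattened two-dimensional matrix "mat" the dump would
   read roughly (an illustrative sketch):

     Matrix "mat"; Escaping Level: 2, Num Dims: 2, Malloc Dims: 2,
     Flattened 2 dimensions
 */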

/* Perform matrix flattening.  */

static unsigned int
matrix_reorg (void)
{
  struct cgraph_node *node;

  if (profile_info)
    check_transpose_p = true;
  else
    check_transpose_p = false;
  /* If there are hand-written vectors, we skip this optimization.  */
  for (node = cgraph_nodes; node; node = node->next)
    if (!may_flatten_matrices (node))
      return 0;
  matrices_to_reorg = htab_create (37, mtt_info_hash, mtt_info_eq, mat_free);
  /* Find and record all potential matrices in the program.  */
  find_matrices_decl ();
  /* Analyze the accesses of the matrices (escaping analysis).  */
  for (node = cgraph_nodes; node; node = node->next)
    if (node->analyzed)
      {
        tree temp_fn;

        temp_fn = current_function_decl;
        current_function_decl = node->decl;
        push_cfun (DECL_STRUCT_FUNCTION (node->decl));
        bitmap_obstack_initialize (NULL);
        gimple_register_cfg_hooks ();

        if (!gimple_in_ssa_p (cfun))
          {
            free_dominance_info (CDI_DOMINATORS);
            free_dominance_info (CDI_POST_DOMINATORS);
            pop_cfun ();
            current_function_decl = temp_fn;
            bitmap_obstack_release (NULL);

            return 0;
          }

#ifdef ENABLE_CHECKING
        verify_flow_info ();
#endif

        if (!matrices_to_reorg)
          {
            free_dominance_info (CDI_DOMINATORS);
            free_dominance_info (CDI_POST_DOMINATORS);
            pop_cfun ();
            current_function_decl = temp_fn;
            bitmap_obstack_release (NULL);

            return 0;
          }

        /* Create htab for phi nodes.  */
        htab_mat_acc_phi_nodes = htab_create (37, mat_acc_phi_hash,
                                              mat_acc_phi_eq, free);
        if (!check_transpose_p)
          find_sites_in_func (false);
        else
          {
            find_sites_in_func (true);
            loop_optimizer_init (LOOPS_NORMAL);
            if (current_loops)
              scev_initialize ();
            htab_traverse (matrices_to_reorg, analyze_transpose, NULL);
            if (current_loops)
              {
                scev_finalize ();
                loop_optimizer_finalize ();
                current_loops = NULL;
              }
          }
        /* If the current function is the allocation function for any of
           the matrices we check its allocation and the escaping level.  */
        htab_traverse (matrices_to_reorg, check_allocation_function, NULL);
        free_dominance_info (CDI_DOMINATORS);
        free_dominance_info (CDI_POST_DOMINATORS);
        pop_cfun ();
        current_function_decl = temp_fn;
        bitmap_obstack_release (NULL);
      }

  htab_traverse (matrices_to_reorg, transform_allocation_sites, NULL);
  /* Now transform the accesses.  */
  for (node = cgraph_nodes; node; node = node->next)
    if (node->analyzed)
      {
        /* Remember that allocation sites have been handled.  */
        tree temp_fn;

        temp_fn = current_function_decl;
        current_function_decl = node->decl;
        push_cfun (DECL_STRUCT_FUNCTION (node->decl));
        bitmap_obstack_initialize (NULL);
        gimple_register_cfg_hooks ();
        record_all_accesses_in_func ();
        htab_traverse (matrices_to_reorg, transform_access_sites, NULL);
        cgraph_rebuild_references ();
        free_dominance_info (CDI_DOMINATORS);
        free_dominance_info (CDI_POST_DOMINATORS);
        pop_cfun ();
        current_function_decl = temp_fn;
        bitmap_obstack_release (NULL);
      }
  htab_traverse (matrices_to_reorg, dump_matrix_reorg_analysis, NULL);

  current_function_decl = NULL;
  set_cfun (NULL);
  matrices_to_reorg = NULL;
  return 0;
}

/* The condition for matrix flattening to be performed.  */

static bool
gate_matrix_reorg (void)
{
  return flag_ipa_matrix_reorg && flag_whole_program;
}
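
/* In other words, both flags must be given for the pass to run, e.g.
   (a sketch of a typical invocation):

     gcc -O2 -fwhole-program -fipa-matrix-reorg file.c

   since the escape analysis must see every use of a matrix to prove
   that flattening is safe.  */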

struct simple_ipa_opt_pass pass_ipa_matrix_reorg =
{
 {
  SIMPLE_IPA_PASS,
  "matrix-reorg",                       /* name */
  gate_matrix_reorg,                    /* gate */
  matrix_reorg,                         /* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  TV_NONE,                              /* tv_id */
  0,                                    /* properties_required */
  0,                                    /* properties_provided */
  0,                                    /* properties_destroyed */
  0,                                    /* todo_flags_start */
  TODO_dump_cgraph | TODO_dump_func     /* todo_flags_finish */
 }
};