/* Matrix layout transformations.
   Copyright (C) 2006, 2007, 2008 Free Software Foundation, Inc.
   Contributed by Razya Ladelsky <razya@il.ibm.com>
   Originally written by Revital Eres and Mustafa Hagog.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/*
   Matrix flattening optimization tries to replace an N-dimensional
   matrix with its equivalent M-dimensional matrix, where M < N.
   This first implementation focuses on global matrices defined dynamically.

   When N==1, we actually flatten the whole matrix.
   For instance, consider a two-dimensional array a [dim1] [dim2].
   The code for allocating space for it usually looks like:

     a = (int **) malloc (dim1 * sizeof (int *));
     for (i = 0; i < dim1; i++)
       a[i] = (int *) malloc (dim2 * sizeof (int));

   If the array "a" is found suitable for this optimization,
   its allocation is replaced by:

     a = (int *) malloc (dim1 * dim2 * sizeof (int));

   and all the references to a[i][j] are replaced by a[i * dim2 + j].
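
   The same applies to deeper nestings; for instance (an illustrative
   extension of the example above), a three-dimensional access
   a[i][j][k], with inner dimension sizes dim2 and dim3, would become
   a[i * dim2 * dim3 + j * dim3 + k].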

   The two main phases of the optimization are the analysis
   and the transformation.
   The driver of the optimization is matrix_reorg ().

   Analysis phase:
   ===============

   We'll number the dimensions outside-in, meaning the most external
   is 0, then 1, and so on.
   The analysis part of the optimization determines K, the escape
   level of an N-dimensional matrix (K <= N), that allows flattening of
   the external dimensions 0, 1, ..., K-1.  Escape level 0 means that the
   whole matrix escapes and no flattening is possible.
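
   For example (an illustrative case): for a matrix int ***a, if some
   sub-matrix a[i] is passed to an unknown function, the escape level
   is 1; only dimension 0 lies below the escape level, so there is
   nothing to merge and the matrix is left untouched.  If no pointer
   level escapes, all three external dimensions can be flattened into
   one.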

   The analysis part is implemented in analyze_matrix_allocation_site()
   and analyze_matrix_accesses().

   Transformation phase:
   =====================
   In this phase we define the new flattened matrices that replace the
   original matrices in the code.
   Implemented in transform_allocation_sites(),
   transform_access_sites().

   Matrix Transposing
   ==================
   The idea of Matrix Transposing is to organize the matrix in a
   different layout such that the dimensions are reordered.
   This could produce better cache behavior in some cases.

   For example, let's look at the matrix accesses in the following loop:

     for (i = 0; i < N; i++)
       for (j = 0; j < M; j++)
         access to a[i][j]

   This loop can produce good cache behavior because the elements of
   the inner dimension are accessed sequentially.

   However, the accesses of the matrix could be of the following form:

     for (i = 0; i < N; i++)
       for (j = 0; j < M; j++)
         access to a[j][i]

   In this loop we iterate over the columns and not the rows.
   Therefore, swapping the rows and columns yields a layout with
   better (cache) locality.  Swapping the dimensions of the matrix
   is called matrix transposing.

   This example, of course, extends to matrices with more than two
   dimensions as well.
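
   For instance (an illustrative sketch): in the loop above, a is
   logically an M x N matrix.  Flattened as is, a[j][i] becomes
   a[j * N + i], so consecutive iterations of the inner j loop are N
   elements apart.  With the two dimensions transposed, element (j,i)
   is laid out at a[i * M + j], and consecutive j iterations touch
   adjacent memory locations.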

   Since a program can include all kinds of accesses, there is a
   decision mechanism, implemented in analyze_transpose(), which uses
   a heuristic to determine whether to transpose the matrix or not,
   according to the form of the dominant accesses.
   This decision is passed on to the flattening mechanism; whether or
   not the matrix was transposed, the matrix is flattened (if possible).

   This decision making is based on profiling information and loop
   information.  If profiling information is available, the
   decision-making mechanism is applied; otherwise the matrix is only
   flattened (if possible).

   Both optimizations are described in the paper "Matrix flattening and
   transposing in GCC" which was presented at the GCC Summit 2006.
   http://www.gccsummit.org/2006/2006-GCC-Summit-Proceedings.pdf  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "rtl.h"
#include "c-tree.h"
#include "tree-inline.h"
#include "tree-flow.h"
#include "tree-flow-inline.h"
#include "langhooks.h"
#include "hashtab.h"
#include "toplev.h"
#include "flags.h"
#include "ggc.h"
#include "debug.h"
#include "target.h"
#include "cgraph.h"
#include "diagnostic.h"
#include "timevar.h"
#include "params.h"
#include "fibheap.h"
#include "c-common.h"
#include "intl.h"
#include "function.h"
#include "basic-block.h"
#include "cfgloop.h"
#include "tree-iterator.h"
#include "tree-pass.h"
#include "opts.h"
#include "tree-data-ref.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"

/* FIXME tuples.  */
#if 0
/* We need to collect a lot of data from the original malloc,
   particularly as the gimplifier has converted:

   orig_var = (struct_type *) malloc (x * sizeof (struct_type *));

   into

   T3 = <constant> ;  ** <constant> is amount to malloc; precomputed **
   T4 = malloc (T3);
   T5 = (struct_type *) T4;
   orig_var = T5;

   The following struct fields allow us to collect all the necessary data
   from the gimplified program.  The comments in the struct below are all
   based on the gimple example above.  */

struct malloc_call_data
{
  tree call_stmt;	/* Tree for "T4 = malloc (T3);"  */
  tree size_var;	/* Var decl for T3.  */
  tree malloc_size;	/* Tree for "<constant>", the rhs assigned to T3.  */
};

/* The front end of the compiler, when parsing statements of the form:

   var = (type_cast) malloc (sizeof (type));

   always converts this single statement into the following statements
   (GIMPLE form):

   T.1 = sizeof (type);
   T.2 = malloc (T.1);
   T.3 = (type_cast) T.2;
   var = T.3;

   Since we need to create new malloc statements and modify the original
   statements somewhat, we need to find all four of the above statements.
   Currently record_call_1 (called for building cgraph edges) finds and
   records the statements containing the actual call to malloc, but we
   need to find the rest of the variables/statements on our own.  That
   is what the following function does.  */
static void
collect_data_for_malloc_call (tree stmt, struct malloc_call_data *m_data)
{
  tree size_var = NULL;
  tree malloc_fn_decl;
  tree tmp;
  tree arg1;

  gcc_assert (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT);

  tmp = get_call_expr_in (stmt);
  malloc_fn_decl = CALL_EXPR_FN (tmp);
  if (TREE_CODE (malloc_fn_decl) != ADDR_EXPR
      || TREE_CODE (TREE_OPERAND (malloc_fn_decl, 0)) != FUNCTION_DECL
      || DECL_FUNCTION_CODE (TREE_OPERAND (malloc_fn_decl, 0)) !=
      BUILT_IN_MALLOC)
    return;

  arg1 = CALL_EXPR_ARG (tmp, 0);
  size_var = arg1;

  m_data->call_stmt = stmt;
  m_data->size_var = size_var;
  if (TREE_CODE (size_var) != VAR_DECL)
    m_data->malloc_size = size_var;
  else
    m_data->malloc_size = NULL_TREE;
}

/* Information about matrix access site.
   For example: if an access site of matrix arr is arr[i][j]
   the ACCESS_SITE_INFO structure will have the address
   of arr as its stmt.  The INDEX_INFO will hold information about the
   initial address and index of each dimension.  */
struct access_site_info
{
  /* The statement (INDIRECT_REF or POINTER_PLUS_EXPR).  */
  tree stmt;

  /* In case of POINTER_PLUS_EXPR, what is the offset.  */
  tree offset;

  /* The index which created the offset.  */
  tree index;

  /* The indirection level of this statement.  */
  int level;

  /* TRUE for allocation site FALSE for access site.  */
  bool is_alloc;

  /* The function containing the access site.  */
  tree function_decl;

  /* This access is iterated in the inner most loop */
  bool iterated_by_inner_most_loop_p;
};

typedef struct access_site_info *access_site_info_p;
DEF_VEC_P (access_site_info_p);
DEF_VEC_ALLOC_P (access_site_info_p, heap);

/* Information about matrix to flatten.  */
struct matrix_info
{
  /* Decl tree of this matrix.  */
  tree decl;
  /* Number of dimensions; number
     of "*" in the type declaration.  */
  int num_dims;

  /* Minimum indirection level that escapes, 0 means that
     the whole matrix escapes, k means that dimensions
     0 to ACTUAL_DIM - k escape.  */
  int min_indirect_level_escape;

  tree min_indirect_level_escape_stmt;

  /* Is the matrix transposed.  */
  bool is_transposed_p;

  /* Hold the allocation site for each level (dimension).
     We can use NUM_DIMS as the upper bound and allocate the array
     once with this number of elements and no need to use realloc and
     MAX_MALLOCED_LEVEL.  */
  tree *malloc_for_level;

  int max_malloced_level;

  /* The location of the allocation sites (they must be in one
     function).  */
  tree allocation_function_decl;

  /* The calls to free for each level of indirection.  */
  struct free_info
  {
    tree stmt;
    tree func;
  } *free_stmts;

  /* An array which holds for each dimension its size, where
     dimension 0 is the outermost (the one that contains all the
     others).  */
  tree *dimension_size;

  /* An array which holds for each dimension its original size
     (before transposing and flattening take place).  */
  tree *dimension_size_orig;

  /* An array which holds for each dimension the size of the type of
     elements accessed in that level (in bytes).  */
  HOST_WIDE_INT *dimension_type_size;

  int dimension_type_size_len;

  /* An array collecting the count of accesses for each dimension.  */
  gcov_type *dim_hot_level;

  /* An array of the accesses to be flattened.
     elements are of type "struct access_site_info *".  */
  VEC (access_site_info_p, heap) * access_l;

  /* A map of how the dimensions will be organized at the end of
     the analyses.  */
  int *dim_map;
};

/* In each phi node we want to record the indirection level we have when we
   get to the phi node.  Usually we will have phi nodes with more than two
   arguments, so we must ensure that all of them get to the phi node with
   the same indirection level; otherwise it's not safe to do the flattening.
   So we record the information regarding the indirection level each time we
   get to the phi node in this hash table.  */

struct matrix_access_phi_node
{
  tree phi;
  int indirection_level;
};

/* We use this structure to find if the SSA variable is accessed inside the
   tree and record the tree containing it.  */

struct ssa_acc_in_tree
{
  /* The variable whose accesses in the tree we are looking for.  */
  tree ssa_var;
  /* The tree and code inside it the ssa_var is accessed, currently
     it could be an INDIRECT_REF or CALL_EXPR.  */
  enum tree_code t_code;
  tree t_tree;
  /* The place in the containing tree.  */
  tree *tp;
  tree second_op;
  bool var_found;
};

static void analyze_matrix_accesses (struct matrix_info *, tree, int, bool,
                                     sbitmap, bool);
static int transform_allocation_sites (void **, void *);
static int transform_access_sites (void **, void *);
static int analyze_transpose (void **, void *);
static int dump_matrix_reorg_analysis (void **, void *);

static bool check_transpose_p;

/* Hash function used for the phi nodes.  */

static hashval_t
mat_acc_phi_hash (const void *p)
{
  const struct matrix_access_phi_node *const ma_phi =
    (const struct matrix_access_phi_node *) p;

  return htab_hash_pointer (ma_phi->phi);
}

/* Equality means phi node pointers are the same.  */

static int
mat_acc_phi_eq (const void *p1, const void *p2)
{
  const struct matrix_access_phi_node *const phi1 =
    (const struct matrix_access_phi_node *) p1;
  const struct matrix_access_phi_node *const phi2 =
    (const struct matrix_access_phi_node *) p2;

  if (phi1->phi == phi2->phi)
    return 1;

  return 0;
}

/* Hold the PHI nodes we visit during the traversal for escaping
   analysis.  */
static htab_t htab_mat_acc_phi_nodes = NULL;

/* This hash-table holds the information about the matrices we are
   going to handle.  */
static htab_t matrices_to_reorg = NULL;

/* Return a hash for MTT, which is really a "matrix_info *".  */
static hashval_t
mtt_info_hash (const void *mtt)
{
  return htab_hash_pointer (((const struct matrix_info *) mtt)->decl);
}

/* Return true if MTT1 and MTT2 (which are really both of type
   "matrix_info *") refer to the same decl.  */
static int
mtt_info_eq (const void *mtt1, const void *mtt2)
{
  const struct matrix_info *const i1 = (const struct matrix_info *) mtt1;
  const struct matrix_info *const i2 = (const struct matrix_info *) mtt2;

  if (i1->decl == i2->decl)
    return true;

  return false;
}

/* Return the innermost tree that is not a cast.  */
static tree
get_inner_of_cast_expr (tree t)
{
  while (CONVERT_EXPR_P (t)
	 || TREE_CODE (t) == VIEW_CONVERT_EXPR)
    t = TREE_OPERAND (t, 0);

  return t;
}

/* Return false if STMT may contain a vector expression.
   In this situation, all matrices should not be flattened.  */
static bool
may_flatten_matrices_1 (tree stmt)
{
  tree t;

  switch (TREE_CODE (stmt))
    {
    case GIMPLE_MODIFY_STMT:
      t = TREE_OPERAND (stmt, 1);
      while (CONVERT_EXPR_P (t))
	{
	  if (TREE_TYPE (t) && POINTER_TYPE_P (TREE_TYPE (t)))
	    {
	      tree pointee;

	      pointee = TREE_TYPE (t);
	      while (POINTER_TYPE_P (pointee))
		pointee = TREE_TYPE (pointee);
	      if (TREE_CODE (pointee) == VECTOR_TYPE)
		{
		  if (dump_file)
		    fprintf (dump_file,
			     "Found vector type, don't flatten matrix\n");
		  return false;
		}
	    }
	  t = TREE_OPERAND (t, 0);
	}
      break;
    case ASM_EXPR:
      /* Asm code could contain vector operations.  */
      return false;
      break;
    default:
      break;
    }
  return true;
}

/* Return false if there are hand-written vectors in the program.
   We disable the flattening in such a case.  */
static bool
may_flatten_matrices (struct cgraph_node *node)
{
  tree decl;
  struct function *func;
  basic_block bb;
  block_stmt_iterator bsi;

  decl = node->decl;
  if (node->analyzed)
    {
      func = DECL_STRUCT_FUNCTION (decl);
      FOR_EACH_BB_FN (bb, func)
	for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
	  if (!may_flatten_matrices_1 (bsi_stmt (bsi)))
	    return false;
    }
  return true;
}

/* Given a VAR_DECL, check its type to determine whether it is
   a definition of a dynamically allocated matrix and therefore is
   a suitable candidate for the matrix flattening optimization.
   Return NULL if VAR_DECL is not such a decl.  Otherwise, allocate
   a MATRIX_INFO structure, fill it with the relevant information
   and return a pointer to it.
   TODO: handle also statically defined arrays.  */
static struct matrix_info *
analyze_matrix_decl (tree var_decl)
{
  struct matrix_info *m_node, tmpmi, *mi;
  tree var_type;
  int dim_num = 0;

  gcc_assert (matrices_to_reorg);

  if (TREE_CODE (var_decl) == PARM_DECL)
    var_type = DECL_ARG_TYPE (var_decl);
  else if (TREE_CODE (var_decl) == VAR_DECL)
    var_type = TREE_TYPE (var_decl);
  else
    return NULL;

  if (!POINTER_TYPE_P (var_type))
    return NULL;

  while (POINTER_TYPE_P (var_type))
    {
      var_type = TREE_TYPE (var_type);
      dim_num++;
    }

  if (dim_num <= 1)
    return NULL;

  if (!COMPLETE_TYPE_P (var_type)
      || TREE_CODE (TYPE_SIZE_UNIT (var_type)) != INTEGER_CST)
    return NULL;

  /* Check to see if this pointer is already in there.  */
  tmpmi.decl = var_decl;
  mi = (struct matrix_info *) htab_find (matrices_to_reorg, &tmpmi);

  if (mi)
    return NULL;

  /* Record the matrix.  */

  m_node = (struct matrix_info *) xcalloc (1, sizeof (struct matrix_info));
  m_node->decl = var_decl;
  m_node->num_dims = dim_num;
  m_node->free_stmts
    = (struct free_info *) xcalloc (dim_num, sizeof (struct free_info));

  /* Init min_indirect_level_escape to -1 to indicate that no escape
     analysis has been done yet.  */
  m_node->min_indirect_level_escape = -1;
  m_node->is_transposed_p = false;

  return m_node;
}

/* Free matrix E.  */
static void
mat_free (void *e)
{
  struct matrix_info *mat = (struct matrix_info *) e;

  if (!mat)
    return;

  if (mat->free_stmts)
    free (mat->free_stmts);
  if (mat->dim_hot_level)
    free (mat->dim_hot_level);
  if (mat->malloc_for_level)
    free (mat->malloc_for_level);
}

/* Find all potential matrices.
   TODO: currently we handle only multidimensional
   dynamically allocated arrays.  */
static void
find_matrices_decl (void)
{
  struct matrix_info *tmp;
  PTR *slot;
  struct varpool_node *vnode;

  gcc_assert (matrices_to_reorg);

  /* For every global variable in the program:
     Check to see if it's of a candidate type and record it.  */
  for (vnode = varpool_nodes_queue; vnode; vnode = vnode->next_needed)
    {
      tree var_decl = vnode->decl;

      if (!var_decl || TREE_CODE (var_decl) != VAR_DECL)
	continue;

      if (matrices_to_reorg)
	if ((tmp = analyze_matrix_decl (var_decl)))
	  {
	    if (!TREE_ADDRESSABLE (var_decl))
	      {
		slot = htab_find_slot (matrices_to_reorg, tmp, INSERT);
		*slot = tmp;
	      }
	  }
    }
  return;
}

/* Mark that the matrix MI escapes at level L.  */
static void
mark_min_matrix_escape_level (struct matrix_info *mi, int l, tree s)
{
  if (mi->min_indirect_level_escape == -1
      || (mi->min_indirect_level_escape > l))
    {
      mi->min_indirect_level_escape = l;
      mi->min_indirect_level_escape_stmt = s;
    }
}

/* Find if the SSA variable is accessed inside the
   tree and record the tree containing it.
   The only relevant uses are the case of SSA_NAME, or SSA inside
   INDIRECT_REF, CALL_EXPR, PLUS_EXPR, POINTER_PLUS_EXPR, MULT_EXPR.  */
static void
ssa_accessed_in_tree (tree t, struct ssa_acc_in_tree *a)
{
  tree call, decl;
  tree arg;
  call_expr_arg_iterator iter;

  a->t_code = TREE_CODE (t);
  switch (a->t_code)
    {
      tree op1, op2;

    case SSA_NAME:
      if (t == a->ssa_var)
	a->var_found = true;
      break;
    case INDIRECT_REF:
      if (SSA_VAR_P (TREE_OPERAND (t, 0))
	  && TREE_OPERAND (t, 0) == a->ssa_var)
	a->var_found = true;
      break;
    case CALL_EXPR:
      FOR_EACH_CALL_EXPR_ARG (arg, iter, t)
	{
	  if (arg == a->ssa_var)
	    {
	      a->var_found = true;
	      call = get_call_expr_in (t);
	      if (call && (decl = get_callee_fndecl (call)))
		a->t_tree = decl;
	      break;
	    }
	}
      break;
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MULT_EXPR:
      op1 = TREE_OPERAND (t, 0);
      op2 = TREE_OPERAND (t, 1);

      if (op1 == a->ssa_var)
	{
	  a->var_found = true;
	  a->second_op = op2;
	}
      else if (op2 == a->ssa_var)
	{
	  a->var_found = true;
	  a->second_op = op1;
	}
      break;
    default:
      break;
    }
}

/* Record the access/allocation site information for matrix MI so we can
   handle it later in transformation.  */
static void
record_access_alloc_site_info (struct matrix_info *mi, tree stmt, tree offset,
			       tree index, int level, bool is_alloc)
{
  struct access_site_info *acc_info;

  if (!mi->access_l)
    mi->access_l = VEC_alloc (access_site_info_p, heap, 100);

  acc_info
    = (struct access_site_info *)
    xcalloc (1, sizeof (struct access_site_info));
  acc_info->stmt = stmt;
  acc_info->offset = offset;
  acc_info->index = index;
  acc_info->function_decl = current_function_decl;
  acc_info->level = level;
  acc_info->is_alloc = is_alloc;

  VEC_safe_push (access_site_info_p, heap, mi->access_l, acc_info);
}

/* Record the malloc as the allocation site of the given LEVEL.  But
   first we make sure that all the size parameters passed to malloc in
   all the allocation sites could be pre-calculated before the call to
   the malloc of level 0 (the main malloc call).  */
static void
add_allocation_site (struct matrix_info *mi, tree stmt, int level)
{
  struct malloc_call_data mcd;

  /* Make sure that the allocation sites are in the same function.  */
  if (!mi->allocation_function_decl)
    mi->allocation_function_decl = current_function_decl;
  else if (mi->allocation_function_decl != current_function_decl)
    {
      int min_malloc_level;

      gcc_assert (mi->malloc_for_level);

      /* Find the minimum malloc level that already has been seen;
	 we know its allocation function must be
	 MI->allocation_function_decl since it's different from
	 CURRENT_FUNCTION_DECL; the escaping level should then be
	 MIN (LEVEL, MIN_MALLOC_LEVEL) - 1, and the allocation function
	 must be set accordingly.  */
      for (min_malloc_level = 0;
	   min_malloc_level < mi->max_malloced_level
	   && mi->malloc_for_level[min_malloc_level]; min_malloc_level++);
      if (level < min_malloc_level)
	{
	  mi->allocation_function_decl = current_function_decl;
	  mark_min_matrix_escape_level (mi, min_malloc_level, stmt);
	}
      else
	{
	  mark_min_matrix_escape_level (mi, level, stmt);
	  /* cannot be that (level == min_malloc_level)
	     we would have returned earlier.  */
	  return;
	}
    }

  /* Find the correct malloc information.  */
  collect_data_for_malloc_call (stmt, &mcd);

  /* We accept only calls to malloc function; we do not accept
     calls like calloc and realloc.  */
  if (!mi->malloc_for_level)
    {
      mi->malloc_for_level = XCNEWVEC (tree, level + 1);
      mi->max_malloced_level = level + 1;
    }
  else if (mi->max_malloced_level <= level)
    {
      mi->malloc_for_level
	= XRESIZEVEC (tree, mi->malloc_for_level, level + 1);

      /* Zero the newly allocated items.  */
      memset (&(mi->malloc_for_level[mi->max_malloced_level + 1]),
	      0, (level - mi->max_malloced_level) * sizeof (tree));

      mi->max_malloced_level = level + 1;
    }
  mi->malloc_for_level[level] = stmt;
}

/* Given an assignment statement STMT whose left-hand side is the
   matrix MI variable, we traverse the immediate uses backwards until
   we get to a malloc site.  We make sure that there is one and only
   one malloc site that sets this variable.  When we are performing
   the flattening we generate a new variable that will hold the size
   for each dimension; each malloc that allocates a dimension has the
   size parameter; we use that parameter to initialize the dimension
   size variable so we can use it later in the address calculations.
   LEVEL is the dimension we're inspecting.
   Return if STMT is related to an allocation site.  */

static void
analyze_matrix_allocation_site (struct matrix_info *mi, tree stmt,
				int level, sbitmap visited)
{
  if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT)
    {
      tree rhs = TREE_OPERAND (stmt, 1);

      rhs = get_inner_of_cast_expr (rhs);
      if (TREE_CODE (rhs) == SSA_NAME)
	{
	  tree def = SSA_NAME_DEF_STMT (rhs);

	  analyze_matrix_allocation_site (mi, def, level, visited);
	  return;
	}
      /* A result of call to malloc.  */
      else if (TREE_CODE (rhs) == CALL_EXPR)
	{
	  int call_flags = call_expr_flags (rhs);

	  if (!(call_flags & ECF_MALLOC))
	    {
	      mark_min_matrix_escape_level (mi, level, stmt);
	      return;
	    }
	  else
	    {
	      tree malloc_fn_decl;
	      const char *malloc_fname;

	      malloc_fn_decl = CALL_EXPR_FN (rhs);
	      if (TREE_CODE (malloc_fn_decl) != ADDR_EXPR
		  || TREE_CODE (TREE_OPERAND (malloc_fn_decl, 0)) !=
		  FUNCTION_DECL)
		{
		  mark_min_matrix_escape_level (mi, level, stmt);
		  return;
		}
	      malloc_fn_decl = TREE_OPERAND (malloc_fn_decl, 0);
	      malloc_fname = IDENTIFIER_POINTER (DECL_NAME (malloc_fn_decl));
	      if (DECL_FUNCTION_CODE (malloc_fn_decl) != BUILT_IN_MALLOC)
		{
		  if (dump_file)
		    fprintf (dump_file,
			     "Matrix %s is an argument to function %s\n",
			     get_name (mi->decl), get_name (malloc_fn_decl));
		  mark_min_matrix_escape_level (mi, level, stmt);
		  return;
		}
	    }
	  /* This is a call to malloc of level 'level'.
	     mi->max_malloced_level-1 == level means that we've
	     seen a malloc statement of level 'level' before.
	     If the statement is not the same one that we've
	     seen before, then there's another malloc statement
	     for the same level, which means that we need to mark
	     it escaping.  */
	  if (mi->malloc_for_level
	      && mi->max_malloced_level-1 == level
	      && mi->malloc_for_level[level] != stmt)
	    {
	      mark_min_matrix_escape_level (mi, level, stmt);
	      return;
	    }
	  else
	    add_allocation_site (mi, stmt, level);
	  return;
	}
      /* If we are back to the original matrix variable then we
         are sure that this is analyzed as an access site.  */
      else if (rhs == mi->decl)
	return;
    }
  /* Looks like we don't know what is happening in this
     statement so be on the safe side and mark it as escaping.  */
  mark_min_matrix_escape_level (mi, level, stmt);
}

/* The transposing decision making.
   In order to calculate the profitability of transposing, we collect two
   types of information regarding the accesses:
   1. profiling information used to express the hotness of an access, that
      is how often the matrix is accessed by this access site (count of the
      access site).
   2. which dimension in the access site is iterated by the innermost
      loop containing this access.

   The matrix will have a calculated value of weighted hotness for each
   dimension.
   Intuitively the hotness level of a dimension is a function of how
   many times it was the most frequently accessed dimension in the
   highly executed access sites of this matrix.

   As computed by the following equation:

     for each access site j = 1..m and each dimension i = 1..n:
       dim_hot_level[i] += acc[j]->dim[i]->iter_by_inner_loop * count(j)

   where n is the number of dims and m is the number of the matrix
   access sites.  acc[j]->dim[i]->iter_by_inner_loop is 1 if acc[j]
   iterates over dim[i] in the innermost loop, and is 0 otherwise.
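
   For example (an illustrative instance): with two access sites of a
   two-dimensional matrix, acc[1] executed 1000 times with its innermost
   loop iterating dim[1], and acc[2] executed 10 times with its
   innermost loop iterating dim[0], the result is
   dim_hot_level = {10, 1000}, making dimension 1 the hottest.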

   The organization of the new matrix should be according to the
   hotness of each dimension.  The hotness of the dimension implies
   the locality of the elements.  */
static int
analyze_transpose (void **slot, void *data ATTRIBUTE_UNUSED)
{
  struct matrix_info *mi = (struct matrix_info *) *slot;
  int min_escape_l = mi->min_indirect_level_escape;
  struct loop *loop;
  affine_iv iv;
  struct access_site_info *acc_info;
  int i;

  if (min_escape_l < 2 || !mi->access_l)
    {
      if (mi->access_l)
	{
	  for (i = 0;
	       VEC_iterate (access_site_info_p, mi->access_l, i, acc_info);
	       i++)
	    free (acc_info);
	  VEC_free (access_site_info_p, heap, mi->access_l);
	}
      return 1;
    }
  if (!mi->dim_hot_level)
    mi->dim_hot_level =
      (gcov_type *) xcalloc (min_escape_l, sizeof (gcov_type));


  for (i = 0; VEC_iterate (access_site_info_p, mi->access_l, i, acc_info);
       i++)
    {
      if (TREE_CODE (TREE_OPERAND (acc_info->stmt, 1)) == POINTER_PLUS_EXPR
	  && acc_info->level < min_escape_l)
	{
	  loop = loop_containing_stmt (acc_info->stmt);
	  if (!loop || loop->inner)
	    {
	      free (acc_info);
	      continue;
	    }
	  if (simple_iv (loop, acc_info->stmt, acc_info->offset, &iv, true))
	    {
	      if (iv.step != NULL)
		{
		  HOST_WIDE_INT istep;

		  istep = int_cst_value (iv.step);
		  if (istep != 0)
		    {
		      acc_info->iterated_by_inner_most_loop_p = 1;
		      mi->dim_hot_level[acc_info->level] +=
			gimple_bb (acc_info->stmt)->count;
		    }
		}
	    }
	}
      free (acc_info);
    }
  VEC_free (access_site_info_p, heap, mi->access_l);

  return 1;
}

/* Find the index which defines the OFFSET from base.
   We walk from use to def until we find how the offset was defined.  */
static tree
get_index_from_offset (tree offset, tree def_stmt)
{
  tree op1, op2, expr, index;

  if (TREE_CODE (def_stmt) == PHI_NODE)
    return NULL;
  expr = get_inner_of_cast_expr (TREE_OPERAND (def_stmt, 1));
  if (TREE_CODE (expr) == SSA_NAME)
    return get_index_from_offset (offset, SSA_NAME_DEF_STMT (expr));
  else if (TREE_CODE (expr) == MULT_EXPR)
    {
      op1 = TREE_OPERAND (expr, 0);
      op2 = TREE_OPERAND (expr, 1);
      if (TREE_CODE (op1) != INTEGER_CST && TREE_CODE (op2) != INTEGER_CST)
	return NULL;
      index = (TREE_CODE (op1) == INTEGER_CST) ? op2 : op1;
      return index;
    }
  else
    return NULL_TREE;
}

/* Update MI->dimension_type_size[CURRENT_INDIRECT_LEVEL] with the size
   of the type related to the SSA_VAR, or the type related to the
   lhs of STMT, in the case that it is an INDIRECT_REF.  */
static void
update_type_size (struct matrix_info *mi, tree stmt, tree ssa_var,
		  int current_indirect_level)
{
  tree lhs;
  HOST_WIDE_INT type_size;

  /* Update type according to the type of the INDIRECT_REF expr.  */
  if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT
      && TREE_CODE (TREE_OPERAND (stmt, 0)) == INDIRECT_REF)
    {
      lhs = TREE_OPERAND (stmt, 0);
      gcc_assert (POINTER_TYPE_P
		  (TREE_TYPE (SSA_NAME_VAR (TREE_OPERAND (lhs, 0)))));
      type_size =
	int_size_in_bytes (TREE_TYPE
			   (TREE_TYPE
			    (SSA_NAME_VAR (TREE_OPERAND (lhs, 0)))));
    }
  else
    type_size = int_size_in_bytes (TREE_TYPE (ssa_var));

  /* Record the size of elements accessed (as a whole)
     in the current indirection level (dimension).  If the size of
     elements is not known at compile time, mark it as escaping.  */
  if (type_size <= 0)
    mark_min_matrix_escape_level (mi, current_indirect_level, stmt);
  else
    {
      int l = current_indirect_level;

      if (!mi->dimension_type_size)
	{
	  mi->dimension_type_size
	    = (HOST_WIDE_INT *) xcalloc (l + 1, sizeof (HOST_WIDE_INT));
	  mi->dimension_type_size_len = l + 1;
	}
      else if (mi->dimension_type_size_len < l + 1)
	{
	  mi->dimension_type_size
	    = (HOST_WIDE_INT *) xrealloc (mi->dimension_type_size,
					  (l + 1) * sizeof (HOST_WIDE_INT));
	  memset (&mi->dimension_type_size[mi->dimension_type_size_len],
		  0, (l + 1 - mi->dimension_type_size_len)
		  * sizeof (HOST_WIDE_INT));
	  mi->dimension_type_size_len = l + 1;
	}
      /* Make sure all the accesses in the same level have the same size
         of the type.  */
      if (!mi->dimension_type_size[l])
	mi->dimension_type_size[l] = type_size;
      else if (mi->dimension_type_size[l] != type_size)
	mark_min_matrix_escape_level (mi, l, stmt);
    }
}

/* USE_STMT represents a CALL_EXPR, where one of the arguments is the
   ssa var that we want to check because it came from some use of matrix
   MI.  CURRENT_INDIRECT_LEVEL is the indirection level we reached so
   far.  */

static void
analyze_accesses_for_call_expr (struct matrix_info *mi, tree use_stmt,
				int current_indirect_level)
{
  tree call = get_call_expr_in (use_stmt);

  if (call && get_callee_fndecl (call))
    {
      if (DECL_FUNCTION_CODE (get_callee_fndecl (call)) != BUILT_IN_FREE)
	{
	  if (dump_file)
	    fprintf (dump_file,
		     "Matrix %s: Function call %s, level %d escapes.\n",
		     get_name (mi->decl), get_name (get_callee_fndecl (call)),
		     current_indirect_level);
	  mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
	}
      else if (mi->free_stmts[current_indirect_level].stmt != NULL
	       && mi->free_stmts[current_indirect_level].stmt != use_stmt)
	mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
      else
	{
	  /* Record the free statements so we can delete them
	     later.  */
	  int l = current_indirect_level;

	  mi->free_stmts[l].stmt = use_stmt;
	  mi->free_stmts[l].func = current_function_decl;
	}
    }
}

/* USE_STMT represents a phi node of the ssa var that we want to
   check because it came from some use of matrix MI.

   We check all the escaping levels that get to the PHI node
   and make sure they all have the same escaping level;
   if not (which is rare) we let the escaping level be the
   minimum level that gets into that PHI because starting from
   that level we cannot expect the behavior of the indirections.
   CURRENT_INDIRECT_LEVEL is the indirection level we reached so far.  */

static void
analyze_accesses_for_phi_node (struct matrix_info *mi, tree use_stmt,
			       int current_indirect_level, sbitmap visited,
			       bool record_accesses)
{
  struct matrix_access_phi_node tmp_maphi, *maphi, **pmaphi;

  tmp_maphi.phi = use_stmt;
  if ((maphi = (struct matrix_access_phi_node *)
       htab_find (htab_mat_acc_phi_nodes, &tmp_maphi)))
    {
      if (maphi->indirection_level == current_indirect_level)
	return;
      else
	{
	  int level = MIN (maphi->indirection_level,
			   current_indirect_level);
	  int j;
	  tree t = NULL_TREE;

	  maphi->indirection_level = level;
	  for (j = 0; j < PHI_NUM_ARGS (use_stmt); j++)
	    {
	      tree def = PHI_ARG_DEF (use_stmt, j);

	      if (TREE_CODE (SSA_NAME_DEF_STMT (def)) != PHI_NODE)
		t = SSA_NAME_DEF_STMT (def);
	    }
	  mark_min_matrix_escape_level (mi, level, t);
	}
      return;
    }
  maphi = (struct matrix_access_phi_node *)
    xcalloc (1, sizeof (struct matrix_access_phi_node));
  maphi->phi = use_stmt;
  maphi->indirection_level = current_indirect_level;

  /* Insert to hash table.  */
  pmaphi = (struct matrix_access_phi_node **)
    htab_find_slot (htab_mat_acc_phi_nodes, maphi, INSERT);
  gcc_assert (pmaphi);
  *pmaphi = maphi;

  if (!TEST_BIT (visited, SSA_NAME_VERSION (PHI_RESULT (use_stmt))))
    {
      SET_BIT (visited, SSA_NAME_VERSION (PHI_RESULT (use_stmt)));
      analyze_matrix_accesses (mi, PHI_RESULT (use_stmt),
			       current_indirect_level, false, visited,
			       record_accesses);
      RESET_BIT (visited, SSA_NAME_VERSION (PHI_RESULT (use_stmt)));
    }
}

/* USE_STMT represents a modify statement (the rhs or lhs include
   the ssa var that we want to check because it came from some use of
   matrix MI).
   CURRENT_INDIRECT_LEVEL is the indirection level we reached so far.  */

static int
analyze_accesses_for_modify_stmt (struct matrix_info *mi, tree ssa_var,
				  tree use_stmt, int current_indirect_level,
				  bool last_op, sbitmap visited,
				  bool record_accesses)
{
  tree lhs = TREE_OPERAND (use_stmt, 0);
  tree rhs = TREE_OPERAND (use_stmt, 1);
  struct ssa_acc_in_tree lhs_acc, rhs_acc;

  memset (&lhs_acc, 0, sizeof (lhs_acc));
  memset (&rhs_acc, 0, sizeof (rhs_acc));

  lhs_acc.ssa_var = ssa_var;
  lhs_acc.t_code = ERROR_MARK;
  ssa_accessed_in_tree (lhs, &lhs_acc);
  rhs_acc.ssa_var = ssa_var;
  rhs_acc.t_code = ERROR_MARK;
  ssa_accessed_in_tree (get_inner_of_cast_expr (rhs), &rhs_acc);

  /* The SSA must be either in the left side or in the right side,
     to understand what is happening.
     In case the SSA_NAME is found in both sides we should be escaping
     at this level because in this case we cannot calculate the
     address correctly.  */
  if ((lhs_acc.var_found && rhs_acc.var_found
       && lhs_acc.t_code == INDIRECT_REF)
      || (!rhs_acc.var_found && !lhs_acc.var_found))
    {
      mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
      return current_indirect_level;
    }
  gcc_assert (!rhs_acc.var_found || !lhs_acc.var_found);

  /* If we are storing to the matrix at some level, then mark it as
     escaping at that level.  */
  if (lhs_acc.var_found)
    {
      tree def;
      int l = current_indirect_level + 1;

      gcc_assert (lhs_acc.t_code == INDIRECT_REF);
      def = get_inner_of_cast_expr (rhs);
      if (TREE_CODE (def) != SSA_NAME)
	mark_min_matrix_escape_level (mi, l, use_stmt);
      else
	{
	  def = SSA_NAME_DEF_STMT (def);
	  analyze_matrix_allocation_site (mi, def, l, visited);
	  if (record_accesses)
	    record_access_alloc_site_info (mi, use_stmt, NULL_TREE,
					   NULL_TREE, l, true);
	  update_type_size (mi, use_stmt, NULL, l);
	}
      return current_indirect_level;
    }
  /* Now, check the right-hand-side, to see how the SSA variable
     is used.  */
  if (rhs_acc.var_found)
    {
      /* If we are passing the ssa name to a function call and
         the pointer escapes when passed to the function
         (not the case of free), then we mark the matrix as
         escaping at this level.  */
      if (rhs_acc.t_code == CALL_EXPR)
	{
	  analyze_accesses_for_call_expr (mi, use_stmt,
					  current_indirect_level);

	  return current_indirect_level;
	}
      if (rhs_acc.t_code != INDIRECT_REF
	  && rhs_acc.t_code != POINTER_PLUS_EXPR && rhs_acc.t_code != SSA_NAME)
	{
	  mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
	  return current_indirect_level;
	}
      /* If the access in the RHS has an indirection increase the
         indirection level.  */
      if (rhs_acc.t_code == INDIRECT_REF)
	{
	  if (record_accesses)
	    record_access_alloc_site_info (mi, use_stmt, NULL_TREE,
					   NULL_TREE,
					   current_indirect_level, true);
	  current_indirect_level += 1;
	}
      else if (rhs_acc.t_code == POINTER_PLUS_EXPR)
	{
	  gcc_assert (rhs_acc.second_op);
	  if (last_op)
	    /* Currently we support only one PLUS expression on the
	       SSA_NAME that holds the base address of the current
	       indirection level; to support the more general case there
	       is a need to hold a stack of expressions and regenerate
	       the calculation later.  */
	    mark_min_matrix_escape_level (mi, current_indirect_level,
					  use_stmt);
	  else
	    {
	      tree index;
	      tree op1, op2;

	      op1 = TREE_OPERAND (rhs, 0);
	      op2 = TREE_OPERAND (rhs, 1);

	      op2 = (op1 == ssa_var) ? op2 : op1;
	      if (TREE_CODE (op2) == INTEGER_CST)
		index =
		  build_int_cst (TREE_TYPE (op1),
				 TREE_INT_CST_LOW (op2) /
				 int_size_in_bytes (TREE_TYPE (op1)));
	      else
		{
		  index =
		    get_index_from_offset (op2, SSA_NAME_DEF_STMT (op2));
		  if (index == NULL_TREE)
		    {
		      mark_min_matrix_escape_level (mi,
						    current_indirect_level,
						    use_stmt);
		      return current_indirect_level;
		    }
		}
	      if (record_accesses)
		record_access_alloc_site_info (mi, use_stmt, op2,
					       index,
					       current_indirect_level, false);
	    }
	}
      /* If we are storing this level of indirection mark it as
         escaping.  */
      if (lhs_acc.t_code == INDIRECT_REF || TREE_CODE (lhs) != SSA_NAME)
	{
	  int l = current_indirect_level;

	  /* One exception is when we are storing to the matrix
	     variable itself; this is the case of malloc, we must make
	     sure that it's the one and only one call to malloc so
	     we call analyze_matrix_allocation_site to check
	     this out.  */
	  if (TREE_CODE (lhs) != VAR_DECL || lhs != mi->decl)
	    mark_min_matrix_escape_level (mi, current_indirect_level,
					  use_stmt);
	  else
	    {
	      /* Also update the escaping level.  */
	      analyze_matrix_allocation_site (mi, use_stmt, l, visited);
	      if (record_accesses)
		record_access_alloc_site_info (mi, use_stmt, NULL_TREE,
					       NULL_TREE, l, true);
	    }
	}
      else
	{
	  /* We are placing it in an SSA, follow that SSA.  */
	  analyze_matrix_accesses (mi, lhs,
				   current_indirect_level,
				   rhs_acc.t_code == POINTER_PLUS_EXPR,
				   visited, record_accesses);
	}
    }
  return current_indirect_level;
}

/* Given a SSA_VAR (coming from a use statement of the matrix MI),
   follow its uses and level of indirection and find out the minimum
   indirection level it escapes in (the highest dimension) and the maximum
   level it is accessed in (this will be the actual dimension of the
   matrix).  The information is accumulated in MI.
   We look at the immediate uses, if one escapes we finish; if not,
   we make a recursive call for each one of the immediate uses of the
   resulting SSA name.  */
static void
analyze_matrix_accesses (struct matrix_info *mi, tree ssa_var,
			 int current_indirect_level, bool last_op,
			 sbitmap visited, bool record_accesses)
{
  imm_use_iterator imm_iter;
  use_operand_p use_p;

  update_type_size (mi, SSA_NAME_DEF_STMT (ssa_var), ssa_var,
		    current_indirect_level);

  /* We don't go beyond the escaping level when we are performing the
     flattening.  NOTE: we keep the last indirection level that doesn't
     escape.  */
  if (mi->min_indirect_level_escape > -1
      && mi->min_indirect_level_escape <= current_indirect_level)
    return;

  /* Now go over the uses of the SSA_NAME and check how it is used in
     each one of them.  We are mainly looking for the pattern INDIRECT_REF,
     then a POINTER_PLUS_EXPR, then INDIRECT_REF etc., while in between
     there could be any number of copies and casts.  */
  gcc_assert (TREE_CODE (ssa_var) == SSA_NAME);

  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, ssa_var)
    {
      tree use_stmt = USE_STMT (use_p);
      if (TREE_CODE (use_stmt) == PHI_NODE)
	/* We check all the escaping levels that get to the PHI node
	   and make sure they all have the same escaping level;
	   if not (which is rare) we let the escaping level be the
	   minimum level that gets into that PHI because starting from
	   that level we cannot expect the behavior of the indirections.  */

	analyze_accesses_for_phi_node (mi, use_stmt, current_indirect_level,
				       visited, record_accesses);

      else if (TREE_CODE (use_stmt) == CALL_EXPR)
	analyze_accesses_for_call_expr (mi, use_stmt, current_indirect_level);
      else if (TREE_CODE (use_stmt) == GIMPLE_MODIFY_STMT)
	current_indirect_level =
	  analyze_accesses_for_modify_stmt (mi, ssa_var, use_stmt,
					    current_indirect_level, last_op,
					    visited, record_accesses);
    }
}

/* A walk_tree function to go over the VAR_DECL, PARM_DECL nodes of
   the malloc size expression and check that those aren't changed
   over the function.  */
static tree
check_var_notmodified_p (tree *tp, int *walk_subtrees, void *data)
{
  basic_block bb;
  tree t = *tp;
  tree fn = (tree) data;
  block_stmt_iterator bsi;
  tree stmt;

  if (TREE_CODE (t) != VAR_DECL && TREE_CODE (t) != PARM_DECL)
    return NULL_TREE;

  FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (fn))
    {
      for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
	{
	  stmt = bsi_stmt (bsi);
	  if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
	    continue;
	  if (TREE_OPERAND (stmt, 0) == t)
	    return stmt;
	}
    }
  *walk_subtrees = 1;
  return NULL_TREE;
}

/* Go backwards in the use-def chains and find out the expression
   represented by the possible SSA name in EXPR, until it is composed
   of only VAR_DECL, PARM_DECL and INT_CST.  In case of phi nodes
   we make sure that all the arguments represent the same subexpression,
   otherwise we fail.  */
static tree
can_calculate_expr_before_stmt (tree expr, sbitmap visited)
{
  tree def_stmt, op1, op2, res;

  switch (TREE_CODE (expr))
    {
    case SSA_NAME:
      /* Case of a loop, we don't know how to represent this expression.  */
      if (TEST_BIT (visited, SSA_NAME_VERSION (expr)))
	return NULL_TREE;

      SET_BIT (visited, SSA_NAME_VERSION (expr));
      def_stmt = SSA_NAME_DEF_STMT (expr);
      res = can_calculate_expr_before_stmt (def_stmt, visited);
      RESET_BIT (visited, SSA_NAME_VERSION (expr));
      return res;
    case VAR_DECL:
    case PARM_DECL:
    case INTEGER_CST:
      return expr;
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
    case MULT_EXPR:
      op1 = TREE_OPERAND (expr, 0);
      op2 = TREE_OPERAND (expr, 1);

      op1 = can_calculate_expr_before_stmt (op1, visited);
      if (!op1)
	return NULL_TREE;
      op2 = can_calculate_expr_before_stmt (op2, visited);
      if (op2)
	return fold_build2 (TREE_CODE (expr), TREE_TYPE (expr), op1, op2);
      return NULL_TREE;
    case GIMPLE_MODIFY_STMT:
      return can_calculate_expr_before_stmt (TREE_OPERAND (expr, 1),
					     visited);
    case PHI_NODE:
      {
	int j;

	res = NULL_TREE;
	/* Make sure all the arguments represent the same value.  */
	for (j = 0; j < PHI_NUM_ARGS (expr); j++)
	  {
	    tree new_res;
	    tree def = PHI_ARG_DEF (expr, j);

	    new_res = can_calculate_expr_before_stmt (def, visited);
	    if (res == NULL_TREE)
	      res = new_res;
	    else if (!new_res || !expressions_equal_p (res, new_res))
	      return NULL_TREE;
	  }
	return res;
      }
    CASE_CONVERT:
      res = can_calculate_expr_before_stmt (TREE_OPERAND (expr, 0), visited);
      if (res != NULL_TREE)
	return build1 (TREE_CODE (expr), TREE_TYPE (expr), res);
      else
	return NULL_TREE;

    default:
      return NULL_TREE;
    }
}

/* There should be only one allocation function for the dimensions
   that don't escape.  Here we check the allocation sites in this
   function.  We must make sure that all the dimensions are allocated
   using malloc and that the malloc size parameter expression could be
   pre-calculated before the call to the malloc of dimension 0.

   Given a candidate matrix for flattening -- MI -- check if it's
   appropriate for flattening -- we analyze the allocation
   sites that we recorded in the previous analysis.  The result of the
   analysis is a level of indirection (matrix dimension) in which the
   flattening is safe.  We check the following conditions:
   1. There is only one allocation site for each dimension.
   2. The allocation sites of all the dimensions are in the same
      function.
   (The above two are being taken care of during the analysis when
   we check the allocation site.)
   3. All the dimensions that we flatten are allocated at once; thus
      the total size must be known before the allocation of the
      dimension 0 (top level) -- we must make sure we represent the
      size of the allocation as an expression of global parameters or
      constants and that those don't change over the function.  */

static int
check_allocation_function (void **slot, void *data ATTRIBUTE_UNUSED)
{
  int level;
  block_stmt_iterator bsi;
  basic_block bb_level_0;
  struct matrix_info *mi = (struct matrix_info *) *slot;
  sbitmap visited;

  if (!mi->malloc_for_level)
    return 1;

  visited = sbitmap_alloc (num_ssa_names);

  /* Do nothing if the current function is not the allocation
     function of MI.  */
  if (mi->allocation_function_decl != current_function_decl
      /* We aren't in the main allocation function yet.  */
      || !mi->malloc_for_level[0])
    return 1;

  for (level = 1; level < mi->max_malloced_level; level++)
    if (!mi->malloc_for_level[level])
      break;

  mark_min_matrix_escape_level (mi, level, NULL_TREE);

  bsi = bsi_for_stmt (mi->malloc_for_level[0]);
  bb_level_0 = bsi.bb;

  /* Check if the expression of the size passed to malloc could be
     pre-calculated before the malloc of level 0.  */
  for (level = 1; level < mi->min_indirect_level_escape; level++)
    {
      tree call_stmt, size;
      struct malloc_call_data mcd;

      call_stmt = mi->malloc_for_level[level];

      /* Find the correct malloc information.  */
      collect_data_for_malloc_call (call_stmt, &mcd);

      /* No need to check anticipation for constants.  */
      if (TREE_CODE (mcd.size_var) == INTEGER_CST)
	{
	  if (!mi->dimension_size)
	    {
	      mi->dimension_size =
		(tree *) xcalloc (mi->min_indirect_level_escape,
				  sizeof (tree));
	      mi->dimension_size_orig =
		(tree *) xcalloc (mi->min_indirect_level_escape,
				  sizeof (tree));
	    }
	  mi->dimension_size[level] = mcd.size_var;
	  mi->dimension_size_orig[level] = mcd.size_var;
	  continue;
	}
      /* ??? Here we should also add the way to calculate the size
         expression not only know that it is anticipated.  */
      sbitmap_zero (visited);
      size = can_calculate_expr_before_stmt (mcd.size_var, visited);
      if (size == NULL_TREE)
	{
	  mark_min_matrix_escape_level (mi, level, call_stmt);
	  if (dump_file)
	    fprintf (dump_file,
		     "Matrix %s: Cannot calculate the size of allocation, escaping at level %d\n",
		     get_name (mi->decl), level);
	  break;
	}
      if (!mi->dimension_size)
	{
	  mi->dimension_size =
	    (tree *) xcalloc (mi->min_indirect_level_escape, sizeof (tree));
	  mi->dimension_size_orig =
	    (tree *) xcalloc (mi->min_indirect_level_escape, sizeof (tree));
	}
      mi->dimension_size[level] = size;
      mi->dimension_size_orig[level] = size;
    }

  /* We don't need those anymore.  */
  for (level = mi->min_indirect_level_escape;
       level < mi->max_malloced_level; level++)
    mi->malloc_for_level[level] = NULL;
  return 1;
}

/* Track all access and allocation sites.  */
static void
find_sites_in_func (bool record)
{
  sbitmap visited_stmts_1;

  block_stmt_iterator bsi;
  tree stmt;
  basic_block bb;
  struct matrix_info tmpmi, *mi;

  visited_stmts_1 = sbitmap_alloc (num_ssa_names);

  FOR_EACH_BB (bb)
    {
      for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
	{
	  stmt = bsi_stmt (bsi);
	  if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT
	      && TREE_CODE (TREE_OPERAND (stmt, 0)) == VAR_DECL)
	    {
	      tmpmi.decl = TREE_OPERAND (stmt, 0);
	      if ((mi = (struct matrix_info *) htab_find (matrices_to_reorg,
							  &tmpmi)))
		{
		  sbitmap_zero (visited_stmts_1);
		  analyze_matrix_allocation_site (mi, stmt, 0,
						  visited_stmts_1);
		}
	    }
	  if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT
	      && TREE_CODE (TREE_OPERAND (stmt, 0)) == SSA_NAME
	      && TREE_CODE (TREE_OPERAND (stmt, 1)) == VAR_DECL)
	    {
	      tmpmi.decl = TREE_OPERAND (stmt, 1);
	      if ((mi = (struct matrix_info *) htab_find (matrices_to_reorg,
							  &tmpmi)))
		{
		  sbitmap_zero (visited_stmts_1);
		  analyze_matrix_accesses (mi,
					   TREE_OPERAND (stmt, 0), 0,
					   false, visited_stmts_1, record);
		}
	    }
	}
    }
  sbitmap_free (visited_stmts_1);
}

/* Traverse the use-def chains to see if there are matrices that
   are passed through pointers and we cannot know how they are accessed.
   For each SSA-name defined by a global variable of our interest,
   we traverse the use-def chains of the SSA and follow the indirections,
   and record in what level of indirection the use of the variable
   escapes.  A use of a pointer escapes when it is passed to a function,
   stored into memory or assigned (except in malloc and free calls).  */

static void
record_all_accesses_in_func (void)
{
  unsigned i;
  sbitmap visited_stmts_1;

  visited_stmts_1 = sbitmap_alloc (num_ssa_names);

  for (i = 0; i < num_ssa_names; i++)
    {
      struct matrix_info tmpmi, *mi;
      tree ssa_var = ssa_name (i);
      tree rhs, lhs;

      if (!ssa_var
	  || TREE_CODE (SSA_NAME_DEF_STMT (ssa_var)) != GIMPLE_MODIFY_STMT)
	continue;
      rhs = TREE_OPERAND (SSA_NAME_DEF_STMT (ssa_var), 1);
      lhs = TREE_OPERAND (SSA_NAME_DEF_STMT (ssa_var), 0);
      if (TREE_CODE (rhs) != VAR_DECL && TREE_CODE (lhs) != VAR_DECL)
	continue;

      /* If the RHS is a matrix that we want to analyze, follow the def-use
         chain for this SSA_VAR and check for escapes or apply the
         flattening.  */
      tmpmi.decl = rhs;
      if ((mi = (struct matrix_info *) htab_find (matrices_to_reorg, &tmpmi)))
	{
	  /* This variable will track the visited PHI nodes, so we can limit
	     its size to the maximum number of SSA names.  */
	  sbitmap_zero (visited_stmts_1);
	  analyze_matrix_accesses (mi, ssa_var,
				   0, false, visited_stmts_1, true);
	}
    }
  sbitmap_free (visited_stmts_1);
}

/* Used when we want to convert the expression:
   RESULT = something * ORIG  to  RESULT = something * NEW.
   If ORIG and NEW are powers of 2, shift operations can be done,
   else division and multiplication.  */
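/* For instance (illustrative): if ORIG == 8 and NEW == 2, both sizes
   are powers of two (x == 3, y == 1), so the conversion below is done
   with a shift by abs (x - y) == 2 bits instead of the general
   division-and-multiplication path.  */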
static tree
compute_offset (HOST_WIDE_INT orig, HOST_WIDE_INT new, tree result)
{
  int x, y;
  tree result1, ratio, log, orig_tree, new_tree;

  x = exact_log2 (orig);
  y = exact_log2 (new);

  if (x != -1 && y != -1)
    {
      if (x == y)
	return result;
      else if (x > y)
	{
	  log = build_int_cst (TREE_TYPE (result), x - y);
	  result1 =
	    fold_build2 (LSHIFT_EXPR, TREE_TYPE (result), result, log);
	  return result1;
	}
      log = build_int_cst (TREE_TYPE (result), y - x);
      result1 = fold_build2 (RSHIFT_EXPR, TREE_TYPE (result), result, log);

      return result1;
    }
  orig_tree = build_int_cst (TREE_TYPE (result), orig);
  new_tree = build_int_cst (TREE_TYPE (result), new);
  ratio = fold_build2 (TRUNC_DIV_EXPR, TREE_TYPE (result), result, orig_tree);
  result1 = fold_build2 (MULT_EXPR, TREE_TYPE (result), ratio, new_tree);

  return result1;
}


/* We know that we are allowed to perform matrix flattening (according to the
   escape analysis), so we traverse the use-def chains of the SSA vars
   defined by the global variables pointing to the matrices of our interest.
   In each use of the SSA we calculate the offset from the base address
   according to the following equation:

     a[I1][I2]...[Ik], where D1..Dk is the length of each dimension and the
     escaping level is m <= k, and a' is the new allocated matrix,
     will be translated to:

       b[I(m+1)]...[Ik]

     where
       b = a' + I1*D2...*Dm + I2*D3...Dm + ... + Im  */
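
/* As an illustrative instance of the equation above: for a
   two-dimensional matrix with k == m == 2, an access a[I1][I2] is
   rewritten as *(a' + I1 * D2 + I2), the usual row-major
   linearization.  */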

static int
transform_access_sites (void **slot, void *data ATTRIBUTE_UNUSED)
{
  block_stmt_iterator bsi;
  struct matrix_info *mi = (struct matrix_info *) *slot;
  int min_escape_l = mi->min_indirect_level_escape;
  struct access_site_info *acc_info;
  int i;

  if (min_escape_l < 2 || !mi->access_l)
    return 1;
  for (i = 0; VEC_iterate (access_site_info_p, mi->access_l, i, acc_info);
       i++)
    {
      tree orig, type;

      /* This is possible because we collect the access sites before
         we determine the final minimum indirection level.  */
      if (acc_info->level >= min_escape_l)
	{
	  free (acc_info);
	  continue;
	}
      if (acc_info->is_alloc)
	{
	  if (acc_info->level >= 0 && gimple_bb (acc_info->stmt))
	    {
	      ssa_op_iter iter;
	      tree def;
	      tree stmt = acc_info->stmt;

	      FOR_EACH_SSA_TREE_OPERAND (def, stmt, iter, SSA_OP_DEF)
		mark_sym_for_renaming (SSA_NAME_VAR (def));
	      bsi = bsi_for_stmt (stmt);
	      gcc_assert (TREE_CODE (acc_info->stmt) == GIMPLE_MODIFY_STMT);
	      if (TREE_CODE (TREE_OPERAND (acc_info->stmt, 0)) ==
		  SSA_NAME && acc_info->level < min_escape_l - 1)
		{
		  imm_use_iterator imm_iter;
		  use_operand_p use_p;
		  tree use_stmt;

		  FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter,
					 TREE_OPERAND (acc_info->stmt,
						       0))
		    FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
		    {
		      tree conv, tmp, stmts;

		      /* Emit convert statement to convert to type of use.  */
		      conv =
			fold_build1 (CONVERT_EXPR,
				     TREE_TYPE (TREE_OPERAND
						(acc_info->stmt, 0)),
				     TREE_OPERAND (TREE_OPERAND
						   (acc_info->stmt, 1), 0));
		      tmp =
			create_tmp_var (TREE_TYPE
					(TREE_OPERAND
					 (acc_info->stmt, 0)), "new");
		      add_referenced_var (tmp);
		      stmts =
			fold_build2 (GIMPLE_MODIFY_STMT,
				     TREE_TYPE (TREE_OPERAND
						(acc_info->stmt, 0)), tmp,
				     conv);
		      tmp = make_ssa_name (tmp, stmts);
		      TREE_OPERAND (stmts, 0) = tmp;
		      bsi = bsi_for_stmt (acc_info->stmt);
		      bsi_insert_after (&bsi, stmts, BSI_SAME_STMT);
		      SET_USE (use_p, tmp);
		    }
		}
	      if (acc_info->level < min_escape_l - 1)
		bsi_remove (&bsi, true);
	    }
	  free (acc_info);
	  continue;
	}
      orig = TREE_OPERAND (acc_info->stmt, 1);
      type = TREE_TYPE (orig);
      if (TREE_CODE (orig) == INDIRECT_REF
	  && acc_info->level < min_escape_l - 1)
	{
	  /* Replace the INDIRECT_REF with NOP (cast) usually we are casting
	     from "pointer to type" to "type".  */
	  orig =
	    build1 (NOP_EXPR, TREE_TYPE (orig),
		    TREE_OPERAND (orig, 0));
	  TREE_OPERAND (acc_info->stmt, 1) = orig;
	}
      else if (TREE_CODE (orig) == POINTER_PLUS_EXPR
	       && acc_info->level < (min_escape_l))
	{
	  imm_use_iterator imm_iter;
	  use_operand_p use_p;

	  tree offset;
	  int k = acc_info->level;
	  tree num_elements, total_elements;
	  tree tmp1;
	  tree d_size = mi->dimension_size[k];

	  /* We already make sure in the analysis that the first operand
	     is the base and the second is the offset.  */
	  offset = acc_info->offset;
	  if (mi->dim_map[k] == min_escape_l - 1)
	    {
	      if (!check_transpose_p || mi->is_transposed_p == false)
		tmp1 = offset;
	      else
		{
		  tree new_offset;
		  tree d_type_size, d_type_size_k;

		  d_type_size = size_int (mi->dimension_type_size[min_escape_l]);
		  d_type_size_k = size_int (mi->dimension_type_size[k + 1]);

		  new_offset =
		    compute_offset (mi->dimension_type_size[min_escape_l],
				    mi->dimension_type_size[k + 1], offset);

		  total_elements = new_offset;
		  if (new_offset != offset)
		    {
		      bsi = bsi_for_stmt (acc_info->stmt);
		      tmp1 = force_gimple_operand_bsi (&bsi, total_elements,
						       true, NULL,
						       true, BSI_SAME_STMT);
		    }
		  else
		    tmp1 = offset;
		}
	    }
	  else
	    {
	      d_size = mi->dimension_size[mi->dim_map[k] + 1];
	      num_elements =
		fold_build2 (MULT_EXPR, sizetype,
			     fold_convert (sizetype, acc_info->index),
			     fold_convert (sizetype, d_size));
	      add_referenced_var (d_size);
	      bsi = bsi_for_stmt (acc_info->stmt);
	      tmp1 = force_gimple_operand_bsi (&bsi, num_elements, true,
					       NULL, true, BSI_SAME_STMT);
	    }
	  /* Replace the offset if needed.  */
	  if (tmp1 != offset)
	    {
	      if (TREE_CODE (offset) == SSA_NAME)
		{
		  tree use_stmt;

		  FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, offset)
		    FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
		      if (use_stmt == acc_info->stmt)
			SET_USE (use_p, tmp1);
		}
	      else
		{
		  gcc_assert (TREE_CODE (offset) == INTEGER_CST);
		  TREE_OPERAND (orig, 1) = tmp1;
		}
	    }
	}
      /* ??? meanwhile this happens because we record the same access
         site more than once; we should be using a hash table to
         avoid this and insert the STMT of the access site only
         once.
	 else
	   gcc_unreachable (); */
      free (acc_info);
    }
  VEC_free (access_site_info_p, heap, mi->access_l);

  update_ssa (TODO_update_ssa);
#ifdef ENABLE_CHECKING
  verify_ssa (true);
#endif
  return 1;
}
/* Sort the array A of counts; arrange DIM_MAP to reflect the new order.  */
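/* For example (illustrative values): with N == 3, A == {30, 10, 20}
   and DIM_MAP == {0, 1, 2}, the bubble sort below leaves
   A == {10, 20, 30} and DIM_MAP == {1, 2, 0}: the hottest dimension
   ends up last, i.e. innermost after the reorganization.  */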
static void
sort_dim_hot_level (gcov_type *a, int *dim_map, int n)
{
  int i, j, tmp1;
  gcov_type tmp;

  for (i = 0; i < n - 1; i++)
    for (j = 0; j < n - 1 - i; j++)
      if (a[j + 1] < a[j])
        {
          /* Swap a[j] and a[j + 1], and keep DIM_MAP in step.  */
          tmp = a[j];
          a[j] = a[j + 1];
          a[j + 1] = tmp;
          tmp1 = dim_map[j];
          dim_map[j] = dim_map[j + 1];
          dim_map[j + 1] = tmp1;
        }
}

/* Replace the multiple mallocs (one for each dimension) with one malloc
   of size DIM1 * DIM2 * ... * DIMN * size_of_element.
   Make sure that we hold the size used at the malloc site in a
   new global variable; this way we ensure that the size doesn't
   change and that it is accessible from all the other functions that
   use the matrix.  Also, the original calls to free are deleted
   and replaced by a new call to free the flattened matrix.  */
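/* Sketch of the resulting allocation code (the global name SIZE_A is
   invented for illustration):

     SIZE_A = DIM_SIZE[0];     <- stored just before the level-0 malloc
     a = malloc (SIZE_A);      <- size argument replaced
     (the mallocs and frees of levels 1 .. N-1 are deleted)

   Holding the size in a global is what keeps it constant and visible
   to every other function that frees or indexes the matrix.  */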
static int
transform_allocation_sites (void **slot, void *data ATTRIBUTE_UNUSED)
{
  int i;
  struct matrix_info *mi;
  tree type, call_stmt_0, malloc_stmt, oldfn, prev_dim_size, use_stmt;
  struct cgraph_node *c_node;
  struct cgraph_edge *e;
  block_stmt_iterator bsi;
  struct malloc_call_data mcd;
  HOST_WIDE_INT element_size;

  imm_use_iterator imm_iter;
  use_operand_p use_p;
  tree old_size_0, tmp;
  int min_escape_l;
  int id;

  mi = (struct matrix_info *) *slot;

  min_escape_l = mi->min_indirect_level_escape;

  if (!mi->malloc_for_level)
    mi->min_indirect_level_escape = 0;

  if (mi->min_indirect_level_escape < 2)
    return 1;

  mi->dim_map = (int *) xcalloc (mi->min_indirect_level_escape, sizeof (int));
  for (i = 0; i < mi->min_indirect_level_escape; i++)
    mi->dim_map[i] = i;
  if (check_transpose_p)
    {
      int i;

      if (dump_file)
        {
          fprintf (dump_file, "Matrix %s:\n", get_name (mi->decl));
          for (i = 0; i < min_escape_l; i++)
            {
              fprintf (dump_file, "dim %d before sort ", i);
              if (mi->dim_hot_level)
                fprintf (dump_file,
                         "count is " HOST_WIDEST_INT_PRINT_DEC " \n",
                         mi->dim_hot_level[i]);
            }
        }
      sort_dim_hot_level (mi->dim_hot_level, mi->dim_map,
                          mi->min_indirect_level_escape);
      if (dump_file)
        for (i = 0; i < min_escape_l; i++)
          {
            fprintf (dump_file, "dim %d after sort\n", i);
            if (mi->dim_hot_level)
              fprintf (dump_file, "count is " HOST_WIDE_INT_PRINT_DEC
                       " \n", (HOST_WIDE_INT) mi->dim_hot_level[i]);
          }
      for (i = 0; i < mi->min_indirect_level_escape; i++)
        {
          if (dump_file)
            fprintf (dump_file, "dim_map[%d] after sort %d\n", i,
                     mi->dim_map[i]);
          if (mi->dim_map[i] != i)
            {
              if (dump_file)
                fprintf (dump_file,
                         "Transposed dimensions: dim %d is now dim %d\n",
                         mi->dim_map[i], i);
              mi->is_transposed_p = true;
            }
        }
    }
  else
    {
      for (i = 0; i < mi->min_indirect_level_escape; i++)
        mi->dim_map[i] = i;
    }
  /* Call statement of the allocation site of level 0.  */
  call_stmt_0 = mi->malloc_for_level[0];

  /* Find the correct malloc information.  */
  collect_data_for_malloc_call (call_stmt_0, &mcd);

  mi->dimension_size[0] = mcd.size_var;
  mi->dimension_size_orig[0] = mcd.size_var;
  /* Make sure that the variables in the size expression for
     all the dimensions (above level 0) aren't modified in
     the allocation function.  */
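  /* E.g. (illustrative): if the size expression of dimension 1 is
     n * sizeof (int) and the allocation function changes n between the
     malloc calls, the value available at the level-0 malloc no longer
     describes dimension 1, so the escape level is lowered below.  */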
  for (i = 1; i < mi->min_indirect_level_escape; i++)
    {
      tree t;

      /* mi->dimension_size must contain the expression of the size
         calculated in check_allocation_function.  */
      gcc_assert (mi->dimension_size[i]);

      t = walk_tree_without_duplicates (&(mi->dimension_size[i]),
                                        check_var_notmodified_p,
                                        mi->allocation_function_decl);
      if (t != NULL_TREE)
        {
          mark_min_matrix_escape_level (mi, i, t);
          break;
        }
    }

  if (mi->min_indirect_level_escape < 2)
    return 1;

  /* Make sure that the size expression is available right
     before the call to the malloc of level 0.  */
  bsi = bsi_for_stmt (call_stmt_0);

  /* Find out the size of each dimension by looking at the malloc
     sites, and create a global variable to hold it.
     We add the assignment to the global before the malloc of level 0.  */

  /* To be able to produce gimple temporaries.  */
  oldfn = current_function_decl;
  current_function_decl = mi->allocation_function_decl;
  push_cfun (DECL_STRUCT_FUNCTION (mi->allocation_function_decl));

  /* Set the dimension sizes as follows:
     DIM_SIZE[i] = DIM_SIZE[i] * DIM_SIZE[i + 1] * ... * DIM_SIZE[n],
     i.e. the suffix product, in bytes, of the original sizes,
     where n is the maximum non-escaping level.  */
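  /* For instance (illustrative numbers): for a 10 x 20 matrix of
     4-byte ints flattened completely, the loop below computes
     DIM_SIZE[1] = 20 * 4 == 80 (the size of one flattened row) and
     DIM_SIZE[0] = 10 * 80 == 800, which becomes the size argument
     of the single remaining malloc.  */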
  element_size = mi->dimension_type_size[mi->min_indirect_level_escape];
  prev_dim_size = NULL_TREE;

  for (i = mi->min_indirect_level_escape - 1; i >= 0; i--)
    {
      tree dim_size, dim_var, tmp;
      tree d_type_size;

      /* Now put the size expression in a global variable and initialize
         it to the size expression before the malloc of level 0.  */
      dim_var =
        add_new_static_var (TREE_TYPE
                            (mi->dimension_size_orig[mi->dim_map[i]]));
      type = TREE_TYPE (mi->dimension_size_orig[mi->dim_map[i]]);

      /* DIM_SIZE = MALLOC_SIZE_PARAM / TYPE_SIZE.  */
      /* Find which dim ID becomes dim I.  */
      for (id = 0; id < mi->min_indirect_level_escape; id++)
        if (mi->dim_map[id] == i)
          break;
      d_type_size =
        build_int_cst (type, mi->dimension_type_size[id + 1]);
      if (!prev_dim_size)
        prev_dim_size = build_int_cst (type, element_size);
      if (!check_transpose_p && i == mi->min_indirect_level_escape - 1)
        {
          dim_size = mi->dimension_size_orig[id];
        }
      else
        {
          dim_size =
            fold_build2 (TRUNC_DIV_EXPR, type, mi->dimension_size_orig[id],
                         d_type_size);

          dim_size = fold_build2 (MULT_EXPR, type, dim_size, prev_dim_size);
        }
      dim_size = force_gimple_operand_bsi (&bsi, dim_size, true, NULL,
                                           true, BSI_SAME_STMT);
      /* GLOBAL_HOLDING_THE_SIZE = DIM_SIZE.  */
      tmp = fold_build2 (GIMPLE_MODIFY_STMT, type, dim_var, dim_size);
      TREE_OPERAND (tmp, 0) = dim_var;
      mark_symbols_for_renaming (tmp);
      bsi_insert_before (&bsi, tmp, BSI_SAME_STMT);

      prev_dim_size = mi->dimension_size[i] = dim_var;
    }
  update_ssa (TODO_update_ssa);
  /* Replace the malloc size argument in the malloc of level 0 to be
     the size of all the dimensions.  */
  malloc_stmt = TREE_OPERAND (call_stmt_0, 1);
  c_node = cgraph_node (mi->allocation_function_decl);
  old_size_0 = CALL_EXPR_ARG (malloc_stmt, 0);
  tmp = force_gimple_operand_bsi (&bsi, mi->dimension_size[0], true,
                                  NULL, true, BSI_SAME_STMT);
  if (TREE_CODE (old_size_0) == SSA_NAME)
    {
      FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, old_size_0)
        FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
          if (use_stmt == call_stmt_0)
            SET_USE (use_p, tmp);
    }
  /* When deleting the calls to malloc we need also to remove the edge
     from the call graph to keep it consistent.  Notice that cgraph_edge
     may create a new node in the call graph if there is no node for the
     given declaration; this shouldn't be the case but currently there
     is no way to check this outside of "cgraph.c".  */
  for (i = 1; i < mi->min_indirect_level_escape; i++)
    {
      block_stmt_iterator bsi;
      tree use_stmt1 = NULL;
      tree call;

      tree call_stmt = mi->malloc_for_level[i];
      call = TREE_OPERAND (call_stmt, 1);
      gcc_assert (TREE_CODE (call) == CALL_EXPR);
      e = cgraph_edge (c_node, call_stmt);
      gcc_assert (e);
      cgraph_remove_edge (e);
      bsi = bsi_for_stmt (call_stmt);
      /* Remove the call stmt.  */
      bsi_remove (&bsi, true);
      /* Remove the type-cast stmt.  */
      FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter,
                             TREE_OPERAND (call_stmt, 0))
        {
          use_stmt1 = use_stmt;
          bsi = bsi_for_stmt (use_stmt);
          bsi_remove (&bsi, true);
        }
      /* Remove the assignment of the allocated area.  */
      FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter,
                             TREE_OPERAND (use_stmt1, 0))
        {
          bsi = bsi_for_stmt (use_stmt);
          bsi_remove (&bsi, true);
        }
    }
  update_ssa (TODO_update_ssa);
#ifdef ENABLE_CHECKING
  verify_ssa (true);
#endif
  /* Delete the calls to free.  */
  for (i = 1; i < mi->min_indirect_level_escape; i++)
    {
      block_stmt_iterator bsi;
      tree call;

      /* ??? wonder why this case is possible but we failed on it once.  */
      if (!mi->free_stmts[i].stmt)
        continue;

      call = TREE_OPERAND (mi->free_stmts[i].stmt, 1);
      c_node = cgraph_node (mi->free_stmts[i].func);

      gcc_assert (TREE_CODE (mi->free_stmts[i].stmt) == CALL_EXPR);
      e = cgraph_edge (c_node, mi->free_stmts[i].stmt);
      gcc_assert (e);
      cgraph_remove_edge (e);
      current_function_decl = mi->free_stmts[i].func;
      set_cfun (DECL_STRUCT_FUNCTION (mi->free_stmts[i].func));
      bsi = bsi_for_stmt (mi->free_stmts[i].stmt);
      bsi_remove (&bsi, true);
    }
  /* Return to the previous situation.  */
  current_function_decl = oldfn;
  pop_cfun ();
  return 1;
}

/* Print out the results of the escape analysis.  */
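/* A dump entry produced below looks roughly like this (illustrative
   matrix name and counts):

     Matrix "a"; Escaping Level: 2, Num Dims: 2, Malloc Dims: 2,
     Flattened 2 dimensions  */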
static int
dump_matrix_reorg_analysis (void **slot, void *data ATTRIBUTE_UNUSED)
{
  struct matrix_info *mi = (struct matrix_info *) *slot;

  if (!dump_file)
    return 1;
  fprintf (dump_file, "Matrix \"%s\"; Escaping Level: %d, Num Dims: %d,",
           get_name (mi->decl), mi->min_indirect_level_escape, mi->num_dims);
  fprintf (dump_file, " Malloc Dims: %d, ", mi->max_malloced_level);
  fprintf (dump_file, "\n");
  if (mi->min_indirect_level_escape >= 2)
    fprintf (dump_file, "Flattened %d dimensions \n",
             mi->min_indirect_level_escape);
  return 1;
}

#endif

/* Perform matrix flattening.  */
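/* Driver overview (summarizing the code below): a first walk over the
   call graph collects the candidate matrices and runs the escape
   analysis (find_matrices_decl, find_sites_in_func,
   check_allocation_function), plus the transposing analysis when
   profile data is available; the allocation sites are then
   transformed, and a second walk rewrites the access sites in every
   analyzed function.  */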
static unsigned int
matrix_reorg (void)
{
#if 0 /* FIXME tuples */
  struct cgraph_node *node;

  if (profile_info)
    check_transpose_p = true;
  else
    check_transpose_p = false;
  /* If there are hand written vectors, we skip this optimization.  */
  for (node = cgraph_nodes; node; node = node->next)
    if (!may_flatten_matrices (node))
      return 0;
  matrices_to_reorg = htab_create (37, mtt_info_hash, mtt_info_eq, mat_free);
  /* Find and record all potential matrices in the program.  */
  find_matrices_decl ();
  /* Analyze the accesses of the matrices (escaping analysis).  */
  for (node = cgraph_nodes; node; node = node->next)
    if (node->analyzed)
      {
        tree temp_fn;

        temp_fn = current_function_decl;
        current_function_decl = node->decl;
        push_cfun (DECL_STRUCT_FUNCTION (node->decl));
        bitmap_obstack_initialize (NULL);
        gimple_register_cfg_hooks ();

        if (!gimple_in_ssa_p (cfun))
          {
            free_dominance_info (CDI_DOMINATORS);
            free_dominance_info (CDI_POST_DOMINATORS);
            pop_cfun ();
            current_function_decl = temp_fn;
            bitmap_obstack_release (NULL);

            return 0;
          }

#ifdef ENABLE_CHECKING
        verify_flow_info ();
#endif

        if (!matrices_to_reorg)
          {
            free_dominance_info (CDI_DOMINATORS);
            free_dominance_info (CDI_POST_DOMINATORS);
            pop_cfun ();
            current_function_decl = temp_fn;
            bitmap_obstack_release (NULL);

            return 0;
          }

        /* Create a hash table for the phi nodes.  */
        htab_mat_acc_phi_nodes = htab_create (37, mat_acc_phi_hash,
                                              mat_acc_phi_eq, free);
        if (!check_transpose_p)
          find_sites_in_func (false);
        else
          {
            find_sites_in_func (true);
            loop_optimizer_init (LOOPS_NORMAL);
            if (current_loops)
              scev_initialize ();
            htab_traverse (matrices_to_reorg, analyze_transpose, NULL);
            if (current_loops)
              {
                scev_finalize ();
                loop_optimizer_finalize ();
                current_loops = NULL;
              }
          }
        /* If the current function is the allocation function for any of
           the matrices, we check its allocation and the escaping level.  */
        htab_traverse (matrices_to_reorg, check_allocation_function, NULL);
        free_dominance_info (CDI_DOMINATORS);
        free_dominance_info (CDI_POST_DOMINATORS);
        pop_cfun ();
        current_function_decl = temp_fn;
        bitmap_obstack_release (NULL);
      }
  htab_traverse (matrices_to_reorg, transform_allocation_sites, NULL);
  /* Now transform the accesses.  */
  for (node = cgraph_nodes; node; node = node->next)
    if (node->analyzed)
      {
        /* Remember that the allocation sites have already been handled.  */
        tree temp_fn;

        temp_fn = current_function_decl;
        current_function_decl = node->decl;
        push_cfun (DECL_STRUCT_FUNCTION (node->decl));
        bitmap_obstack_initialize (NULL);
        gimple_register_cfg_hooks ();
        record_all_accesses_in_func ();
        htab_traverse (matrices_to_reorg, transform_access_sites, NULL);
        free_dominance_info (CDI_DOMINATORS);
        free_dominance_info (CDI_POST_DOMINATORS);
        pop_cfun ();
        current_function_decl = temp_fn;
        bitmap_obstack_release (NULL);
      }
  htab_traverse (matrices_to_reorg, dump_matrix_reorg_analysis, NULL);

  current_function_decl = NULL;
  set_cfun (NULL);
  matrices_to_reorg = NULL;
  return 0;
#else
  gcc_unreachable ();
#endif
}

/* The condition for matrix flattening to be performed.  */
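/* That is, the pass is meant to run only when both -fipa-matrix-reorg
   and -fwhole-program are given; until the FIXME below is resolved for
   the tuples representation, it is disabled unconditionally.  */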
static bool
gate_matrix_reorg (void)
{
  /* FIXME tuples */
#if 0
  return flag_ipa_matrix_reorg && flag_whole_program;
#else
  return false;
#endif
}

struct simple_ipa_opt_pass pass_ipa_matrix_reorg =
{
 {
  SIMPLE_IPA_PASS,
  "matrix-reorg",                       /* name */
  gate_matrix_reorg,                    /* gate */
  matrix_reorg,                         /* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  0,                                    /* tv_id */
  0,                                    /* properties_required */
  PROP_trees,                           /* properties_provided */
  0,                                    /* properties_destroyed */
  0,                                    /* todo_flags_start */
  TODO_dump_cgraph | TODO_dump_func     /* todo_flags_finish */
 }
};