/* Matrix layout transformations.
   Copyright (C) 2006, 2007, 2008 Free Software Foundation, Inc.
   Contributed by Razya Ladelsky <razya@il.ibm.com>
   Originally written by Revital Eres and Mustafa Hagog.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/*
   Matrix flattening optimization tries to replace an N-dimensional
   matrix with an equivalent M-dimensional matrix, where M < N.
   This first implementation focuses on global matrices defined dynamically.

   When M == 1, we actually flatten the whole matrix.
   For instance consider a two-dimensional array a [dim1] [dim2].
   The code for allocating space for it usually looks like:

     a = (int **) malloc (dim1 * sizeof (int *));
     for (i = 0; i < dim1; i++)
       a[i] = (int *) malloc (dim2 * sizeof (int));

   If the array "a" is found suitable for this optimization,
   its allocation is replaced by:

     a = (int *) malloc (dim1 * dim2 * sizeof (int));

   and all the references to a[i][j] are replaced by a[i * dim2 + j].
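   Similarly (an illustrative extension of the same rewrite, not spelled
   out in the original comment), a three-dimensional a[dim1][dim2][dim3]
   flattened all the way down is allocated as a single block of
   dim1 * dim2 * dim3 elements and accessed as
   a[i * dim2 * dim3 + j * dim3 + k].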
   The two main phases of the optimization are the analysis
   and the transformation.
   The driver of the optimization is matrix_reorg ().
   Analysis phase:
   ===============

   We number the dimensions outside-in, meaning the most external
   dimension is 0, then 1, and so on.
   The analysis part of the optimization determines K, the escape
   level of an N-dimensional matrix (K <= N), that allows flattening of
   the external dimensions 0, 1, ..., K-1.  Escape level 0 means that the
   whole matrix escapes and no flattening is possible.

   The analysis part is implemented in analyze_matrix_allocation_site ()
   and analyze_matrix_accesses ().
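   As an illustrative sketch (not part of the original comment): if the
   program passes a row pointer to an unknown function, e.g.

     bar (a[i]);

   then everything reachable through a[i] escapes, the escape level is
   K == 1, and no external dimensions can be collapsed.  Only a matrix
   whose internal pointers never leave the analyzed code in this way
   gets K == N and can be flattened completely.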
   Transformation phase:
   =====================
   In this phase we define the new flattened matrices that replace the
   original matrices in the code.
   Implemented in transform_allocation_sites (),
   transform_access_sites ().
   Matrix Transposing
   ==================
   The idea of Matrix Transposing is to organize the matrix in a different
   layout such that the dimensions are reordered.
   This could produce better cache behavior in some cases.

   For example, let's look at the matrix accesses in the following loop:

     for (i = 0; i < N; i++)
       for (j = 0; j < M; j++)
         access to a[i][j]

   This loop can produce good cache behavior because the elements of
   the inner dimension are accessed sequentially.

   However, if the accesses of the matrix were of the following form:

     for (i = 0; i < N; i++)
       for (j = 0; j < M; j++)
         access to a[j][i]

   In this loop we iterate over the columns and not the rows.
   Therefore, exchanging the rows and columns of the matrix would
   yield an organization with better (cache) locality.
   Reordering the dimensions of the matrix is called matrix transposing.

   This example, of course, could be extended to matrices with more
   dimensions as well.

   Since a program could include all kinds of accesses, there is a decision
   mechanism, implemented in analyze_transpose (), which implements a
   heuristic that tries to determine whether to transpose the matrix or not,
   according to the form of the most dominant accesses.
   This decision is passed to the flattening mechanism, and whether or not
   the matrix was transposed, the matrix is flattened (if possible).

   This decision making is based on profiling information and loop
   information.  If profiling information is available, the decision-making
   mechanism is run; otherwise the matrix is only flattened (if possible).

   Both optimizations are described in the paper "Matrix flattening and
   transposing in GCC", which was presented at the GCC Summit 2006.
   http://www.gccsummit.org/2006/2006-GCC-Summit-Proceedings.pdf.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "rtl.h"
#include "c-tree.h"
#include "tree-inline.h"
#include "tree-flow.h"
#include "tree-flow-inline.h"
#include "langhooks.h"
#include "hashtab.h"
#include "toplev.h"
#include "flags.h"
#include "ggc.h"
#include "debug.h"
#include "target.h"
#include "cgraph.h"
#include "diagnostic.h"
#include "timevar.h"
#include "params.h"
#include "fibheap.h"
#include "c-common.h"
#include "intl.h"
#include "function.h"
#include "basic-block.h"
#include "cfgloop.h"
#include "tree-iterator.h"
#include "tree-pass.h"
#include "opts.h"
#include "tree-data-ref.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
/* We need to collect a lot of data from the original malloc,
   particularly as the gimplifier has converted:

   orig_var = (struct_type *) malloc (x * sizeof (struct_type *));

   into

   T3 = <constant> ;  ** <constant> is amount to malloc; precomputed **
   T4 = malloc (T3);
   T5 = (struct_type *) T4;
   orig_var = T5;

   The following struct fields allow us to collect all the necessary data
   from the gimplified program.  The comments in the struct below are all
   based on the gimple example above.  */
struct malloc_call_data
{
  gimple call_stmt;   /* Tree for "T4 = malloc (T3);".  */
  tree size_var;      /* Var decl for T3.  */
  tree malloc_size;   /* Tree for "<constant>", the rhs assigned to T3.  */
};
static tree can_calculate_expr_before_stmt (tree, sbitmap);
static tree can_calculate_stmt_before_stmt (gimple, sbitmap);

/* The front end of the compiler, when parsing statements of the form:

   var = (type_cast) malloc (sizeof (type));

   always converts this single statement into the following statements
   (GIMPLE form):

   T.1 = sizeof (type);
   T.2 = malloc (T.1);
   T.3 = (type_cast) T.2;
   var = T.3;

   Since we need to create new malloc statements and modify the original
   statements somewhat, we need to find all four of the above statements.
   Currently record_call_1 (called for building cgraph edges) finds and
   records the statements containing the actual call to malloc, but we
   need to find the rest of the variables/statements on our own.  That
   is what the following function does.  */
static void
collect_data_for_malloc_call (gimple stmt, struct malloc_call_data *m_data)
{
  tree size_var = NULL;
  tree malloc_fn_decl;
  tree arg1;

  gcc_assert (is_gimple_call (stmt));

  malloc_fn_decl = gimple_call_fndecl (stmt);
  if (malloc_fn_decl == NULL
      || DECL_FUNCTION_CODE (malloc_fn_decl) != BUILT_IN_MALLOC)
    return;

  arg1 = gimple_call_arg (stmt, 0);
  size_var = arg1;

  m_data->call_stmt = stmt;
  m_data->size_var = size_var;
  if (TREE_CODE (size_var) != VAR_DECL)
    m_data->malloc_size = size_var;
  else
    m_data->malloc_size = NULL_TREE;
}
/* Information about matrix access site.
   For example: if an access site of matrix arr is arr[i][j]
   the ACCESS_SITE_INFO structure will have the address
   of arr as its stmt.  The INDEX_INFO will hold information about the
   initial address and index of each dimension.  */
struct access_site_info
{
  /* The statement (INDIRECT_REF or POINTER_PLUS_EXPR).  */
  gimple stmt;

  /* In case of POINTER_PLUS_EXPR, what is the offset.  */
  tree offset;

  /* The index which created the offset.  */
  tree index;

  /* The indirection level of this statement.  */
  int level;

  /* TRUE for allocation site, FALSE for access site.  */
  bool is_alloc;

  /* The function containing the access site.  */
  tree function_decl;

  /* TRUE if this access is iterated in the innermost loop.  */
  bool iterated_by_inner_most_loop_p;
};

typedef struct access_site_info *access_site_info_p;
DEF_VEC_P (access_site_info_p);
DEF_VEC_ALLOC_P (access_site_info_p, heap);
/* Information about matrix to flatten.  */
struct matrix_info
{
  /* Decl tree of this matrix.  */
  tree decl;
  /* Number of dimensions; number
     of "*" in the type declaration.  */
  int num_dims;

  /* Minimum indirection level that escapes, 0 means that
     the whole matrix escapes, k means that dimensions
     0 to ACTUAL_DIM - k escape.  */
  int min_indirect_level_escape;

  gimple min_indirect_level_escape_stmt;

  /* Is the matrix transposed.  */
  bool is_transposed_p;

  /* Holds the allocation site for each level (dimension).
     We can use NUM_DIMS as the upper bound and allocate the array
     once with this number of elements, with no need for realloc and
     MAX_MALLOCED_LEVEL.  */
  gimple *malloc_for_level;

  int max_malloced_level;

  /* The location of the allocation sites (they must be in one
     function).  */
  tree allocation_function_decl;

  /* The calls to free for each level of indirection.  */
  struct free_info
  {
    gimple stmt;
    tree func;
  } *free_stmts;

  /* An array which holds for each dimension its size, where
     dimension 0 is the outermost (the one that contains all the
     others).  */
  tree *dimension_size;

  /* An array which holds for each dimension its original size
     (before transposing and flattening take place).  */
  tree *dimension_size_orig;

  /* An array which holds for each dimension the size of the type of
     the elements accessed in that level (in bytes).  */
  HOST_WIDE_INT *dimension_type_size;

  int dimension_type_size_len;

  /* An array collecting the count of accesses for each dimension.  */
  gcov_type *dim_hot_level;

  /* An array of the accesses to be flattened.
     Elements are of type "struct access_site_info *".  */
  VEC (access_site_info_p, heap) * access_l;

  /* A map of how the dimensions will be organized at the end of
     the analyses.  */
  int *dim_map;
};
/* In each phi node we want to record the indirection level we have when we
   get to the phi node.  Usually we will have phi nodes with more than two
   arguments, then we must assure that all of them get to the phi node with
   the same indirection level; otherwise it's not safe to do the flattening.
   So we record the information regarding the indirection level each time we
   get to the phi node in this hash table.  */

struct matrix_access_phi_node
{
  gimple phi;
  int indirection_level;
};
/* We use this structure to find if the SSA variable is accessed inside the
   tree and record the tree containing it.  */

struct ssa_acc_in_tree
{
  /* The variable whose accesses in the tree we are looking for.  */
  tree ssa_var;
  /* The tree and code of the tree in which the ssa_var is accessed;
     currently it can be an INDIRECT_REF or CALL_EXPR.  */
  enum tree_code t_code;
  tree t_tree;
  /* The place in the containing tree.  */
  tree *tp;
  tree second_op;
  bool var_found;
};
static void analyze_matrix_accesses (struct matrix_info *, tree, int, bool,
                                     sbitmap, bool);
static int transform_allocation_sites (void **, void *);
static int transform_access_sites (void **, void *);
static int analyze_transpose (void **, void *);
static int dump_matrix_reorg_analysis (void **, void *);

static bool check_transpose_p;
/* Hash function used for the phi nodes.  */

static hashval_t
mat_acc_phi_hash (const void *p)
{
  const struct matrix_access_phi_node *const ma_phi =
    (const struct matrix_access_phi_node *) p;

  return htab_hash_pointer (ma_phi->phi);
}

/* Equality means phi node pointers are the same.  */

static int
mat_acc_phi_eq (const void *p1, const void *p2)
{
  const struct matrix_access_phi_node *const phi1 =
    (const struct matrix_access_phi_node *) p1;
  const struct matrix_access_phi_node *const phi2 =
    (const struct matrix_access_phi_node *) p2;

  if (phi1->phi == phi2->phi)
    return 1;

  return 0;
}
/* Hold the PHI nodes we visit during the traversal for escaping
   analysis.  */
static htab_t htab_mat_acc_phi_nodes = NULL;

/* This hash-table holds the information about the matrices we are
   going to handle.  */
static htab_t matrices_to_reorg = NULL;

/* Return a hash for MTT, which is really a "matrix_info *".  */
static hashval_t
mtt_info_hash (const void *mtt)
{
  return htab_hash_pointer (((const struct matrix_info *) mtt)->decl);
}

/* Return true if MTT1 and MTT2 (which are really both of type
   "matrix_info *") refer to the same decl.  */
static int
mtt_info_eq (const void *mtt1, const void *mtt2)
{
  const struct matrix_info *const i1 = (const struct matrix_info *) mtt1;
  const struct matrix_info *const i2 = (const struct matrix_info *) mtt2;

  if (i1->decl == i2->decl)
    return true;

  return false;
}
/* Return false if STMT may contain a vector expression.
   In that situation, no matrix should be flattened.  */
static bool
may_flatten_matrices_1 (gimple stmt)
{
  tree t;

  switch (gimple_code (stmt))
    {
    case GIMPLE_ASSIGN:
      if (!gimple_assign_cast_p (stmt))
        return true;

      t = gimple_assign_rhs1 (stmt);
      while (CONVERT_EXPR_P (t))
        {
          if (TREE_TYPE (t) && POINTER_TYPE_P (TREE_TYPE (t)))
            {
              tree pointee;

              pointee = TREE_TYPE (t);
              while (POINTER_TYPE_P (pointee))
                pointee = TREE_TYPE (pointee);
              if (TREE_CODE (pointee) == VECTOR_TYPE)
                {
                  if (dump_file)
                    fprintf (dump_file,
                             "Found vector type, don't flatten matrix\n");
                  return false;
                }
            }
          t = TREE_OPERAND (t, 0);
        }
      break;
    case GIMPLE_ASM:
      /* Asm code could contain vector operations.  */
      return false;
      break;
    default:
      break;
    }
  return true;
}
/* Return false if there are hand-written vectors in the program.
   We disable the flattening in such a case.  */
static bool
may_flatten_matrices (struct cgraph_node *node)
{
  tree decl;
  struct function *func;
  basic_block bb;
  gimple_stmt_iterator gsi;

  decl = node->decl;
  if (node->analyzed)
    {
      func = DECL_STRUCT_FUNCTION (decl);
      FOR_EACH_BB_FN (bb, func)
        for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
          if (!may_flatten_matrices_1 (gsi_stmt (gsi)))
            return false;
    }
  return true;
}
/* Given a VAR_DECL, check its type to determine whether it is
   a definition of a dynamically allocated matrix and therefore is
   a suitable candidate for the matrix flattening optimization.
   Return NULL if VAR_DECL is not such a decl.  Otherwise, allocate
   a MATRIX_INFO structure, fill it with the relevant information
   and return a pointer to it.
   TODO: handle also statically defined arrays.  */
static struct matrix_info *
analyze_matrix_decl (tree var_decl)
{
  struct matrix_info *m_node, tmpmi, *mi;
  tree var_type;
  int dim_num = 0;

  gcc_assert (matrices_to_reorg);

  if (TREE_CODE (var_decl) == PARM_DECL)
    var_type = DECL_ARG_TYPE (var_decl);
  else if (TREE_CODE (var_decl) == VAR_DECL)
    var_type = TREE_TYPE (var_decl);
  else
    return NULL;

  if (!POINTER_TYPE_P (var_type))
    return NULL;

  while (POINTER_TYPE_P (var_type))
    {
      var_type = TREE_TYPE (var_type);
      dim_num++;
    }

  if (dim_num <= 1)
    return NULL;

  if (!COMPLETE_TYPE_P (var_type)
      || TREE_CODE (TYPE_SIZE_UNIT (var_type)) != INTEGER_CST)
    return NULL;

  /* Check to see if this pointer is already in there.  */
  tmpmi.decl = var_decl;
  mi = (struct matrix_info *) htab_find (matrices_to_reorg, &tmpmi);

  if (mi)
    return NULL;

  /* Record the matrix.  */

  m_node = (struct matrix_info *) xcalloc (1, sizeof (struct matrix_info));
  m_node->decl = var_decl;
  m_node->num_dims = dim_num;
  m_node->free_stmts
    = (struct free_info *) xcalloc (dim_num, sizeof (struct free_info));

  /* Init min_indirect_level_escape to -1 to indicate that no escape
     analysis has been done yet.  */
  m_node->min_indirect_level_escape = -1;
  m_node->is_transposed_p = false;

  return m_node;
}
/* Free matrix E.  */
static void
mat_free (void *e)
{
  struct matrix_info *mat = (struct matrix_info *) e;

  if (!mat)
    return;

  if (mat->free_stmts)
    free (mat->free_stmts);
  if (mat->dim_hot_level)
    free (mat->dim_hot_level);
  if (mat->malloc_for_level)
    free (mat->malloc_for_level);
}
/* Find all potential matrices.
   TODO: currently we handle only multidimensional
   dynamically allocated arrays.  */
static void
find_matrices_decl (void)
{
  struct matrix_info *tmp;
  PTR *slot;
  struct varpool_node *vnode;

  gcc_assert (matrices_to_reorg);

  /* For every global variable in the program:
     Check to see if it's of a candidate type and record it.  */
  for (vnode = varpool_nodes_queue; vnode; vnode = vnode->next_needed)
    {
      tree var_decl = vnode->decl;

      if (!var_decl || TREE_CODE (var_decl) != VAR_DECL)
        continue;

      if (matrices_to_reorg)
        if ((tmp = analyze_matrix_decl (var_decl)))
          {
            if (!TREE_ADDRESSABLE (var_decl))
              {
                slot = htab_find_slot (matrices_to_reorg, tmp, INSERT);
                *slot = tmp;
              }
          }
    }
  return;
}
/* Mark that the matrix MI escapes at level L.  */
static void
mark_min_matrix_escape_level (struct matrix_info *mi, int l, gimple s)
{
  if (mi->min_indirect_level_escape == -1
      || (mi->min_indirect_level_escape > l))
    {
      mi->min_indirect_level_escape = l;
      mi->min_indirect_level_escape_stmt = s;
    }
}
/* Find if the SSA variable is accessed inside the
   tree and record the tree containing it.
   The only relevant uses are the case of SSA_NAME, or SSA inside
   INDIRECT_REF, PLUS_EXPR, POINTER_PLUS_EXPR, MULT_EXPR.  */
static void
ssa_accessed_in_tree (tree t, struct ssa_acc_in_tree *a)
{
  a->t_code = TREE_CODE (t);
  switch (a->t_code)
    {
    case SSA_NAME:
      if (t == a->ssa_var)
        a->var_found = true;
      break;
    case INDIRECT_REF:
      if (SSA_VAR_P (TREE_OPERAND (t, 0))
          && TREE_OPERAND (t, 0) == a->ssa_var)
        a->var_found = true;
      break;
    default:
      break;
    }
}
/* Find if the SSA variable is accessed on the right hand side of
   gimple call STMT.  */

static void
ssa_accessed_in_call_rhs (gimple stmt, struct ssa_acc_in_tree *a)
{
  tree decl;
  tree arg;
  size_t i;

  a->t_code = CALL_EXPR;
  for (i = 0; i < gimple_call_num_args (stmt); i++)
    {
      arg = gimple_call_arg (stmt, i);
      if (arg == a->ssa_var)
        {
          a->var_found = true;
          decl = gimple_call_fndecl (stmt);
          a->t_tree = decl;
          break;
        }
    }
}
/* Find if the SSA variable is accessed on the right hand side of
   gimple assign STMT.  */

static void
ssa_accessed_in_assign_rhs (gimple stmt, struct ssa_acc_in_tree *a)
{
  a->t_code = gimple_assign_rhs_code (stmt);
  switch (a->t_code)
    {
      tree op1, op2;

    case SSA_NAME:
    case INDIRECT_REF:
    case CONVERT_EXPR:
    case NOP_EXPR:
    case VIEW_CONVERT_EXPR:
      ssa_accessed_in_tree (gimple_assign_rhs1 (stmt), a);
      break;
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MULT_EXPR:
      op1 = gimple_assign_rhs1 (stmt);
      op2 = gimple_assign_rhs2 (stmt);

      if (op1 == a->ssa_var)
        {
          a->var_found = true;
          a->second_op = op2;
        }
      else if (op2 == a->ssa_var)
        {
          a->var_found = true;
          a->second_op = op1;
        }
      break;
    default:
      break;
    }
}
/* Record the access/allocation site information for matrix MI so we can
   handle it later in the transformation.  */
static void
record_access_alloc_site_info (struct matrix_info *mi, gimple stmt, tree offset,
                               tree index, int level, bool is_alloc)
{
  struct access_site_info *acc_info;

  if (!mi->access_l)
    mi->access_l = VEC_alloc (access_site_info_p, heap, 100);

  acc_info
    = (struct access_site_info *)
    xcalloc (1, sizeof (struct access_site_info));
  acc_info->stmt = stmt;
  acc_info->offset = offset;
  acc_info->index = index;
  acc_info->function_decl = current_function_decl;
  acc_info->level = level;
  acc_info->is_alloc = is_alloc;

  VEC_safe_push (access_site_info_p, heap, mi->access_l, acc_info);
}
/* Record the malloc as the allocation site of the given LEVEL.  But
   first make sure that all the size parameters passed to malloc in
   all the allocation sites could be pre-calculated before the call to
   the malloc of level 0 (the main malloc call).  */
static void
add_allocation_site (struct matrix_info *mi, gimple stmt, int level)
{
  struct malloc_call_data mcd;

  /* Make sure that the allocation sites are in the same function.  */
  if (!mi->allocation_function_decl)
    mi->allocation_function_decl = current_function_decl;
  else if (mi->allocation_function_decl != current_function_decl)
    {
      int min_malloc_level;

      gcc_assert (mi->malloc_for_level);

      /* Find the minimum malloc level that already has been seen;
         we know its allocation function must be
         MI->allocation_function_decl since it's different than
         CURRENT_FUNCTION_DECL.  The escaping level should then be
         MIN (LEVEL, MIN_MALLOC_LEVEL) - 1, and the allocation function
         must be set accordingly.  */
      for (min_malloc_level = 0;
           min_malloc_level < mi->max_malloced_level
           && mi->malloc_for_level[min_malloc_level]; min_malloc_level++);
      if (level < min_malloc_level)
        {
          mi->allocation_function_decl = current_function_decl;
          mark_min_matrix_escape_level (mi, min_malloc_level, stmt);
        }
      else
        {
          mark_min_matrix_escape_level (mi, level, stmt);
          /* cannot be that (level == min_malloc_level)
             we would have returned earlier.  */
          return;
        }
    }

  /* Find the correct malloc information.  */
  collect_data_for_malloc_call (stmt, &mcd);

  /* We accept only calls to the malloc function; we do not accept
     calls like calloc and realloc.  */
  if (!mi->malloc_for_level)
    {
      mi->malloc_for_level = XCNEWVEC (gimple, level + 1);
      mi->max_malloced_level = level + 1;
    }
  else if (mi->max_malloced_level <= level)
    {
      mi->malloc_for_level
        = XRESIZEVEC (gimple, mi->malloc_for_level, level + 1);

      /* Zero the newly allocated items.  */
      memset (&(mi->malloc_for_level[mi->max_malloced_level]),
              0, (level + 1 - mi->max_malloced_level) * sizeof (gimple));

      mi->max_malloced_level = level + 1;
    }
  mi->malloc_for_level[level] = stmt;
}
/* Given an assignment statement STMT whose left-hand side is known to
   be the matrix MI variable, we traverse the immediate
   uses backwards until we get to a malloc site.  We make sure that
   there is one and only one malloc site that sets this variable.  When
   we are performing the flattening we generate a new variable that
   will hold the size for each dimension; each malloc that allocates a
   dimension has the size parameter; we use that parameter to
   initialize the dimension size variable so we can use it later in
   the address calculations.  LEVEL is the dimension we're inspecting.
   Return if STMT is related to an allocation site.  */

static void
analyze_matrix_allocation_site (struct matrix_info *mi, gimple stmt,
                                int level, sbitmap visited)
{
  if (gimple_assign_copy_p (stmt) || gimple_assign_cast_p (stmt))
    {
      tree rhs = gimple_assign_rhs1 (stmt);

      if (TREE_CODE (rhs) == SSA_NAME)
        {
          gimple def = SSA_NAME_DEF_STMT (rhs);

          analyze_matrix_allocation_site (mi, def, level, visited);
          return;
        }
      /* If we are back to the original matrix variable then we
         are sure that this is analyzed as an access site.  */
      else if (rhs == mi->decl)
        return;
    }
  /* A result of call to malloc.  */
  else if (is_gimple_call (stmt))
    {
      int call_flags = gimple_call_flags (stmt);

      if (!(call_flags & ECF_MALLOC))
        {
          mark_min_matrix_escape_level (mi, level, stmt);
          return;
        }
      else
        {
          tree malloc_fn_decl;
          const char *malloc_fname;

          malloc_fn_decl = gimple_call_fndecl (stmt);
          if (malloc_fn_decl == NULL_TREE)
            {
              mark_min_matrix_escape_level (mi, level, stmt);
              return;
            }
          malloc_fname = IDENTIFIER_POINTER (DECL_NAME (malloc_fn_decl));
          if (DECL_FUNCTION_CODE (malloc_fn_decl) != BUILT_IN_MALLOC)
            {
              if (dump_file)
                fprintf (dump_file,
                         "Matrix %s is an argument to function %s\n",
                         get_name (mi->decl), get_name (malloc_fn_decl));
              mark_min_matrix_escape_level (mi, level, stmt);
              return;
            }
        }
      /* This is a call to malloc of level 'level'.
         mi->max_malloced_level - 1 == level means that we've
         seen a malloc statement of level 'level' before.
         If the statement is not the same one that we've
         seen before, then there's another malloc statement
         for the same level, which means that we need to mark
         it escaping.  */
      if (mi->malloc_for_level
          && mi->max_malloced_level - 1 == level
          && mi->malloc_for_level[level] != stmt)
        {
          mark_min_matrix_escape_level (mi, level, stmt);
          return;
        }
      else
        add_allocation_site (mi, stmt, level);
      return;
    }
  /* Looks like we don't know what is happening in this
     statement, so be on the safe side and mark it as escaping.  */
  mark_min_matrix_escape_level (mi, level, stmt);
}
/* The transposing decision making.
   In order to calculate the profitability of transposing, we collect two
   types of information regarding the accesses:
   1. profiling information used to express the hotness of an access, that
      is how often the matrix is accessed by this access site (count of the
      access site).
   2. which dimension in the access site is iterated by the innermost
      loop containing this access.

   The matrix will have a calculated value of weighted hotness for each
   dimension.
   Intuitively the hotness level of a dimension is a function of how
   many times it was the most frequently accessed dimension in the
   highly executed access sites of this matrix.

   It is computed for each dimension i (i = 0 .. n-1) by the following
   equation:

                            m
                           ___
                           \
      dim_hot_level[i] =   /__  acc[j]->dim[i]->iter_by_inner_loop * count(j)
                           j=1

   where n is the number of dims and m is the number of the matrix
   access sites.  acc[j]->dim[i]->iter_by_inner_loop is 1 if acc[j]
   iterates over dim[i] in the innermost loop, and is 0 otherwise.

   The organization of the new matrix should be according to the
   hotness of each dimension.  The hotness of a dimension implies
   the locality of its elements.  */
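/* For instance (an illustrative sketch, not part of the original
   comment): given two access sites to a 2-D matrix, acc[0] reading
   a[i][j] whose innermost loop iterates dimension 1 with an execution
   count of 1000, and acc[1] reading a[j][i] whose innermost loop
   iterates dimension 0 with a count of 10, the sums come out as
   dim_hot_level[1] == 1000 and dim_hot_level[0] == 10, so dimension 1
   stays innermost and the matrix is not transposed.  */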
static int
analyze_transpose (void **slot, void *data ATTRIBUTE_UNUSED)
{
  struct matrix_info *mi = (struct matrix_info *) *slot;
  int min_escape_l = mi->min_indirect_level_escape;
  struct loop *loop;
  affine_iv iv;
  struct access_site_info *acc_info;
  int i;

  if (min_escape_l < 2 || !mi->access_l)
    {
      if (mi->access_l)
        {
          for (i = 0;
               VEC_iterate (access_site_info_p, mi->access_l, i, acc_info);
               i++)
            free (acc_info);
          VEC_free (access_site_info_p, heap, mi->access_l);
        }
      return 1;
    }
  if (!mi->dim_hot_level)
    mi->dim_hot_level =
      (gcov_type *) xcalloc (min_escape_l, sizeof (gcov_type));

  for (i = 0; VEC_iterate (access_site_info_p, mi->access_l, i, acc_info);
       i++)
    {
      if (gimple_assign_rhs_code (acc_info->stmt) == POINTER_PLUS_EXPR
          && acc_info->level < min_escape_l)
        {
          loop = loop_containing_stmt (acc_info->stmt);
          if (!loop || loop->inner)
            {
              free (acc_info);
              continue;
            }
          if (simple_iv (loop, acc_info->stmt, acc_info->offset, &iv, true))
            {
              if (iv.step != NULL)
                {
                  HOST_WIDE_INT istep;

                  istep = int_cst_value (iv.step);
                  if (istep != 0)
                    {
                      acc_info->iterated_by_inner_most_loop_p = 1;
                      mi->dim_hot_level[acc_info->level] +=
                        gimple_bb (acc_info->stmt)->count;
                    }
                }
            }
        }
      free (acc_info);
    }
  VEC_free (access_site_info_p, heap, mi->access_l);

  return 1;
}
/* Find the index which defines the OFFSET from base.
   We walk from use to def until we find how the offset was defined.  */
static tree
get_index_from_offset (tree offset, gimple def_stmt)
{
  tree op1, op2, index;

  if (gimple_code (def_stmt) == GIMPLE_PHI)
    return NULL;
  if ((gimple_assign_copy_p (def_stmt) || gimple_assign_cast_p (def_stmt))
      && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME)
    return get_index_from_offset (offset,
                                  SSA_NAME_DEF_STMT (gimple_assign_rhs1 (def_stmt)));
  else if (is_gimple_assign (def_stmt)
           && gimple_assign_rhs_code (def_stmt) == MULT_EXPR)
    {
      op1 = gimple_assign_rhs1 (def_stmt);
      op2 = gimple_assign_rhs2 (def_stmt);
      if (TREE_CODE (op1) != INTEGER_CST && TREE_CODE (op2) != INTEGER_CST)
        return NULL;
      index = (TREE_CODE (op1) == INTEGER_CST) ? op2 : op1;
      return index;
    }
  else
    return NULL_TREE;
}
/* Update MI->dimension_type_size[CURRENT_INDIRECT_LEVEL] with the size
   of the type related to the SSA_VAR, or the type related to the
   lhs of STMT, in the case that it is an INDIRECT_REF.  */
static void
update_type_size (struct matrix_info *mi, gimple stmt, tree ssa_var,
                  int current_indirect_level)
{
  tree lhs;
  HOST_WIDE_INT type_size;

  /* Update type according to the type of the INDIRECT_REF expr.  */
  if (is_gimple_assign (stmt)
      && TREE_CODE (gimple_assign_lhs (stmt)) == INDIRECT_REF)
    {
      lhs = gimple_assign_lhs (stmt);
      gcc_assert (POINTER_TYPE_P
                  (TREE_TYPE (SSA_NAME_VAR (TREE_OPERAND (lhs, 0)))));
      type_size =
        int_size_in_bytes (TREE_TYPE
                           (TREE_TYPE
                            (SSA_NAME_VAR (TREE_OPERAND (lhs, 0)))));
    }
  else
    type_size = int_size_in_bytes (TREE_TYPE (ssa_var));

  /* Record the size of elements accessed (as a whole)
     in the current indirection level (dimension).  If the size of
     elements is not known at compile time, mark it as escaping.  */
  if (type_size <= 0)
    mark_min_matrix_escape_level (mi, current_indirect_level, stmt);
  else
    {
      int l = current_indirect_level;

      if (!mi->dimension_type_size)
        {
          mi->dimension_type_size
            = (HOST_WIDE_INT *) xcalloc (l + 1, sizeof (HOST_WIDE_INT));
          mi->dimension_type_size_len = l + 1;
        }
      else if (mi->dimension_type_size_len < l + 1)
        {
          mi->dimension_type_size
            = (HOST_WIDE_INT *) xrealloc (mi->dimension_type_size,
                                          (l + 1) * sizeof (HOST_WIDE_INT));
          memset (&mi->dimension_type_size[mi->dimension_type_size_len],
                  0, (l + 1 - mi->dimension_type_size_len)
                  * sizeof (HOST_WIDE_INT));
          mi->dimension_type_size_len = l + 1;
        }
      /* Make sure all the accesses in the same level have the same size
         of the type.  */
      if (!mi->dimension_type_size[l])
        mi->dimension_type_size[l] = type_size;
      else if (mi->dimension_type_size[l] != type_size)
        mark_min_matrix_escape_level (mi, l, stmt);
    }
}
/* USE_STMT represents a GIMPLE_CALL, where one of the arguments is the
   ssa var that we want to check because it came from some use of matrix
   MI.  CURRENT_INDIRECT_LEVEL is the indirection level we reached so
   far.  */

static int
analyze_accesses_for_call_stmt (struct matrix_info *mi, tree ssa_var,
                                gimple use_stmt, int current_indirect_level)
{
  tree fndecl = gimple_call_fndecl (use_stmt);

  if (gimple_call_lhs (use_stmt))
    {
      tree lhs = gimple_call_lhs (use_stmt);
      struct ssa_acc_in_tree lhs_acc, rhs_acc;

      memset (&lhs_acc, 0, sizeof (lhs_acc));
      memset (&rhs_acc, 0, sizeof (rhs_acc));

      lhs_acc.ssa_var = ssa_var;
      lhs_acc.t_code = ERROR_MARK;
      ssa_accessed_in_tree (lhs, &lhs_acc);
      rhs_acc.ssa_var = ssa_var;
      rhs_acc.t_code = ERROR_MARK;
      ssa_accessed_in_call_rhs (use_stmt, &rhs_acc);

      /* The SSA must be either in the left side or in the right side,
         to understand what is happening.
         In case the SSA_NAME is found in both sides we should be escaping
         at this level because in this case we cannot calculate the
         address correctly.  */
      if ((lhs_acc.var_found && rhs_acc.var_found
           && lhs_acc.t_code == INDIRECT_REF)
          || (!rhs_acc.var_found && !lhs_acc.var_found))
        {
          mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
          return current_indirect_level;
        }
      gcc_assert (!rhs_acc.var_found || !lhs_acc.var_found);

      /* If we are storing to the matrix at some level, then mark it as
         escaping at that level.  */
      if (lhs_acc.var_found)
        {
          int l = current_indirect_level + 1;

          gcc_assert (lhs_acc.t_code == INDIRECT_REF);
          mark_min_matrix_escape_level (mi, l, use_stmt);
          return current_indirect_level;
        }
    }

  if (fndecl)
    {
      if (DECL_FUNCTION_CODE (fndecl) != BUILT_IN_FREE)
        {
          if (dump_file)
            fprintf (dump_file,
                     "Matrix %s: Function call %s, level %d escapes.\n",
                     get_name (mi->decl), get_name (fndecl),
                     current_indirect_level);
          mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
        }
      else if (mi->free_stmts[current_indirect_level].stmt != NULL
               && mi->free_stmts[current_indirect_level].stmt != use_stmt)
        mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
      else
        {
          /* Record the free statements so we can delete them
             later.  */
          int l = current_indirect_level;

          mi->free_stmts[l].stmt = use_stmt;
          mi->free_stmts[l].func = current_function_decl;
        }
    }
  return current_indirect_level;
}
/* USE_STMT represents a phi node of the ssa var that we want to
   check because it came from some use of matrix MI.
   We check all the escaping levels that get to the PHI node
   and make sure they are all the same escaping;
   if not (which is rare) we let the escaping level be the
   minimum level that gets into that PHI because starting from
   that level we cannot expect the behavior of the indirections.
   CURRENT_INDIRECT_LEVEL is the indirection level we reached so far.  */

static void
analyze_accesses_for_phi_node (struct matrix_info *mi, gimple use_stmt,
                               int current_indirect_level, sbitmap visited,
                               bool record_accesses)
{
  struct matrix_access_phi_node tmp_maphi, *maphi, **pmaphi;

  tmp_maphi.phi = use_stmt;
  if ((maphi = (struct matrix_access_phi_node *)
       htab_find (htab_mat_acc_phi_nodes, &tmp_maphi)))
    {
      if (maphi->indirection_level == current_indirect_level)
        return;
      else
        {
          int level = MIN (maphi->indirection_level,
                           current_indirect_level);
          size_t j;
          gimple stmt = NULL;

          maphi->indirection_level = level;
          for (j = 0; j < gimple_phi_num_args (use_stmt); j++)
            {
              tree def = PHI_ARG_DEF (use_stmt, j);

              if (gimple_code (SSA_NAME_DEF_STMT (def)) != GIMPLE_PHI)
                stmt = SSA_NAME_DEF_STMT (def);
            }
          mark_min_matrix_escape_level (mi, level, stmt);
        }
      return;
    }
  maphi = (struct matrix_access_phi_node *)
    xcalloc (1, sizeof (struct matrix_access_phi_node));
  maphi->phi = use_stmt;
  maphi->indirection_level = current_indirect_level;

  /* Insert to hash table.  */
  pmaphi = (struct matrix_access_phi_node **)
    htab_find_slot (htab_mat_acc_phi_nodes, maphi, INSERT);
  gcc_assert (pmaphi);
  *pmaphi = maphi;

  if (!TEST_BIT (visited, SSA_NAME_VERSION (PHI_RESULT (use_stmt))))
    {
      SET_BIT (visited, SSA_NAME_VERSION (PHI_RESULT (use_stmt)));
      analyze_matrix_accesses (mi, PHI_RESULT (use_stmt),
                               current_indirect_level, false, visited,
                               record_accesses);
      RESET_BIT (visited, SSA_NAME_VERSION (PHI_RESULT (use_stmt)));
    }
}
/* USE_STMT represents an assign statement (the rhs or lhs includes
   the ssa var that we want to check because it came from some use of
   matrix MI).  CURRENT_INDIRECT_LEVEL is the indirection level we
   reached so far.  */

static int
analyze_accesses_for_assign_stmt (struct matrix_info *mi, tree ssa_var,
                                  gimple use_stmt, int current_indirect_level,
                                  bool last_op, sbitmap visited,
                                  bool record_accesses)
{
  tree lhs = gimple_get_lhs (use_stmt);
  struct ssa_acc_in_tree lhs_acc, rhs_acc;

  memset (&lhs_acc, 0, sizeof (lhs_acc));
  memset (&rhs_acc, 0, sizeof (rhs_acc));

  lhs_acc.ssa_var = ssa_var;
  lhs_acc.t_code = ERROR_MARK;
  ssa_accessed_in_tree (lhs, &lhs_acc);
  rhs_acc.ssa_var = ssa_var;
  rhs_acc.t_code = ERROR_MARK;
  ssa_accessed_in_assign_rhs (use_stmt, &rhs_acc);

  /* The SSA must be either in the left side or in the right side,
     to understand what is happening.
     In case the SSA_NAME is found in both sides we should be escaping
     at this level because in this case we cannot calculate the
     address correctly.  */
  if ((lhs_acc.var_found && rhs_acc.var_found
       && lhs_acc.t_code == INDIRECT_REF)
      || (!rhs_acc.var_found && !lhs_acc.var_found))
    {
      mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
      return current_indirect_level;
    }
  gcc_assert (!rhs_acc.var_found || !lhs_acc.var_found);

  /* If we are storing to the matrix at some level, then mark it as
     escaping at that level.  */
  if (lhs_acc.var_found)
    {
      int l = current_indirect_level + 1;

      gcc_assert (lhs_acc.t_code == INDIRECT_REF);

      if (!(gimple_assign_copy_p (use_stmt)
            || gimple_assign_cast_p (use_stmt))
          || (TREE_CODE (gimple_assign_rhs1 (use_stmt)) != SSA_NAME))
        mark_min_matrix_escape_level (mi, l, use_stmt);
      else
        {
          gimple def_stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (use_stmt));
          analyze_matrix_allocation_site (mi, def_stmt, l, visited);
          if (record_accesses)
            record_access_alloc_site_info (mi, use_stmt, NULL_TREE,
                                           NULL_TREE, l, true);
          update_type_size (mi, use_stmt, NULL, l);
        }
      return current_indirect_level;
    }
  /* Now, check the right-hand-side, to see how the SSA variable
     is used.  */
  if (rhs_acc.var_found)
    {
      if (rhs_acc.t_code != INDIRECT_REF
          && rhs_acc.t_code != POINTER_PLUS_EXPR && rhs_acc.t_code != SSA_NAME)
        {
          mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
          return current_indirect_level;
        }
      /* If the access in the RHS has an indirection increase the
         indirection level.  */
      if (rhs_acc.t_code == INDIRECT_REF)
        {
          if (record_accesses)
            record_access_alloc_site_info (mi, use_stmt, NULL_TREE,
                                           NULL_TREE,
                                           current_indirect_level, true);
          current_indirect_level += 1;
        }
      else if (rhs_acc.t_code == POINTER_PLUS_EXPR)
        {
          gcc_assert (rhs_acc.second_op);
          if (last_op)
            /* Currently we support only one PLUS expression on the
               SSA_NAME that holds the base address of the current
               indirection level; to support a more general case there
               is a need to hold a stack of expressions and regenerate
               the calculation later.  */
            mark_min_matrix_escape_level (mi, current_indirect_level,
                                          use_stmt);
          else
            {
              tree index;
              tree op1, op2;

              op1 = gimple_assign_rhs1 (use_stmt);
              op2 = gimple_assign_rhs2 (use_stmt);

              op2 = (op1 == ssa_var) ? op2 : op1;
              if (TREE_CODE (op2) == INTEGER_CST)
                index =
                  build_int_cst (TREE_TYPE (op1),
                                 TREE_INT_CST_LOW (op2) /
                                 int_size_in_bytes (TREE_TYPE (op1)));
              else
                {
                  index =
                    get_index_from_offset (op2, SSA_NAME_DEF_STMT (op2));
                  if (index == NULL_TREE)
                    {
                      mark_min_matrix_escape_level (mi,
                                                    current_indirect_level,
                                                    use_stmt);
                      return current_indirect_level;
                    }
                }
              if (record_accesses)
                record_access_alloc_site_info (mi, use_stmt, op2,
                                               index,
                                               current_indirect_level, false);
            }
        }
      /* If we are storing this level of indirection mark it as
         escaping.  */
      if (lhs_acc.t_code == INDIRECT_REF || TREE_CODE (lhs) != SSA_NAME)
        {
          int l = current_indirect_level;

          /* One exception is when we are storing to the matrix
             variable itself; this is the case of malloc, we must make
             sure that it's the one and only one call to malloc so
             we call analyze_matrix_allocation_site to check
             this out.  */
          if (TREE_CODE (lhs) != VAR_DECL || lhs != mi->decl)
            mark_min_matrix_escape_level (mi, current_indirect_level,
                                          use_stmt);
          else
            {
              /* Also update the escaping level.  */
              analyze_matrix_allocation_site (mi, use_stmt, l, visited);
              if (record_accesses)
                record_access_alloc_site_info (mi, use_stmt, NULL_TREE,
                                               NULL_TREE, l, true);
            }
        }
      else
        {
          /* We are placing it in an SSA, follow that SSA.  */
          analyze_matrix_accesses (mi, lhs,
                                   current_indirect_level,
                                   rhs_acc.t_code == POINTER_PLUS_EXPR,
                                   visited, record_accesses);
        }
    }
  return current_indirect_level;
}
/* Given a SSA_VAR (coming from a use statement of the matrix MI),
   follow its uses and level of indirection and find out the minimum
   indirection level it escapes in (the highest dimension) and the maximum
   level it is accessed in (this will be the actual dimension of the
   matrix).  The information is accumulated in MI.
   We look at the immediate uses; if one escapes we finish, if not,
   we make a recursive call for each one of the immediate uses of the
   resulting SSA name.  */
static void
analyze_matrix_accesses (struct matrix_info *mi, tree ssa_var,
                         int current_indirect_level, bool last_op,
                         sbitmap visited, bool record_accesses)
{
  imm_use_iterator imm_iter;
  use_operand_p use_p;

  update_type_size (mi, SSA_NAME_DEF_STMT (ssa_var), ssa_var,
                    current_indirect_level);

  /* We don't go beyond the escaping level when we are performing the
     flattening.  NOTE: we keep the last indirection level that doesn't
     escape.  */
  if (mi->min_indirect_level_escape > -1
      && mi->min_indirect_level_escape <= current_indirect_level)
    return;

  /* Now go over the uses of the SSA_NAME and check how it is used in
     each one of them.  We are mainly looking for the pattern INDIRECT_REF,
     then a POINTER_PLUS_EXPR, then INDIRECT_REF etc., while in between there
     could be any number of copies and casts.  */
  gcc_assert (TREE_CODE (ssa_var) == SSA_NAME);

  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, ssa_var)
    {
      gimple use_stmt = USE_STMT (use_p);
      if (gimple_code (use_stmt) == GIMPLE_PHI)
        /* We check all the escaping levels that get to the PHI node
           and make sure they are all the same escaping;
           if not (which is rare) we let the escaping level be the
           minimum level that gets into that PHI because starting from
           that level we cannot expect the behavior of the indirections.  */

        analyze_accesses_for_phi_node (mi, use_stmt, current_indirect_level,
                                       visited, record_accesses);

      else if (is_gimple_call (use_stmt))
        analyze_accesses_for_call_stmt (mi, ssa_var, use_stmt,
                                        current_indirect_level);
      else if (is_gimple_assign (use_stmt))
        current_indirect_level =
          analyze_accesses_for_assign_stmt (mi, ssa_var, use_stmt,
                                            current_indirect_level, last_op,
                                            visited, record_accesses);
    }
}
typedef struct
{
  tree fn;
  gimple stmt;
} check_var_data;
/* A walk_tree function to go over the VAR_DECL, PARM_DECL nodes of
   the malloc size expression and check that those aren't changed
   over the function.  */
static tree
check_var_notmodified_p (tree * tp, int *walk_subtrees, void *data)
{
  basic_block bb;
  tree t = *tp;
  check_var_data *callback_data = (check_var_data*) data;
  tree fn = callback_data->fn;
  gimple_stmt_iterator gsi;
  gimple stmt;

  if (TREE_CODE (t) != VAR_DECL && TREE_CODE (t) != PARM_DECL)
    return NULL_TREE;

  FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (fn))
    {
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
        {
          stmt = gsi_stmt (gsi);
          if (!is_gimple_assign (stmt) && !is_gimple_call (stmt))
            continue;
          if (gimple_get_lhs (stmt) == t)
            {
              callback_data->stmt = stmt;
              return t;
            }
        }
    }
  *walk_subtrees = 1;
  return NULL_TREE;
}
/* Go backwards in the use-def chains and find out the expression
   represented by the possible SSA name in STMT, until it is composed
   of only VAR_DECL, PARM_DECL and INT_CST.  In case of phi nodes
   we make sure that all the arguments represent the same subexpression;
   otherwise we fail.  */
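/* For example (an illustrative sketch, not part of the original
   comment): if STMT is T.2 = T.1 * 4 and T.1 is a copy of the
   PARM_DECL dim1, the recursive walk below folds the definition chain
   back into dim1 * 4, an expression built only from a PARM_DECL and an
   INTEGER_CST, which can therefore be recomputed ahead of the level-0
   malloc.  */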
static tree
can_calculate_stmt_before_stmt (gimple stmt, sbitmap visited)
{
  tree op1, op2, res;
  enum tree_code code;

  switch (gimple_code (stmt))
    {
    case GIMPLE_ASSIGN:
      code = gimple_assign_rhs_code (stmt);
      op1 = gimple_assign_rhs1 (stmt);

      switch (code)
        {
        case POINTER_PLUS_EXPR:
        case PLUS_EXPR:
        case MINUS_EXPR:
        case MULT_EXPR:
          op2 = gimple_assign_rhs2 (stmt);
          op1 = can_calculate_expr_before_stmt (op1, visited);
          if (!op1)
            return NULL_TREE;
          op2 = can_calculate_expr_before_stmt (op2, visited);
          if (op2)
            return fold_build2 (code, gimple_expr_type (stmt), op1, op2);
          return NULL_TREE;

        CASE_CONVERT:
          res = can_calculate_expr_before_stmt (op1, visited);
          if (res != NULL_TREE)
            return build1 (code, gimple_expr_type (stmt), res);
          else
            return NULL_TREE;

        default:
          if (gimple_assign_single_p (stmt))
            return can_calculate_expr_before_stmt (op1, visited);
          else
            return NULL_TREE;
        }

    case GIMPLE_PHI:
      {
        size_t j;

        res = NULL_TREE;
        /* Make sure all the arguments represent the same value.  */
        for (j = 0; j < gimple_phi_num_args (stmt); j++)
          {
            tree new_res;
            tree def = PHI_ARG_DEF (stmt, j);

            new_res = can_calculate_expr_before_stmt (def, visited);
            if (res == NULL_TREE)
              res = new_res;
            else if (!new_res || !expressions_equal_p (res, new_res))
              return NULL_TREE;
          }
        return res;
      }

    default:
      return NULL_TREE;
    }
}
/* Go backwards in the use-def chains and find out the expression
   represented by the possible SSA name in EXPR, until it is composed
   of only VAR_DECL, PARM_DECL and INT_CST.  In case of phi nodes
   we make sure that all the arguments represent the same subexpression;
   otherwise we fail.  */
static tree
can_calculate_expr_before_stmt (tree expr, sbitmap visited)
{
  gimple def_stmt;
  tree res;

  switch (TREE_CODE (expr))
    {
    case SSA_NAME:
      /* Case of loop, we don't know how to represent this expression.  */
      if (TEST_BIT (visited, SSA_NAME_VERSION (expr)))
        return NULL_TREE;

      SET_BIT (visited, SSA_NAME_VERSION (expr));
      def_stmt = SSA_NAME_DEF_STMT (expr);
      res = can_calculate_stmt_before_stmt (def_stmt, visited);
      RESET_BIT (visited, SSA_NAME_VERSION (expr));
      return res;
    case VAR_DECL:
    case PARM_DECL:
    case INTEGER_CST:
      return expr;

    default:
      return NULL_TREE;
    }
}
/* There should be only one allocation function for the dimensions
   that don't escape.  Here we check the allocation sites in this
   function.  We must make sure that all the dimensions are allocated
   using malloc and that the malloc size parameter expression could be
   pre-calculated before the call to the malloc of dimension 0.

   Given a candidate matrix for flattening -- MI -- check if it's
   appropriate for flattening -- we analyze the allocation
   sites that we recorded in the previous analysis.  The result of the
   analysis is a level of indirection (matrix dimension) in which the
   flattening is safe.  We check the following conditions:
   1. There is only one allocation site for each dimension.
   2. The allocation sites of all the dimensions are in the same
      function.
   (The above two are being taken care of during the analysis when
   we check the allocation site.)
   3. All the dimensions that we flatten are allocated at once; thus
      the total size must be known before the allocation of the
      dimension 0 (top level) -- we must make sure we represent the
      size of the allocation as an expression of global parameters or
      constants and that those don't change over the function.
   An example that satisfies these conditions is sketched after this
   comment.  */
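/* For instance (an illustrative sketch reusing the example from the top
   of this file, not part of the original comment): when dim1 and dim2
   are globals that the allocating function never writes,

     a = (int **) malloc (dim1 * sizeof (int *));
     for (i = 0; i < dim1; i++)
       a[i] = (int *) malloc (dim2 * sizeof (int));

   passes the checks, because the level-1 size expression
   dim2 * sizeof (int) can already be computed before the level-0
   malloc.  */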
static int
check_allocation_function (void **slot, void *data ATTRIBUTE_UNUSED)
{
  int level;
  gimple_stmt_iterator gsi;
  basic_block bb_level_0;
  struct matrix_info *mi = (struct matrix_info *) *slot;
  sbitmap visited;

  if (!mi->malloc_for_level)
    return 1;

  visited = sbitmap_alloc (num_ssa_names);

  /* Do nothing if the current function is not the allocation
     function of MI.  */
  if (mi->allocation_function_decl != current_function_decl
      /* We aren't in the main allocation function yet.  */
      || !mi->malloc_for_level[0])
    return 1;

  for (level = 1; level < mi->max_malloced_level; level++)
    if (!mi->malloc_for_level[level])
      break;

  mark_min_matrix_escape_level (mi, level, NULL);

  gsi = gsi_for_stmt (mi->malloc_for_level[0]);
  bb_level_0 = gsi.bb;

  /* Check if the expression of the size passed to malloc could be
     pre-calculated before the malloc of level 0.  */
  for (level = 1; level < mi->min_indirect_level_escape; level++)
    {
      gimple call_stmt;
      tree size;
      struct malloc_call_data mcd;

      call_stmt = mi->malloc_for_level[level];

      /* Find the correct malloc information.  */
      collect_data_for_malloc_call (call_stmt, &mcd);

      /* No need to check anticipation for constants.  */
      if (TREE_CODE (mcd.size_var) == INTEGER_CST)
        {
          if (!mi->dimension_size)
            {
              mi->dimension_size =
                (tree *) xcalloc (mi->min_indirect_level_escape,
                                  sizeof (tree));
              mi->dimension_size_orig =
                (tree *) xcalloc (mi->min_indirect_level_escape,
                                  sizeof (tree));
            }
          mi->dimension_size[level] = mcd.size_var;
          mi->dimension_size_orig[level] = mcd.size_var;
          continue;
        }
      /* ??? Here we should also add the way to calculate the size
         expression, not only know that it is anticipated.  */
      sbitmap_zero (visited);
      size = can_calculate_expr_before_stmt (mcd.size_var, visited);
      if (size == NULL_TREE)
        {
          mark_min_matrix_escape_level (mi, level, call_stmt);
          if (dump_file)
            fprintf (dump_file,
                     "Matrix %s: Cannot calculate the size of allocation, escaping at level %d\n",
                     get_name (mi->decl), level);
          break;
        }
      if (!mi->dimension_size)
        {
          mi->dimension_size =
            (tree *) xcalloc (mi->min_indirect_level_escape, sizeof (tree));
          mi->dimension_size_orig =
            (tree *) xcalloc (mi->min_indirect_level_escape, sizeof (tree));
        }
      mi->dimension_size[level] = size;
      mi->dimension_size_orig[level] = size;
    }

  /* We don't need those anymore.  */
  for (level = mi->min_indirect_level_escape;
       level < mi->max_malloced_level; level++)
    mi->malloc_for_level[level] = NULL;
  return 1;
}
/* Track all access and allocation sites.  */
static void
find_sites_in_func (bool record)
{
  sbitmap visited_stmts_1;

  gimple_stmt_iterator gsi;
  gimple stmt;
  basic_block bb;
  struct matrix_info tmpmi, *mi;

  visited_stmts_1 = sbitmap_alloc (num_ssa_names);

  FOR_EACH_BB (bb)
    {
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
        {
          tree lhs;

          stmt = gsi_stmt (gsi);
          lhs = gimple_get_lhs (stmt);
          if (lhs != NULL_TREE
              && TREE_CODE (lhs) == VAR_DECL)
            {
              tmpmi.decl = lhs;
              if ((mi = (struct matrix_info *) htab_find (matrices_to_reorg,
                                                          &tmpmi)))
                {
                  sbitmap_zero (visited_stmts_1);
                  analyze_matrix_allocation_site (mi, stmt, 0, visited_stmts_1);
                }
            }
          if (is_gimple_assign (stmt)
              && gimple_assign_single_p (stmt)
              && TREE_CODE (lhs) == SSA_NAME
              && TREE_CODE (gimple_assign_rhs1 (stmt)) == VAR_DECL)
            {
              tmpmi.decl = gimple_assign_rhs1 (stmt);
              if ((mi = (struct matrix_info *) htab_find (matrices_to_reorg,
                                                          &tmpmi)))
                {
                  sbitmap_zero (visited_stmts_1);
                  analyze_matrix_accesses (mi, lhs, 0,
                                           false, visited_stmts_1, record);
                }
            }
        }
    }
  sbitmap_free (visited_stmts_1);
}
/* Traverse the use-def chains to see if there are matrices that
   are passed through pointers and we cannot know how they are accessed.
   For each SSA-name defined by a global variable of our interest,
   we traverse the use-def chains of the SSA and follow the indirections,
   and record in what level of indirection the use of the variable
   escapes.  A use of a pointer escapes when it is passed to a function,
   stored into memory or assigned (except in malloc and free calls).  */

static void
record_all_accesses_in_func (void)
{
  unsigned i;
  sbitmap visited_stmts_1;

  visited_stmts_1 = sbitmap_alloc (num_ssa_names);

  for (i = 0; i < num_ssa_names; i++)
    {
      struct matrix_info tmpmi, *mi;
      tree ssa_var = ssa_name (i);
      tree rhs, lhs;

      if (!ssa_var
          || !is_gimple_assign (SSA_NAME_DEF_STMT (ssa_var))
          || !gimple_assign_single_p (SSA_NAME_DEF_STMT (ssa_var)))
        continue;
      rhs = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (ssa_var));
      lhs = gimple_assign_lhs (SSA_NAME_DEF_STMT (ssa_var));
      if (TREE_CODE (rhs) != VAR_DECL && TREE_CODE (lhs) != VAR_DECL)
        continue;

      /* If the RHS is a matrix that we want to analyze, follow the def-use
         chain for this SSA_VAR and check for escapes or apply the
         flattening.  */
      tmpmi.decl = rhs;
      if ((mi = (struct matrix_info *) htab_find (matrices_to_reorg, &tmpmi)))
        {
          /* This variable will track the visited PHI nodes, so we can limit
             its size to the maximum number of SSA names.  */
          sbitmap_zero (visited_stmts_1);
          analyze_matrix_accesses (mi, ssa_var,
                                   0, false, visited_stmts_1, true);
        }
    }
  sbitmap_free (visited_stmts_1);
}
/* Used when we want to convert the expression
   RESULT = something * ORIG into RESULT = something * NEW.
   If ORIG and NEW are powers of 2, shift operations can be done;
   otherwise a division and a multiplication are used.  */
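/* For example (a sketch of the power-of-two path of the code below):
   with ORIG == 8 and NEW == 2, exact_log2 gives x == 3 and y == 1, so
   instead of a division and a multiplication the result is computed
   with a single shift, RESULT << (x - y).  */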
static tree
compute_offset (HOST_WIDE_INT orig, HOST_WIDE_INT new, tree result)
{
  int x, y;
  tree result1, ratio, log, orig_tree, new_tree;

  x = exact_log2 (orig);
  y = exact_log2 (new);

  if (x != -1 && y != -1)
    {
      if (x == y)
        return result;
      else if (x > y)
        {
          log = build_int_cst (TREE_TYPE (result), x - y);
          result1 =
            fold_build2 (LSHIFT_EXPR, TREE_TYPE (result), result, log);
          return result1;
        }
      log = build_int_cst (TREE_TYPE (result), y - x);
      result1 = fold_build2 (RSHIFT_EXPR, TREE_TYPE (result), result, log);

      return result1;
    }
  orig_tree = build_int_cst (TREE_TYPE (result), orig);
  new_tree = build_int_cst (TREE_TYPE (result), new);
  ratio = fold_build2 (TRUNC_DIV_EXPR, TREE_TYPE (result), result, orig_tree);
  result1 = fold_build2 (MULT_EXPR, TREE_TYPE (result), ratio, new_tree);

  return result1;
}
/* We know that we are allowed to perform matrix flattening (according to the
   escape analysis), so we traverse the use-def chains of the SSA vars
   defined by the global variables pointing to the matrices of our interest.
   In each use of the SSA we calculate the offset from the base address
   according to the following equation:

     a[I1][I2]...[Ik], where D1..Dk is the length of each dimension and the
     escaping level is m <= k, and a' is the new allocated matrix,
     will be translated to:

       b[I(m+1)]...[Ik]

     where
       b = a' + I1*D2...*Dm + I2*D3...*Dm + ... + Im

   A short worked example of this translation is sketched below.  */
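/* For instance (illustrative, not part of the original comment): for a
   two-dimensional matrix that is fully flattened (m == k == 2), the
   equation above degenerates to b = a' + I1*D2 + I2 with no remaining
   indirection, i.e. a[I1][I2] becomes a'[I1 * D2 + I2] -- the same
   a[i * dim2 + j] rewrite shown at the top of this file.  */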
1810 static int
1811 transform_access_sites (void **slot, void *data ATTRIBUTE_UNUSED)
1813 gimple_stmt_iterator gsi;
1814 struct matrix_info *mi = (struct matrix_info *) *slot;
1815 int min_escape_l = mi->min_indirect_level_escape;
1816 struct access_site_info *acc_info;
1817 enum tree_code code;
1818 int i;
1820 if (min_escape_l < 2 || !mi->access_l)
1821 return 1;
1822 for (i = 0; VEC_iterate (access_site_info_p, mi->access_l, i, acc_info);
1823 i++)
1825 /* This is possible because we collect the access sites before
1826 we determine the final minimum indirection level. */
1827 if (acc_info->level >= min_escape_l)
1829 free (acc_info);
1830 continue;
1832 if (acc_info->is_alloc)
1834 if (acc_info->level >= 0 && gimple_bb (acc_info->stmt))
1836 ssa_op_iter iter;
1837 tree def;
1838 gimple stmt = acc_info->stmt;
1839 tree lhs;
1841 FOR_EACH_SSA_TREE_OPERAND (def, stmt, iter, SSA_OP_DEF)
1842 mark_sym_for_renaming (SSA_NAME_VAR (def));
1843 gsi = gsi_for_stmt (stmt);
1844 gcc_assert (is_gimple_assign (acc_info->stmt));
1845 lhs = gimple_assign_lhs (acc_info->stmt);
1846 if (TREE_CODE (lhs) == SSA_NAME
1847 && acc_info->level < min_escape_l - 1)
1849 imm_use_iterator imm_iter;
1850 use_operand_p use_p;
1851 gimple use_stmt;
1853 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, lhs)
1854 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1856 tree rhs, tmp;
1857 gimple new_stmt;
1859 gcc_assert (gimple_assign_rhs_code (acc_info->stmt)
1860 == INDIRECT_REF);
1861 /* Emit convert statement to convert to type of use. */
1862 tmp = create_tmp_var (TREE_TYPE (lhs), "new");
1863 add_referenced_var (tmp);
1864 rhs = gimple_assign_rhs1 (acc_info->stmt);
1865 new_stmt = gimple_build_assign (tmp,
1866 TREE_OPERAND (rhs, 0));
1867 tmp = make_ssa_name (tmp, new_stmt);
1868 gimple_assign_set_lhs (new_stmt, tmp);
1869 gsi = gsi_for_stmt (acc_info->stmt);
1870 gsi_insert_after (&gsi, new_stmt, GSI_SAME_STMT);
1871 SET_USE (use_p, tmp);
1874 if (acc_info->level < min_escape_l - 1)
1875 gsi_remove (&gsi, true);
1877 free (acc_info);
1878 continue;
1880 code = gimple_assign_rhs_code (acc_info->stmt);
1881 if (code == INDIRECT_REF
1882 && acc_info->level < min_escape_l - 1)
1884 /* Replace the INDIRECT_REF with a NOP_EXPR (cast); usually we are
1885 casting from "pointer to type" to "type".  */
1886 tree t =
1887 build1 (NOP_EXPR, TREE_TYPE (gimple_assign_rhs1 (acc_info->stmt)),
1888 TREE_OPERAND (gimple_assign_rhs1 (acc_info->stmt), 0));
1889 gimple_assign_set_rhs_code (acc_info->stmt, NOP_EXPR);
1890 gimple_assign_set_rhs1 (acc_info->stmt, t);
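/* For illustration (hypothetical SSA names): an intermediate load such as
   t_1 = *p_2, which used to fetch a row pointer, is rewritten here as
   t_1 = (T) p_2, where T is the type of t_1; after flattening the
   intermediate pointer arrays no longer exist, so the computed address
   itself is the value we want.  */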
1892 else if (code == POINTER_PLUS_EXPR
1893 && acc_info->level < (min_escape_l))
1895 imm_use_iterator imm_iter;
1896 use_operand_p use_p;
1898 tree offset;
1899 int k = acc_info->level;
1900 tree num_elements, total_elements;
1901 tree tmp1;
1902 tree d_size = mi->dimension_size[k];
1904 /* We have already made sure in the analysis that the first operand
1905 is the base and the second is the offset.  */
1906 offset = acc_info->offset;
1907       if (mi->dim_map[k] == min_escape_l - 1)
1908         {
1909           if (!check_transpose_p || mi->is_transposed_p == false)
1910             tmp1 = offset;
1911           else
1912             {
1913               tree new_offset;
1914               tree d_type_size, d_type_size_k;
1916               d_type_size = size_int (mi->dimension_type_size[min_escape_l]);
1917               d_type_size_k = size_int (mi->dimension_type_size[k + 1]);
1919               new_offset =
1920                 compute_offset (mi->dimension_type_size[min_escape_l],
1921                                 mi->dimension_type_size[k + 1], offset);
1923               total_elements = new_offset;
1924               if (new_offset != offset)
1925                 {
1926                   gsi = gsi_for_stmt (acc_info->stmt);
1927                   tmp1 = force_gimple_operand_gsi (&gsi, total_elements,
1928                                                    true, NULL,
1929                                                    true, GSI_SAME_STMT);
1930                 }
1931               else
1932                 tmp1 = offset;
1933             }
1934         }
1935       else
1936         {
1937           d_size = mi->dimension_size[mi->dim_map[k] + 1];
1938           num_elements =
1939             fold_build2 (MULT_EXPR, sizetype, fold_convert (sizetype, acc_info->index),
1940                          fold_convert (sizetype, d_size));
1941           add_referenced_var (d_size);
1942           gsi = gsi_for_stmt (acc_info->stmt);
1943           tmp1 = force_gimple_operand_gsi (&gsi, num_elements, true,
1944                                            NULL, true, GSI_SAME_STMT);
1945         }
1946 /* Replace the offset if needed. */
1947 if (tmp1 != offset)
1949 if (TREE_CODE (offset) == SSA_NAME)
1951 gimple use_stmt;
1953 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, offset)
1954 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1955 if (use_stmt == acc_info->stmt)
1956 SET_USE (use_p, tmp1);
1958 else
1960 gcc_assert (TREE_CODE (offset) == INTEGER_CST);
1961 gimple_assign_set_rhs2 (acc_info->stmt, tmp1);
1965 /* ??? Meanwhile this happens because we record the same access
1966 site more than once; we should use a hash table to
1967 avoid this and insert the STMT of each access site only
1968 once.
1969 else
1970 gcc_unreachable (); */
1971 free (acc_info);
1973 VEC_free (access_site_info_p, heap, mi->access_l);
1975 update_ssa (TODO_update_ssa);
1976 #ifdef ENABLE_CHECKING
1977 verify_ssa (true);
1978 #endif
1979 return 1;
1982 /* Sort the array A of counts in ascending order.  Rearrange DIM_MAP to reflect the new order.  */
1984 static void
1985 sort_dim_hot_level (gcov_type * a, int *dim_map, int n)
1987 int i, j, tmp1;
1988 gcov_type tmp;
1990 for (i = 0; i < n - 1; i++)
1992 for (j = 0; j < n - 1 - i; j++)
1994 if (a[j + 1] < a[j])
1996 tmp = a[j]; /* swap a[j] and a[j+1] */
1997 a[j] = a[j + 1];
1998 a[j + 1] = tmp;
1999 tmp1 = dim_map[j];
2000 dim_map[j] = dim_map[j + 1];
2001 dim_map[j + 1] = tmp1;
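/* For illustration only (hypothetical values): called with
   a = {30, 10, 20} and dim_map = {0, 1, 2}, the routine leaves
   a = {10, 20, 30} and dim_map = {1, 2, 0}; every swap is applied to both
   arrays, so dim_map[i] names the original dimension that ended up in
   position i.  */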
2007 /* Replace the multiple mallocs (one for each dimension) with one malloc
2008    of size DIM1*DIM2*...*DIMN*size_of_element.
2009    Make sure that we hold the size used at the malloc site in a
2010    new global variable; this way we ensure that the size doesn't
2011    change and is accessible from all the other functions that
2012    use the matrix.  Also, the original calls to free are deleted
2013    and replaced by a new call to free the flattened matrix.  */
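/* A sketch of the intended result, with hypothetical names: for a
   two-level int matrix the pass emits, just before the level 0 malloc,

     __dim_size_1 = dim2 * sizeof (int);
     __dim_size_0 = dim1 * __dim_size_1;
     a = (int *) malloc (__dim_size_0);

   where __dim_size_0 and __dim_size_1 are the new global variables holding
   the cumulative dimension sizes in bytes; the inner per-row mallocs and
   their matching frees are then removed.  */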
2015 static int
2016 transform_allocation_sites (void **slot, void *data ATTRIBUTE_UNUSED)
2018 int i;
2019 struct matrix_info *mi;
2020 tree type, oldfn, prev_dim_size;
2021 gimple call_stmt_0, use_stmt;
2022 struct cgraph_node *c_node;
2023 struct cgraph_edge *e;
2024 gimple_stmt_iterator gsi;
2025 struct malloc_call_data mcd;
2026 HOST_WIDE_INT element_size;
2028 imm_use_iterator imm_iter;
2029 use_operand_p use_p;
2030 tree old_size_0, tmp;
2031 int min_escape_l;
2032 int id;
2034 mi = (struct matrix_info *) *slot;
2036 min_escape_l = mi->min_indirect_level_escape;
2038 if (!mi->malloc_for_level)
2039 mi->min_indirect_level_escape = 0;
2041 if (mi->min_indirect_level_escape < 2)
2042 return 1;
2044 mi->dim_map = (int *) xcalloc (mi->min_indirect_level_escape, sizeof (int));
2045 for (i = 0; i < mi->min_indirect_level_escape; i++)
2046 mi->dim_map[i] = i;
2047 if (check_transpose_p)
2049 int i;
2051 if (dump_file)
2053 fprintf (dump_file, "Matrix %s:\n", get_name (mi->decl));
2054 for (i = 0; i < min_escape_l; i++)
2056 fprintf (dump_file, "dim %d before sort ", i);
2057 if (mi->dim_hot_level)
2058 fprintf (dump_file,
2059 "count is " HOST_WIDEST_INT_PRINT_DEC " \n",
2060 mi->dim_hot_level[i]);
2063 sort_dim_hot_level (mi->dim_hot_level, mi->dim_map,
2064 mi->min_indirect_level_escape);
2065 if (dump_file)
2066 for (i = 0; i < min_escape_l; i++)
2068 fprintf (dump_file, "dim %d after sort\n", i);
2069 if (mi->dim_hot_level)
2070 fprintf (dump_file, "count is " HOST_WIDEST_INT_PRINT_DEC
2071 " \n", mi->dim_hot_level[i]);
2073 for (i = 0; i < mi->min_indirect_level_escape; i++)
2075 if (dump_file)
2076 fprintf (dump_file, "dim_map[%d] after sort %d\n", i,
2077 mi->dim_map[i]);
2078 if (mi->dim_map[i] != i)
2080 if (dump_file)
2081 fprintf (dump_file,
2082 "Transposed dimensions: dim %d is now dim %d\n",
2083 mi->dim_map[i], i);
2084 mi->is_transposed_p = true;
2088 else
2090 for (i = 0; i < mi->min_indirect_level_escape; i++)
2091 mi->dim_map[i] = i;
2093 /* Call statement of allocation site of level 0. */
2094 call_stmt_0 = mi->malloc_for_level[0];
2096 /* Find the correct malloc information.  */
2097 collect_data_for_malloc_call (call_stmt_0, &mcd);
2099 mi->dimension_size[0] = mcd.size_var;
2100 mi->dimension_size_orig[0] = mcd.size_var;
2101 /* Make sure that the variables in the size expression for
2102 all the dimensions (above level 0) aren't modified in
2103 the allocation function. */
2104 for (i = 1; i < mi->min_indirect_level_escape; i++)
2106 tree t;
2107 check_var_data data;
2109 /* mi->dimension_size must contain the expression of the size calculated
2110 in check_allocation_function. */
2111 gcc_assert (mi->dimension_size[i]);
2113 data.fn = mi->allocation_function_decl;
2114 data.stmt = NULL;
2115 t = walk_tree_without_duplicates (&(mi->dimension_size[i]),
2116 check_var_notmodified_p,
2117 &data);
2118 if (t != NULL_TREE)
2120 mark_min_matrix_escape_level (mi, i, data.stmt);
2121 break;
2125 if (mi->min_indirect_level_escape < 2)
2126 return 1;
2128 /* We must make sure that the size expression is available
2129 before the call to the malloc of level 0.  */
2130 gsi = gsi_for_stmt (call_stmt_0);
2132 /* Find out the size of each dimension by looking at the malloc
2133 sites, and create a global variable to hold each size.
2134 We add the assignment to the global before the malloc of level 0. */
2136 /* To be able to produce gimple temporaries. */
2137 oldfn = current_function_decl;
2138 current_function_decl = mi->allocation_function_decl;
2139 push_cfun (DECL_STRUCT_FUNCTION (mi->allocation_function_decl));
2141 /* Set the dimension sizes as follows:
2142    DIM_SIZE[i] = NUM_ELEMENTS[i] * DIM_SIZE[i+1], with DIM_SIZE[n] being
2143    the element size, where n is the maximum non-escaping level; DIM_SIZE[i] is thus the size in bytes of one level-i sub-matrix.  */
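/* For example (illustrative counts d0, d1, d2): flattening three levels
   of an int matrix produces

     DIM_SIZE[2] = d2 * sizeof (int);
     DIM_SIZE[1] = d1 * DIM_SIZE[2];
     DIM_SIZE[0] = d0 * DIM_SIZE[1];

   and DIM_SIZE[0] becomes the argument of the single remaining malloc.  */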
2144 element_size = mi->dimension_type_size[mi->min_indirect_level_escape];
2145 prev_dim_size = NULL_TREE;
2147 for (i = mi->min_indirect_level_escape - 1; i >= 0; i--)
2149 tree dim_size, dim_var;
2150 gimple stmt;
2151 tree d_type_size;
2153 /* Now put the size expression in a global variable and initialize it to
2154 the size expression before the malloc of level 0. */
2155 dim_var =
2156 add_new_static_var (TREE_TYPE
2157 (mi->dimension_size_orig[mi->dim_map[i]]));
2158 type = TREE_TYPE (mi->dimension_size_orig[mi->dim_map[i]]);
2160 /* DIM_SIZE = MALLOC_SIZE_PARAM / TYPE_SIZE. */
2161 /* Find which dimension ID becomes dimension I after the transposition.  */
2162 for (id = 0; id < mi->min_indirect_level_escape; id++)
2163 if (mi->dim_map[id] == i)
2164 break;
2165 d_type_size =
2166 build_int_cst (type, mi->dimension_type_size[id + 1]);
2167 if (!prev_dim_size)
2168 prev_dim_size = build_int_cst (type, element_size);
2169       if (!check_transpose_p && i == mi->min_indirect_level_escape - 1)
2170         {
2171           dim_size = mi->dimension_size_orig[id];
2172         }
2173       else
2174         {
2175           dim_size =
2176             fold_build2 (TRUNC_DIV_EXPR, type, mi->dimension_size_orig[id],
2177                          d_type_size);
2179           dim_size = fold_build2 (MULT_EXPR, type, dim_size, prev_dim_size);
2180         }
2181 dim_size = force_gimple_operand_gsi (&gsi, dim_size, true, NULL,
2182 true, GSI_SAME_STMT);
2183 /* GLOBAL_HOLDING_THE_SIZE = DIM_SIZE. */
2184 stmt = gimple_build_assign (dim_var, dim_size);
2185 mark_symbols_for_renaming (stmt);
2186 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
2188 prev_dim_size = mi->dimension_size[i] = dim_var;
2190 update_ssa (TODO_update_ssa);
2191 /* Replace the size argument of the level 0 malloc with
2192 the total size of all the dimensions.  */
2193 c_node = cgraph_node (mi->allocation_function_decl);
2194 old_size_0 = gimple_call_arg (call_stmt_0, 0);
2195 tmp = force_gimple_operand_gsi (&gsi, mi->dimension_size[0], true,
2196 NULL, true, GSI_SAME_STMT);
2197 if (TREE_CODE (old_size_0) == SSA_NAME)
2199 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, old_size_0)
2200 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
2201 if (use_stmt == call_stmt_0)
2202 SET_USE (use_p, tmp);
2204 /* When deleting the calls to malloc we also need to remove the edge from
2205 the call graph to keep it consistent.  Notice that cgraph_edge may
2206 create a new node in the call graph if there is no node for the given
2207 declaration; this shouldn't be the case, but currently there is no way to
2208 check this outside of "cgraph.c".  */
2209 for (i = 1; i < mi->min_indirect_level_escape; i++)
2211 gimple_stmt_iterator gsi;
2212 gimple use_stmt1 = NULL;
2214 gimple call_stmt = mi->malloc_for_level[i];
2215 gcc_assert (is_gimple_call (call_stmt));
2216 e = cgraph_edge (c_node, call_stmt);
2217 gcc_assert (e);
2218 cgraph_remove_edge (e);
2219 gsi = gsi_for_stmt (call_stmt);
2220 /* Remove the call stmt. */
2221 gsi_remove (&gsi, true);
2222 /* Remove the type cast stmt.  */
2223 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter,
2224 gimple_call_lhs (call_stmt))
2226 use_stmt1 = use_stmt;
2227 gsi = gsi_for_stmt (use_stmt);
2228 gsi_remove (&gsi, true);
2230 /* Remove the assignment of the allocated area. */
2231 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter,
2232 gimple_get_lhs (use_stmt1))
2234 gsi = gsi_for_stmt (use_stmt);
2235 gsi_remove (&gsi, true);
2238 update_ssa (TODO_update_ssa);
2239 #ifdef ENABLE_CHECKING
2240 verify_ssa (true);
2241 #endif
2242 /* Delete the calls to free. */
2243 for (i = 1; i < mi->min_indirect_level_escape; i++)
2245 gimple_stmt_iterator gsi;
2247 /* ??? It is unclear why this case is possible, but we failed on it once.  */
2248 if (!mi->free_stmts[i].stmt)
2249 continue;
2251 c_node = cgraph_node (mi->free_stmts[i].func);
2252 gcc_assert (is_gimple_call (mi->free_stmts[i].stmt));
2253 e = cgraph_edge (c_node, mi->free_stmts[i].stmt);
2254 gcc_assert (e);
2255 cgraph_remove_edge (e);
2256 current_function_decl = mi->free_stmts[i].func;
2257 set_cfun (DECL_STRUCT_FUNCTION (mi->free_stmts[i].func));
2258 gsi = gsi_for_stmt (mi->free_stmts[i].stmt);
2259 gsi_remove (&gsi, true);
2261 /* Restore the previous state.  */
2262 current_function_decl = oldfn;
2263 pop_cfun ();
2264 return 1;
2269 /* Print out the results of the escape analysis. */
2270 static int
2271 dump_matrix_reorg_analysis (void **slot, void *data ATTRIBUTE_UNUSED)
2273 struct matrix_info *mi = (struct matrix_info *) *slot;
2275 if (!dump_file)
2276 return 1;
2277 fprintf (dump_file, "Matrix \"%s\"; Escaping Level: %d, Num Dims: %d,",
2278 get_name (mi->decl), mi->min_indirect_level_escape, mi->num_dims);
2279 fprintf (dump_file, " Malloc Dims: %d, ", mi->max_malloced_level);
2280 fprintf (dump_file, "\n");
2281 if (mi->min_indirect_level_escape >= 2)
2282 fprintf (dump_file, "Flattened %d dimensions \n",
2283 mi->min_indirect_level_escape);
2284 return 1;
2287 /* Perform matrix flattening. */
2289 static unsigned int
2290 matrix_reorg (void)
2292 struct cgraph_node *node;
2294 if (profile_info)
2295 check_transpose_p = true;
2296 else
2297 check_transpose_p = false;
2298 /* If there are hand-written vectors, we skip this optimization.  */
2299 for (node = cgraph_nodes; node; node = node->next)
2300 if (!may_flatten_matrices (node))
2301 return 0;
2302 matrices_to_reorg = htab_create (37, mtt_info_hash, mtt_info_eq, mat_free);
2303 /* Find and record all potential matrices in the program. */
2304 find_matrices_decl ();
2305 /* Analyze the accesses of the matrices (escaping analysis). */
2306 for (node = cgraph_nodes; node; node = node->next)
2307 if (node->analyzed)
2309 tree temp_fn;
2311 temp_fn = current_function_decl;
2312 current_function_decl = node->decl;
2313 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
2314 bitmap_obstack_initialize (NULL);
2315 gimple_register_cfg_hooks ();
2317 if (!gimple_in_ssa_p (cfun))
2319 free_dominance_info (CDI_DOMINATORS);
2320 free_dominance_info (CDI_POST_DOMINATORS);
2321 pop_cfun ();
2322 current_function_decl = temp_fn;
2323 bitmap_obstack_release (NULL);
2325 return 0;
2328 #ifdef ENABLE_CHECKING
2329 verify_flow_info ();
2330 #endif
2332 if (!matrices_to_reorg)
2334 free_dominance_info (CDI_DOMINATORS);
2335 free_dominance_info (CDI_POST_DOMINATORS);
2336 pop_cfun ();
2337 current_function_decl = temp_fn;
2338 bitmap_obstack_release (NULL);
2340 return 0;
2343 /* Create a hash table for the PHI nodes.  */
2344 htab_mat_acc_phi_nodes = htab_create (37, mat_acc_phi_hash,
2345 mat_acc_phi_eq, free);
2346 if (!check_transpose_p)
2347 find_sites_in_func (false);
2348 else
2350 find_sites_in_func (true);
2351 loop_optimizer_init (LOOPS_NORMAL);
2352 if (current_loops)
2353 scev_initialize ();
2354 htab_traverse (matrices_to_reorg, analyze_transpose, NULL);
2355 if (current_loops)
2357 scev_finalize ();
2358 loop_optimizer_finalize ();
2359 current_loops = NULL;
2362 /* If the current function is the allocation function for any of
2363 the matrices, we check its allocation and the escaping level.  */
2364 htab_traverse (matrices_to_reorg, check_allocation_function, NULL);
2365 free_dominance_info (CDI_DOMINATORS);
2366 free_dominance_info (CDI_POST_DOMINATORS);
2367 pop_cfun ();
2368 current_function_decl = temp_fn;
2369 bitmap_obstack_release (NULL);
2371 htab_traverse (matrices_to_reorg, transform_allocation_sites, NULL);
2372 /* Now transform the accesses. */
2373 for (node = cgraph_nodes; node; node = node->next)
2374 if (node->analyzed)
2376 /* Remember that allocation sites have been handled. */
2377 tree temp_fn;
2379 temp_fn = current_function_decl;
2380 current_function_decl = node->decl;
2381 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
2382 bitmap_obstack_initialize (NULL);
2383 gimple_register_cfg_hooks ();
2384 record_all_accesses_in_func ();
2385 htab_traverse (matrices_to_reorg, transform_access_sites, NULL);
2386 free_dominance_info (CDI_DOMINATORS);
2387 free_dominance_info (CDI_POST_DOMINATORS);
2388 pop_cfun ();
2389 current_function_decl = temp_fn;
2390 bitmap_obstack_release (NULL);
2392 htab_traverse (matrices_to_reorg, dump_matrix_reorg_analysis, NULL);
2394 current_function_decl = NULL;
2395 set_cfun (NULL);
2396 matrices_to_reorg = NULL;
2397 return 0;
2401 /* The condition for matrix flattening to be performed. */
2402 static bool
2403 gate_matrix_reorg (void)
2405 return flag_ipa_matrix_reorg && flag_whole_program;
2408 struct simple_ipa_opt_pass pass_ipa_matrix_reorg =
2411 SIMPLE_IPA_PASS,
2412 "matrix-reorg", /* name */
2413 gate_matrix_reorg, /* gate */
2414 matrix_reorg, /* execute */
2415 NULL, /* sub */
2416 NULL, /* next */
2417 0, /* static_pass_number */
2418 0, /* tv_id */
2419 0, /* properties_required */
2420 PROP_trees, /* properties_provided */
2421 0, /* properties_destroyed */
2422 0, /* todo_flags_start */
2423 TODO_dump_cgraph | TODO_dump_func /* todo_flags_finish */