/* Matrix layout transformations.
   Copyright (C) 2006, 2007, 2008 Free Software Foundation, Inc.
   Contributed by Razya Ladelsky <razya@il.ibm.com>
   Originally written by Revital Eres and Mustafa Hagog.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/*
   Matrix flattening optimization tries to replace an N-dimensional
   matrix with its equivalent M-dimensional matrix, where M < N.
   This first implementation focuses on global matrices defined dynamically.

   When N==1, we actually flatten the whole matrix.
   For instance, consider a two-dimensional array a [dim1] [dim2].
   The code for allocating space for it usually looks like:

     a = (int **) malloc (dim1 * sizeof (int *));
     for (i = 0; i < dim1; i++)
       a[i] = (int *) malloc (dim2 * sizeof (int));

   If the array "a" is found suitable for this optimization,
   its allocation is replaced by:

     a = (int *) malloc (dim1 * dim2 * sizeof (int));

   and all the references to a[i][j] are replaced by a[i * dim2 + j].
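
   The same applies to deeper nestings; for instance (an illustrative
   extension of the example above), a three-dimensional access
   a[i][j][k], with inner dimension sizes dim2 and dim3, would become
   a[i * dim2 * dim3 + j * dim3 + k].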

   The two main phases of the optimization are the analysis
   and the transformation.
   The driver of the optimization is matrix_reorg ().

   Analysis phase:
   ===============

   We'll number the dimensions outside-in, meaning the most external
   is 0, then 1, and so on.
   The analysis part of the optimization determines K, the escape
   level of an N-dimensional matrix (K <= N), that allows flattening of
   the external dimensions 0, 1, ..., K-1.  Escape level 0 means that the
   whole matrix escapes and no flattening is possible.
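
   For example (an illustrative case): for a matrix int ***a, if some
   sub-matrix a[i] is passed to an unknown function, the escape level
   is 1; only dimension 0 lies below the escape level, so there is
   nothing to merge and the matrix is left untouched.  If no pointer
   level escapes, all three external dimensions can be flattened into
   one.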

   The analysis part is implemented in analyze_matrix_allocation_site()
   and analyze_matrix_accesses().

   Transformation phase:
   =====================
   In this phase we define the new flattened matrices that replace the
   original matrices in the code.
   Implemented in transform_allocation_sites(),
   transform_access_sites().

   Matrix Transposing
   ==================
   The idea of Matrix Transposing is to organize the matrix in a
   different layout such that the dimensions are reordered.
   This could produce better cache behavior in some cases.

   For example, let's look at the matrix accesses in the following loop:

     for (i = 0; i < N; i++)
       for (j = 0; j < M; j++)
         access to a[i][j]

   This loop can produce good cache behavior because the elements of
   the inner dimension are accessed sequentially.

   However, the accesses of the matrix could be of the following form:

     for (i = 0; i < N; i++)
       for (j = 0; j < M; j++)
         access to a[j][i]

   In this loop we iterate over the columns and not the rows.
   Therefore, swapping the rows and columns yields a layout with
   better (cache) locality.  Swapping the dimensions of the matrix
   is called matrix transposing.

   This example, of course, extends to matrices with more than two
   dimensions as well.
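
   For instance (an illustrative sketch): in the loop above, a is
   logically an M x N matrix.  Flattened as is, a[j][i] becomes
   a[j * N + i], so consecutive iterations of the inner j loop are N
   elements apart.  With the two dimensions transposed, element (j,i)
   is laid out at a[i * M + j], and consecutive j iterations touch
   adjacent memory locations.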

   Since a program can include all kinds of accesses, there is a
   decision mechanism, implemented in analyze_transpose(), which uses
   a heuristic to determine whether to transpose the matrix or not,
   according to the form of the dominant accesses.
   This decision is passed on to the flattening mechanism; whether or
   not the matrix was transposed, the matrix is flattened (if possible).

   This decision making is based on profiling information and loop
   information.  If profiling information is available, the
   decision-making mechanism is applied; otherwise the matrix is only
   flattened (if possible).

   Both optimizations are described in the paper "Matrix flattening and
   transposing in GCC" which was presented at the GCC Summit 2006.
   http://www.gccsummit.org/2006/2006-GCC-Summit-Proceedings.pdf  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "rtl.h"
#include "c-tree.h"
#include "tree-inline.h"
#include "tree-flow.h"
#include "tree-flow-inline.h"
#include "langhooks.h"
#include "hashtab.h"
#include "toplev.h"
#include "flags.h"
#include "ggc.h"
#include "debug.h"
#include "target.h"
#include "cgraph.h"
#include "diagnostic.h"
#include "timevar.h"
#include "params.h"
#include "fibheap.h"
#include "c-common.h"
#include "intl.h"
#include "function.h"
#include "basic-block.h"
#include "cfgloop.h"
#include "tree-iterator.h"
#include "tree-pass.h"
#include "opts.h"
#include "tree-data-ref.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"

/* FIXME tuples.  */
#if 0
/* We need to collect a lot of data from the original malloc,
   particularly as the gimplifier has converted:

   orig_var = (struct_type *) malloc (x * sizeof (struct_type *));

   into

   T3 = <constant> ;  ** <constant> is amount to malloc; precomputed **
   T4 = malloc (T3);
   T5 = (struct_type *) T4;
   orig_var = T5;

   The following struct fields allow us to collect all the necessary data
   from the gimplified program.  The comments in the struct below are all
   based on the gimple example above.  */

struct malloc_call_data
{
  tree call_stmt;	/* Tree for "T4 = malloc (T3);"  */
  tree size_var;	/* Var decl for T3.  */
  tree malloc_size;	/* Tree for "<constant>", the rhs assigned to T3.  */
};

/* The front end of the compiler, when parsing statements of the form:

   var = (type_cast) malloc (sizeof (type));

   always converts this single statement into the following statements
   (GIMPLE form):

   T.1 = sizeof (type);
   T.2 = malloc (T.1);
   T.3 = (type_cast) T.2;
   var = T.3;

   Since we need to create new malloc statements and modify the original
   statements somewhat, we need to find all four of the above statements.
   Currently record_call_1 (called for building cgraph edges) finds and
   records the statements containing the actual call to malloc, but we
   need to find the rest of the variables/statements on our own.  That
   is what the following function does.  */
static void
collect_data_for_malloc_call (tree stmt, struct malloc_call_data *m_data)
{
  tree size_var = NULL;
  tree malloc_fn_decl;
  tree tmp;
  tree arg1;

  gcc_assert (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT);

  tmp = get_call_expr_in (stmt);
  malloc_fn_decl = CALL_EXPR_FN (tmp);
  if (TREE_CODE (malloc_fn_decl) != ADDR_EXPR
      || TREE_CODE (TREE_OPERAND (malloc_fn_decl, 0)) != FUNCTION_DECL
      || DECL_FUNCTION_CODE (TREE_OPERAND (malloc_fn_decl, 0)) !=
      BUILT_IN_MALLOC)
    return;

  arg1 = CALL_EXPR_ARG (tmp, 0);
  size_var = arg1;

  m_data->call_stmt = stmt;
  m_data->size_var = size_var;
  if (TREE_CODE (size_var) != VAR_DECL)
    m_data->malloc_size = size_var;
  else
    m_data->malloc_size = NULL_TREE;
}

/* Information about matrix access site.
   For example: if an access site of matrix arr is arr[i][j]
   the ACCESS_SITE_INFO structure will have the address
   of arr as its stmt.  The INDEX_INFO will hold information about the
   initial address and index of each dimension.  */
struct access_site_info
{
  /* The statement (INDIRECT_REF or POINTER_PLUS_EXPR).  */
  tree stmt;

  /* In case of POINTER_PLUS_EXPR, what is the offset.  */
  tree offset;

  /* The index which created the offset.  */
  tree index;

  /* The indirection level of this statement.  */
  int level;

  /* TRUE for allocation site FALSE for access site.  */
  bool is_alloc;

  /* The function containing the access site.  */
  tree function_decl;

  /* This access is iterated in the inner most loop */
  bool iterated_by_inner_most_loop_p;
};

typedef struct access_site_info *access_site_info_p;
DEF_VEC_P (access_site_info_p);
DEF_VEC_ALLOC_P (access_site_info_p, heap);

/* Information about matrix to flatten.  */
struct matrix_info
{
  /* Decl tree of this matrix.  */
  tree decl;
  /* Number of dimensions; number
     of "*" in the type declaration.  */
  int num_dims;

  /* Minimum indirection level that escapes, 0 means that
     the whole matrix escapes, k means that dimensions
     0 to ACTUAL_DIM - k escape.  */
  int min_indirect_level_escape;

  tree min_indirect_level_escape_stmt;

  /* Is the matrix transposed.  */
  bool is_transposed_p;

  /* Hold the allocation site for each level (dimension).
     We can use NUM_DIMS as the upper bound and allocate the array
     once with this number of elements and no need to use realloc and
     MAX_MALLOCED_LEVEL.  */
  tree *malloc_for_level;

  int max_malloced_level;

  /* The location of the allocation sites (they must be in one
     function).  */
  tree allocation_function_decl;

  /* The calls to free for each level of indirection.  */
  struct free_info
  {
    tree stmt;
    tree func;
  } *free_stmts;

  /* An array which holds for each dimension its size, where
     dimension 0 is the outermost (the one that contains all the
     others).  */
  tree *dimension_size;

  /* An array which holds for each dimension its original size
     (before transposing and flattening take place).  */
  tree *dimension_size_orig;

  /* An array which holds for each dimension the size of the type of
     elements accessed in that level (in bytes).  */
  HOST_WIDE_INT *dimension_type_size;

  int dimension_type_size_len;

  /* An array collecting the count of accesses for each dimension.  */
  gcov_type *dim_hot_level;

  /* An array of the accesses to be flattened.
     elements are of type "struct access_site_info *".  */
  VEC (access_site_info_p, heap) * access_l;

  /* A map of how the dimensions will be organized at the end of
     the analyses.  */
  int *dim_map;
};

/* In each phi node we want to record the indirection level we have when we
   get to the phi node.  Usually we will have phi nodes with more than two
   arguments, so we must ensure that all of them get to the phi node with
   the same indirection level; otherwise it's not safe to do the flattening.
   So we record the information regarding the indirection level each time we
   get to the phi node in this hash table.  */

struct matrix_access_phi_node
{
  tree phi;
  int indirection_level;
};

/* We use this structure to find if the SSA variable is accessed inside the
   tree and record the tree containing it.  */

struct ssa_acc_in_tree
{
  /* The variable whose accesses in the tree we are looking for.  */
  tree ssa_var;
  /* The tree and code inside it the ssa_var is accessed, currently
     it could be an INDIRECT_REF or CALL_EXPR.  */
  enum tree_code t_code;
  tree t_tree;
  /* The place in the containing tree.  */
  tree *tp;
  tree second_op;
  bool var_found;
};

static void analyze_matrix_accesses (struct matrix_info *, tree, int, bool,
                                     sbitmap, bool);
static int transform_allocation_sites (void **, void *);
static int transform_access_sites (void **, void *);
static int analyze_transpose (void **, void *);
static int dump_matrix_reorg_analysis (void **, void *);

static bool check_transpose_p;

/* Hash function used for the phi nodes.  */

static hashval_t
mat_acc_phi_hash (const void *p)
{
  const struct matrix_access_phi_node *const ma_phi =
    (const struct matrix_access_phi_node *) p;

  return htab_hash_pointer (ma_phi->phi);
}

/* Equality means phi node pointers are the same.  */

static int
mat_acc_phi_eq (const void *p1, const void *p2)
{
  const struct matrix_access_phi_node *const phi1 =
    (const struct matrix_access_phi_node *) p1;
  const struct matrix_access_phi_node *const phi2 =
    (const struct matrix_access_phi_node *) p2;

  if (phi1->phi == phi2->phi)
    return 1;

  return 0;
}

/* Hold the PHI nodes we visit during the traversal for escaping
   analysis.  */
static htab_t htab_mat_acc_phi_nodes = NULL;

/* This hash-table holds the information about the matrices we are
   going to handle.  */
static htab_t matrices_to_reorg = NULL;

/* Return a hash for MTT, which is really a "matrix_info *".  */
static hashval_t
mtt_info_hash (const void *mtt)
{
  return htab_hash_pointer (((const struct matrix_info *) mtt)->decl);
}

/* Return true if MTT1 and MTT2 (which are really both of type
   "matrix_info *") refer to the same decl.  */
static int
mtt_info_eq (const void *mtt1, const void *mtt2)
{
  const struct matrix_info *const i1 = (const struct matrix_info *) mtt1;
  const struct matrix_info *const i2 = (const struct matrix_info *) mtt2;

  if (i1->decl == i2->decl)
    return true;

  return false;
}

/* Return the innermost tree that is not a cast.  */
static tree
get_inner_of_cast_expr (tree t)
{
  while (CONVERT_EXPR_P (t)
	 || TREE_CODE (t) == VIEW_CONVERT_EXPR)
    t = TREE_OPERAND (t, 0);

  return t;
}

/* Return false if STMT may contain a vector expression.
   In this situation, all matrices should not be flattened.  */
static bool
may_flatten_matrices_1 (tree stmt)
{
  tree t;

  switch (TREE_CODE (stmt))
    {
    case GIMPLE_MODIFY_STMT:
      t = TREE_OPERAND (stmt, 1);
      while (CONVERT_EXPR_P (t))
	{
	  if (TREE_TYPE (t) && POINTER_TYPE_P (TREE_TYPE (t)))
	    {
	      tree pointee;

	      pointee = TREE_TYPE (t);
	      while (POINTER_TYPE_P (pointee))
		pointee = TREE_TYPE (pointee);
	      if (TREE_CODE (pointee) == VECTOR_TYPE)
		{
		  if (dump_file)
		    fprintf (dump_file,
			     "Found vector type, don't flatten matrix\n");
		  return false;
		}
	    }
	  t = TREE_OPERAND (t, 0);
	}
      break;
    case ASM_EXPR:
      /* Asm code could contain vector operations.  */
      return false;
      break;
    default:
      break;
    }
  return true;
}

/* Return false if there are hand-written vectors in the program.
   We disable the flattening in such a case.  */
static bool
may_flatten_matrices (struct cgraph_node *node)
{
  tree decl;
  struct function *func;
  basic_block bb;
  block_stmt_iterator bsi;

  decl = node->decl;
  if (node->analyzed)
    {
      func = DECL_STRUCT_FUNCTION (decl);
      FOR_EACH_BB_FN (bb, func)
	for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
	  if (!may_flatten_matrices_1 (bsi_stmt (bsi)))
	    return false;
    }
  return true;
}

/* Given a VAR_DECL, check its type to determine whether it is
   a definition of a dynamically allocated matrix and therefore is
   a suitable candidate for the matrix flattening optimization.
   Return NULL if VAR_DECL is not such a decl.  Otherwise, allocate
   a MATRIX_INFO structure, fill it with the relevant information
   and return a pointer to it.
   TODO: handle also statically defined arrays.  */
static struct matrix_info *
analyze_matrix_decl (tree var_decl)
{
  struct matrix_info *m_node, tmpmi, *mi;
  tree var_type;
  int dim_num = 0;

  gcc_assert (matrices_to_reorg);

  if (TREE_CODE (var_decl) == PARM_DECL)
    var_type = DECL_ARG_TYPE (var_decl);
  else if (TREE_CODE (var_decl) == VAR_DECL)
    var_type = TREE_TYPE (var_decl);
  else
    return NULL;

  if (!POINTER_TYPE_P (var_type))
    return NULL;

  while (POINTER_TYPE_P (var_type))
    {
      var_type = TREE_TYPE (var_type);
      dim_num++;
    }

  if (dim_num <= 1)
    return NULL;

  if (!COMPLETE_TYPE_P (var_type)
      || TREE_CODE (TYPE_SIZE_UNIT (var_type)) != INTEGER_CST)
    return NULL;

  /* Check to see if this pointer is already in there.  */
  tmpmi.decl = var_decl;
  mi = (struct matrix_info *) htab_find (matrices_to_reorg, &tmpmi);

  if (mi)
    return NULL;

  /* Record the matrix.  */

  m_node = (struct matrix_info *) xcalloc (1, sizeof (struct matrix_info));
  m_node->decl = var_decl;
  m_node->num_dims = dim_num;
  m_node->free_stmts
    = (struct free_info *) xcalloc (dim_num, sizeof (struct free_info));

  /* Init min_indirect_level_escape to -1 to indicate that no escape
     analysis has been done yet.  */
  m_node->min_indirect_level_escape = -1;
  m_node->is_transposed_p = false;

  return m_node;
}

/* Free matrix E.  */
static void
mat_free (void *e)
{
  struct matrix_info *mat = (struct matrix_info *) e;

  if (!mat)
    return;

  if (mat->free_stmts)
    free (mat->free_stmts);
  if (mat->dim_hot_level)
    free (mat->dim_hot_level);
  if (mat->malloc_for_level)
    free (mat->malloc_for_level);
}

/* Find all potential matrices.
   TODO: currently we handle only multidimensional
   dynamically allocated arrays.  */
static void
find_matrices_decl (void)
{
  struct matrix_info *tmp;
  PTR *slot;
  struct varpool_node *vnode;

  gcc_assert (matrices_to_reorg);

  /* For every global variable in the program:
     Check to see if it's of a candidate type and record it.  */
  for (vnode = varpool_nodes_queue; vnode; vnode = vnode->next_needed)
    {
      tree var_decl = vnode->decl;

      if (!var_decl || TREE_CODE (var_decl) != VAR_DECL)
	continue;

      if (matrices_to_reorg)
	if ((tmp = analyze_matrix_decl (var_decl)))
	  {
	    if (!TREE_ADDRESSABLE (var_decl))
	      {
		slot = htab_find_slot (matrices_to_reorg, tmp, INSERT);
		*slot = tmp;
	      }
	  }
    }
  return;
}

/* Mark that the matrix MI escapes at level L.  */
static void
mark_min_matrix_escape_level (struct matrix_info *mi, int l, tree s)
{
  if (mi->min_indirect_level_escape == -1
      || (mi->min_indirect_level_escape > l))
    {
      mi->min_indirect_level_escape = l;
      mi->min_indirect_level_escape_stmt = s;
    }
}

/* Find if the SSA variable is accessed inside the
   tree and record the tree containing it.
   The only relevant uses are the case of SSA_NAME, or SSA inside
   INDIRECT_REF, CALL_EXPR, PLUS_EXPR, POINTER_PLUS_EXPR, MULT_EXPR.  */
static void
ssa_accessed_in_tree (tree t, struct ssa_acc_in_tree *a)
{
  tree call, decl;
  tree arg;
  call_expr_arg_iterator iter;

  a->t_code = TREE_CODE (t);
  switch (a->t_code)
    {
      tree op1, op2;

    case SSA_NAME:
      if (t == a->ssa_var)
	a->var_found = true;
      break;
    case INDIRECT_REF:
      if (SSA_VAR_P (TREE_OPERAND (t, 0))
	  && TREE_OPERAND (t, 0) == a->ssa_var)
	a->var_found = true;
      break;
    case CALL_EXPR:
      FOR_EACH_CALL_EXPR_ARG (arg, iter, t)
	{
	  if (arg == a->ssa_var)
	    {
	      a->var_found = true;
	      call = get_call_expr_in (t);
	      if (call && (decl = get_callee_fndecl (call)))
		a->t_tree = decl;
	      break;
	    }
	}
      break;
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MULT_EXPR:
      op1 = TREE_OPERAND (t, 0);
      op2 = TREE_OPERAND (t, 1);

      if (op1 == a->ssa_var)
	{
	  a->var_found = true;
	  a->second_op = op2;
	}
      else if (op2 == a->ssa_var)
	{
	  a->var_found = true;
	  a->second_op = op1;
	}
      break;
    default:
      break;
    }
}

/* Record the access/allocation site information for matrix MI so we can
   handle it later in transformation.  */
static void
record_access_alloc_site_info (struct matrix_info *mi, tree stmt, tree offset,
			       tree index, int level, bool is_alloc)
{
  struct access_site_info *acc_info;

  if (!mi->access_l)
    mi->access_l = VEC_alloc (access_site_info_p, heap, 100);

  acc_info
    = (struct access_site_info *)
    xcalloc (1, sizeof (struct access_site_info));
  acc_info->stmt = stmt;
  acc_info->offset = offset;
  acc_info->index = index;
  acc_info->function_decl = current_function_decl;
  acc_info->level = level;
  acc_info->is_alloc = is_alloc;

  VEC_safe_push (access_site_info_p, heap, mi->access_l, acc_info);
}

/* Record the malloc as the allocation site of the given LEVEL.  But
   first we make sure that all the size parameters passed to malloc in
   all the allocation sites could be pre-calculated before the call to
   the malloc of level 0 (the main malloc call).  */
static void
add_allocation_site (struct matrix_info *mi, tree stmt, int level)
{
  struct malloc_call_data mcd;

  /* Make sure that the allocation sites are in the same function.  */
  if (!mi->allocation_function_decl)
    mi->allocation_function_decl = current_function_decl;
  else if (mi->allocation_function_decl != current_function_decl)
    {
      int min_malloc_level;

      gcc_assert (mi->malloc_for_level);

      /* Find the minimum malloc level that already has been seen;
	 we know its allocation function must be
	 MI->allocation_function_decl since it's different from
	 CURRENT_FUNCTION_DECL; the escaping level should then be
	 MIN (LEVEL, MIN_MALLOC_LEVEL) - 1, and the allocation function
	 must be set accordingly.  */
      for (min_malloc_level = 0;
	   min_malloc_level < mi->max_malloced_level
	   && mi->malloc_for_level[min_malloc_level]; min_malloc_level++);
      if (level < min_malloc_level)
	{
	  mi->allocation_function_decl = current_function_decl;
	  mark_min_matrix_escape_level (mi, min_malloc_level, stmt);
	}
      else
	{
	  mark_min_matrix_escape_level (mi, level, stmt);
	  /* cannot be that (level == min_malloc_level)
	     we would have returned earlier.  */
	  return;
	}
    }

  /* Find the correct malloc information.  */
  collect_data_for_malloc_call (stmt, &mcd);

  /* We accept only calls to malloc function; we do not accept
     calls like calloc and realloc.  */
  if (!mi->malloc_for_level)
    {
      mi->malloc_for_level = XCNEWVEC (tree, level + 1);
      mi->max_malloced_level = level + 1;
    }
  else if (mi->max_malloced_level <= level)
    {
      mi->malloc_for_level
	= XRESIZEVEC (tree, mi->malloc_for_level, level + 1);

      /* Zero the newly allocated items.  */
      memset (&(mi->malloc_for_level[mi->max_malloced_level + 1]),
	      0, (level - mi->max_malloced_level) * sizeof (tree));

      mi->max_malloced_level = level + 1;
    }
  mi->malloc_for_level[level] = stmt;
}

/* Given an assignment statement STMT whose left-hand side is the
   matrix MI variable, we traverse the immediate uses backwards until
   we get to a malloc site.  We make sure that there is one and only
   one malloc site that sets this variable.  When we are performing
   the flattening we generate a new variable that will hold the size
   for each dimension; each malloc that allocates a dimension has the
   size parameter; we use that parameter to initialize the dimension
   size variable so we can use it later in the address calculations.
   LEVEL is the dimension we're inspecting.
   Return if STMT is related to an allocation site.  */

static void
analyze_matrix_allocation_site (struct matrix_info *mi, tree stmt,
				int level, sbitmap visited)
{
  if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT)
    {
      tree rhs = TREE_OPERAND (stmt, 1);

      rhs = get_inner_of_cast_expr (rhs);
      if (TREE_CODE (rhs) == SSA_NAME)
	{
	  tree def = SSA_NAME_DEF_STMT (rhs);

	  analyze_matrix_allocation_site (mi, def, level, visited);
	  return;
	}
      /* A result of call to malloc.  */
      else if (TREE_CODE (rhs) == CALL_EXPR)
	{
	  int call_flags = call_expr_flags (rhs);

	  if (!(call_flags & ECF_MALLOC))
	    {
	      mark_min_matrix_escape_level (mi, level, stmt);
	      return;
	    }
	  else
	    {
	      tree malloc_fn_decl;
	      const char *malloc_fname;

	      malloc_fn_decl = CALL_EXPR_FN (rhs);
	      if (TREE_CODE (malloc_fn_decl) != ADDR_EXPR
		  || TREE_CODE (TREE_OPERAND (malloc_fn_decl, 0)) !=
		  FUNCTION_DECL)
		{
		  mark_min_matrix_escape_level (mi, level, stmt);
		  return;
		}
	      malloc_fn_decl = TREE_OPERAND (malloc_fn_decl, 0);
	      malloc_fname = IDENTIFIER_POINTER (DECL_NAME (malloc_fn_decl));
	      if (DECL_FUNCTION_CODE (malloc_fn_decl) != BUILT_IN_MALLOC)
		{
		  if (dump_file)
		    fprintf (dump_file,
			     "Matrix %s is an argument to function %s\n",
			     get_name (mi->decl), get_name (malloc_fn_decl));
		  mark_min_matrix_escape_level (mi, level, stmt);
		  return;
		}
	    }
	  /* This is a call to malloc of level 'level'.
	     mi->max_malloced_level-1 == level means that we've
	     seen a malloc statement of level 'level' before.
	     If the statement is not the same one that we've
	     seen before, then there's another malloc statement
	     for the same level, which means that we need to mark
	     it escaping.  */
	  if (mi->malloc_for_level
	      && mi->max_malloced_level-1 == level
	      && mi->malloc_for_level[level] != stmt)
	    {
	      mark_min_matrix_escape_level (mi, level, stmt);
	      return;
	    }
	  else
	    add_allocation_site (mi, stmt, level);
	  return;
	}
      /* If we are back to the original matrix variable then we
         are sure that this is analyzed as an access site.  */
      else if (rhs == mi->decl)
	return;
    }
  /* Looks like we don't know what is happening in this
     statement so be on the safe side and mark it as escaping.  */
  mark_min_matrix_escape_level (mi, level, stmt);
}

/* The transposing decision making.
   In order to calculate the profitability of transposing, we collect two
   types of information regarding the accesses:
   1. profiling information used to express the hotness of an access, that
      is how often the matrix is accessed by this access site (count of the
      access site).
   2. which dimension in the access site is iterated by the innermost
      loop containing this access.

   The matrix will have a calculated value of weighted hotness for each
   dimension.
   Intuitively the hotness level of a dimension is a function of how
   many times it was the most frequently accessed dimension in the
   highly executed access sites of this matrix.

   As computed by the following equation:

     for each access site j = 1..m and each dimension i = 1..n:
       dim_hot_level[i] += acc[j]->dim[i]->iter_by_inner_loop * count(j)

   where n is the number of dims and m is the number of the matrix
   access sites.  acc[j]->dim[i]->iter_by_inner_loop is 1 if acc[j]
   iterates over dim[i] in the innermost loop, and is 0 otherwise.
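
   For example (an illustrative instance): with two access sites of a
   two-dimensional matrix, acc[1] executed 1000 times with its innermost
   loop iterating dim[1], and acc[2] executed 10 times with its
   innermost loop iterating dim[0], the result is
   dim_hot_level = {10, 1000}, making dimension 1 the hottest.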

   The organization of the new matrix should be according to the
   hotness of each dimension.  The hotness of the dimension implies
   the locality of the elements.  */
static int
analyze_transpose (void **slot, void *data ATTRIBUTE_UNUSED)
{
  struct matrix_info *mi = (struct matrix_info *) *slot;
  int min_escape_l = mi->min_indirect_level_escape;
  struct loop *loop;
  affine_iv iv;
  struct access_site_info *acc_info;
  int i;

  if (min_escape_l < 2 || !mi->access_l)
    {
      if (mi->access_l)
	{
	  for (i = 0;
	       VEC_iterate (access_site_info_p, mi->access_l, i, acc_info);
	       i++)
	    free (acc_info);
	  VEC_free (access_site_info_p, heap, mi->access_l);
	}
      return 1;
    }
  if (!mi->dim_hot_level)
    mi->dim_hot_level =
      (gcov_type *) xcalloc (min_escape_l, sizeof (gcov_type));


  for (i = 0; VEC_iterate (access_site_info_p, mi->access_l, i, acc_info);
       i++)
    {
      if (TREE_CODE (TREE_OPERAND (acc_info->stmt, 1)) == POINTER_PLUS_EXPR
	  && acc_info->level < min_escape_l)
	{
	  loop = loop_containing_stmt (acc_info->stmt);
	  if (!loop || loop->inner)
	    {
	      free (acc_info);
	      continue;
	    }
	  if (simple_iv (loop, acc_info->stmt, acc_info->offset, &iv, true))
	    {
	      if (iv.step != NULL)
		{
		  HOST_WIDE_INT istep;

		  istep = int_cst_value (iv.step);
		  if (istep != 0)
		    {
		      acc_info->iterated_by_inner_most_loop_p = 1;
		      mi->dim_hot_level[acc_info->level] +=
			gimple_bb (acc_info->stmt)->count;
		    }
		}
	    }
	}
      free (acc_info);
    }
  VEC_free (access_site_info_p, heap, mi->access_l);

  return 1;
}

/* Find the index which defines the OFFSET from base.
   We walk from use to def until we find how the offset was defined.  */
static tree
get_index_from_offset (tree offset, tree def_stmt)
{
  tree op1, op2, expr, index;

  if (TREE_CODE (def_stmt) == PHI_NODE)
    return NULL;
  expr = get_inner_of_cast_expr (TREE_OPERAND (def_stmt, 1));
  if (TREE_CODE (expr) == SSA_NAME)
    return get_index_from_offset (offset, SSA_NAME_DEF_STMT (expr));
  else if (TREE_CODE (expr) == MULT_EXPR)
    {
      op1 = TREE_OPERAND (expr, 0);
      op2 = TREE_OPERAND (expr, 1);
      if (TREE_CODE (op1) != INTEGER_CST && TREE_CODE (op2) != INTEGER_CST)
	return NULL;
      index = (TREE_CODE (op1) == INTEGER_CST) ? op2 : op1;
      return index;
    }
  else
    return NULL_TREE;
}

/* Update MI->dimension_type_size[CURRENT_INDIRECT_LEVEL] with the size
   of the type related to the SSA_VAR, or the type related to the
   lhs of STMT, in the case that it is an INDIRECT_REF.  */
static void
update_type_size (struct matrix_info *mi, tree stmt, tree ssa_var,
		  int current_indirect_level)
{
  tree lhs;
  HOST_WIDE_INT type_size;

  /* Update type according to the type of the INDIRECT_REF expr.  */
  if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT
      && TREE_CODE (TREE_OPERAND (stmt, 0)) == INDIRECT_REF)
    {
      lhs = TREE_OPERAND (stmt, 0);
      gcc_assert (POINTER_TYPE_P
		  (TREE_TYPE (SSA_NAME_VAR (TREE_OPERAND (lhs, 0)))));
      type_size =
	int_size_in_bytes (TREE_TYPE
			   (TREE_TYPE
			    (SSA_NAME_VAR (TREE_OPERAND (lhs, 0)))));
    }
  else
    type_size = int_size_in_bytes (TREE_TYPE (ssa_var));

  /* Record the size of elements accessed (as a whole)
     in the current indirection level (dimension).  If the size of
     elements is not known at compile time, mark it as escaping.  */
  if (type_size <= 0)
    mark_min_matrix_escape_level (mi, current_indirect_level, stmt);
  else
    {
      int l = current_indirect_level;

      if (!mi->dimension_type_size)
	{
	  mi->dimension_type_size
	    = (HOST_WIDE_INT *) xcalloc (l + 1, sizeof (HOST_WIDE_INT));
	  mi->dimension_type_size_len = l + 1;
	}
      else if (mi->dimension_type_size_len < l + 1)
	{
	  mi->dimension_type_size
	    = (HOST_WIDE_INT *) xrealloc (mi->dimension_type_size,
					  (l + 1) * sizeof (HOST_WIDE_INT));
	  memset (&mi->dimension_type_size[mi->dimension_type_size_len],
		  0, (l + 1 - mi->dimension_type_size_len)
		  * sizeof (HOST_WIDE_INT));
	  mi->dimension_type_size_len = l + 1;
	}
      /* Make sure all the accesses in the same level have the same size
         of the type.  */
      if (!mi->dimension_type_size[l])
	mi->dimension_type_size[l] = type_size;
      else if (mi->dimension_type_size[l] != type_size)
	mark_min_matrix_escape_level (mi, l, stmt);
    }
}

/* USE_STMT represents a CALL_EXPR, where one of the arguments is the
   ssa var that we want to check because it came from some use of matrix
   MI.  CURRENT_INDIRECT_LEVEL is the indirection level we reached so
   far.  */

static void
analyze_accesses_for_call_expr (struct matrix_info *mi, tree use_stmt,
				int current_indirect_level)
{
  tree call = get_call_expr_in (use_stmt);

  if (call && get_callee_fndecl (call))
    {
      if (DECL_FUNCTION_CODE (get_callee_fndecl (call)) != BUILT_IN_FREE)
	{
	  if (dump_file)
	    fprintf (dump_file,
		     "Matrix %s: Function call %s, level %d escapes.\n",
		     get_name (mi->decl), get_name (get_callee_fndecl (call)),
		     current_indirect_level);
	  mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
	}
      else if (mi->free_stmts[current_indirect_level].stmt != NULL
	       && mi->free_stmts[current_indirect_level].stmt != use_stmt)
	mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
      else
	{
	  /* Record the free statements so we can delete them
	     later.  */
	  int l = current_indirect_level;

	  mi->free_stmts[l].stmt = use_stmt;
	  mi->free_stmts[l].func = current_function_decl;
	}
    }
}

/* USE_STMT represents a phi node of the ssa var that we want to
   check because it came from some use of matrix MI.

   We check all the escaping levels that get to the PHI node
   and make sure they all have the same escaping level;
   if not (which is rare) we let the escaping level be the
   minimum level that gets into that PHI because starting from
   that level we cannot expect the behavior of the indirections.
   CURRENT_INDIRECT_LEVEL is the indirection level we reached so far.  */

static void
analyze_accesses_for_phi_node (struct matrix_info *mi, tree use_stmt,
			       int current_indirect_level, sbitmap visited,
			       bool record_accesses)
{
  struct matrix_access_phi_node tmp_maphi, *maphi, **pmaphi;

  tmp_maphi.phi = use_stmt;
  if ((maphi = (struct matrix_access_phi_node *)
       htab_find (htab_mat_acc_phi_nodes, &tmp_maphi)))
    {
      if (maphi->indirection_level == current_indirect_level)
	return;
      else
	{
	  int level = MIN (maphi->indirection_level,
			   current_indirect_level);
	  int j;
	  tree t = NULL_TREE;

	  maphi->indirection_level = level;
	  for (j = 0; j < PHI_NUM_ARGS (use_stmt); j++)
	    {
	      tree def = PHI_ARG_DEF (use_stmt, j);

	      if (TREE_CODE (SSA_NAME_DEF_STMT (def)) != PHI_NODE)
		t = SSA_NAME_DEF_STMT (def);
	    }
	  mark_min_matrix_escape_level (mi, level, t);
	}
      return;
    }
  maphi = (struct matrix_access_phi_node *)
    xcalloc (1, sizeof (struct matrix_access_phi_node));
  maphi->phi = use_stmt;
  maphi->indirection_level = current_indirect_level;

  /* Insert to hash table.  */
  pmaphi = (struct matrix_access_phi_node **)
    htab_find_slot (htab_mat_acc_phi_nodes, maphi, INSERT);
  gcc_assert (pmaphi);
  *pmaphi = maphi;

  if (!TEST_BIT (visited, SSA_NAME_VERSION (PHI_RESULT (use_stmt))))
    {
      SET_BIT (visited, SSA_NAME_VERSION (PHI_RESULT (use_stmt)));
      analyze_matrix_accesses (mi, PHI_RESULT (use_stmt),
			       current_indirect_level, false, visited,
			       record_accesses);
      RESET_BIT (visited, SSA_NAME_VERSION (PHI_RESULT (use_stmt)));
    }
}

/* USE_STMT represents a modify statement (the rhs or lhs include
   the ssa var that we want to check because it came from some use of
   matrix MI).
   CURRENT_INDIRECT_LEVEL is the indirection level we reached so far.  */

static int
analyze_accesses_for_modify_stmt (struct matrix_info *mi, tree ssa_var,
				  tree use_stmt, int current_indirect_level,
				  bool last_op, sbitmap visited,
				  bool record_accesses)
{
  tree lhs = TREE_OPERAND (use_stmt, 0);
  tree rhs = TREE_OPERAND (use_stmt, 1);
  struct ssa_acc_in_tree lhs_acc, rhs_acc;

  memset (&lhs_acc, 0, sizeof (lhs_acc));
  memset (&rhs_acc, 0, sizeof (rhs_acc));

  lhs_acc.ssa_var = ssa_var;
  lhs_acc.t_code = ERROR_MARK;
  ssa_accessed_in_tree (lhs, &lhs_acc);
  rhs_acc.ssa_var = ssa_var;
  rhs_acc.t_code = ERROR_MARK;
  ssa_accessed_in_tree (get_inner_of_cast_expr (rhs), &rhs_acc);

  /* The SSA must be either in the left side or in the right side,
     to understand what is happening.
     In case the SSA_NAME is found in both sides we should be escaping
     at this level because in this case we cannot calculate the
     address correctly.  */
  if ((lhs_acc.var_found && rhs_acc.var_found
       && lhs_acc.t_code == INDIRECT_REF)
      || (!rhs_acc.var_found && !lhs_acc.var_found))
    {
      mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
      return current_indirect_level;
    }
  gcc_assert (!rhs_acc.var_found || !lhs_acc.var_found);

  /* If we are storing to the matrix at some level, then mark it as
     escaping at that level.  */
  if (lhs_acc.var_found)
    {
      tree def;
      int l = current_indirect_level + 1;

      gcc_assert (lhs_acc.t_code == INDIRECT_REF);
      def = get_inner_of_cast_expr (rhs);
      if (TREE_CODE (def) != SSA_NAME)
	mark_min_matrix_escape_level (mi, l, use_stmt);
      else
	{
	  def = SSA_NAME_DEF_STMT (def);
	  analyze_matrix_allocation_site (mi, def, l, visited);
	  if (record_accesses)
	    record_access_alloc_site_info (mi, use_stmt, NULL_TREE,
					   NULL_TREE, l, true);
	  update_type_size (mi, use_stmt, NULL, l);
	}
      return current_indirect_level;
    }
  /* Now, check the right-hand-side, to see how the SSA variable
     is used.  */
  if (rhs_acc.var_found)
    {
      /* If we are passing the ssa name to a function call and
         the pointer escapes when passed to the function
         (not the case of free), then we mark the matrix as
         escaping at this level.  */
      if (rhs_acc.t_code == CALL_EXPR)
	{
	  analyze_accesses_for_call_expr (mi, use_stmt,
					  current_indirect_level);

	  return current_indirect_level;
	}
      if (rhs_acc.t_code != INDIRECT_REF
	  && rhs_acc.t_code != POINTER_PLUS_EXPR && rhs_acc.t_code != SSA_NAME)
	{
	  mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
	  return current_indirect_level;
	}
      /* If the access in the RHS has an indirection increase the
         indirection level.  */
      if (rhs_acc.t_code == INDIRECT_REF)
	{
	  if (record_accesses)
	    record_access_alloc_site_info (mi, use_stmt, NULL_TREE,
					   NULL_TREE,
					   current_indirect_level, true);
	  current_indirect_level += 1;
	}
      else if (rhs_acc.t_code == POINTER_PLUS_EXPR)
	{
	  gcc_assert (rhs_acc.second_op);
	  if (last_op)
	    /* Currently we support only one PLUS expression on the
	       SSA_NAME that holds the base address of the current
	       indirection level; to support the more general case there
	       is a need to hold a stack of expressions and regenerate
	       the calculation later.  */
	    mark_min_matrix_escape_level (mi, current_indirect_level,
					  use_stmt);
	  else
	    {
	      tree index;
	      tree op1, op2;

	      op1 = TREE_OPERAND (rhs, 0);
	      op2 = TREE_OPERAND (rhs, 1);

	      op2 = (op1 == ssa_var) ? op2 : op1;
	      if (TREE_CODE (op2) == INTEGER_CST)
		index =
		  build_int_cst (TREE_TYPE (op1),
				 TREE_INT_CST_LOW (op2) /
				 int_size_in_bytes (TREE_TYPE (op1)));
	      else
		{
		  index =
		    get_index_from_offset (op2, SSA_NAME_DEF_STMT (op2));
		  if (index == NULL_TREE)
		    {
		      mark_min_matrix_escape_level (mi,
						    current_indirect_level,
						    use_stmt);
		      return current_indirect_level;
		    }
		}
	      if (record_accesses)
		record_access_alloc_site_info (mi, use_stmt, op2,
					       index,
					       current_indirect_level, false);
	    }
	}
      /* If we are storing this level of indirection mark it as
         escaping.  */
      if (lhs_acc.t_code == INDIRECT_REF || TREE_CODE (lhs) != SSA_NAME)
	{
	  int l = current_indirect_level;

	  /* One exception is when we are storing to the matrix
	     variable itself; this is the case of malloc, we must make
	     sure that it's the one and only one call to malloc so
	     we call analyze_matrix_allocation_site to check
	     this out.  */
	  if (TREE_CODE (lhs) != VAR_DECL || lhs != mi->decl)
	    mark_min_matrix_escape_level (mi, current_indirect_level,
					  use_stmt);
	  else
	    {
	      /* Also update the escaping level.  */
	      analyze_matrix_allocation_site (mi, use_stmt, l, visited);
	      if (record_accesses)
		record_access_alloc_site_info (mi, use_stmt, NULL_TREE,
					       NULL_TREE, l, true);
	    }
	}
      else
	{
	  /* We are placing it in an SSA, follow that SSA.  */
	  analyze_matrix_accesses (mi, lhs,
				   current_indirect_level,
				   rhs_acc.t_code == POINTER_PLUS_EXPR,
				   visited, record_accesses);
	}
    }
  return current_indirect_level;
}

/* Given a SSA_VAR (coming from a use statement of the matrix MI),
   follow its uses and level of indirection and find out the minimum
   indirection level it escapes in (the highest dimension) and the maximum
   level it is accessed in (this will be the actual dimension of the
   matrix).  The information is accumulated in MI.
   We look at the immediate uses, if one escapes we finish; if not,
   we make a recursive call for each one of the immediate uses of the
   resulting SSA name.  */
static void
analyze_matrix_accesses (struct matrix_info *mi, tree ssa_var,
			 int current_indirect_level, bool last_op,
			 sbitmap visited, bool record_accesses)
{
  imm_use_iterator imm_iter;
  use_operand_p use_p;

  update_type_size (mi, SSA_NAME_DEF_STMT (ssa_var), ssa_var,
		    current_indirect_level);

  /* We don't go beyond the escaping level when we are performing the
     flattening.  NOTE: we keep the last indirection level that doesn't
     escape.  */
  if (mi->min_indirect_level_escape > -1
      && mi->min_indirect_level_escape <= current_indirect_level)
    return;

  /* Now go over the uses of the SSA_NAME and check how it is used in
     each one of them.  We are mainly looking for the pattern INDIRECT_REF,
     then a POINTER_PLUS_EXPR, then INDIRECT_REF etc., while in between
     there could be any number of copies and casts.  */
  gcc_assert (TREE_CODE (ssa_var) == SSA_NAME);

  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, ssa_var)
    {
      tree use_stmt = USE_STMT (use_p);
      if (TREE_CODE (use_stmt) == PHI_NODE)
	/* We check all the escaping levels that get to the PHI node
	   and make sure they all have the same escaping level;
	   if not (which is rare) we let the escaping level be the
	   minimum level that gets into that PHI because starting from
	   that level we cannot expect the behavior of the indirections.  */

	analyze_accesses_for_phi_node (mi, use_stmt, current_indirect_level,
				       visited, record_accesses);

      else if (TREE_CODE (use_stmt) == CALL_EXPR)
	analyze_accesses_for_call_expr (mi, use_stmt, current_indirect_level);
      else if (TREE_CODE (use_stmt) == GIMPLE_MODIFY_STMT)
	current_indirect_level =
	  analyze_accesses_for_modify_stmt (mi, ssa_var, use_stmt,
					    current_indirect_level, last_op,
					    visited, record_accesses);
    }
}

/* A walk_tree function to go over the VAR_DECL, PARM_DECL nodes of
   the malloc size expression and check that those aren't changed
   over the function.  */
static tree
check_var_notmodified_p (tree *tp, int *walk_subtrees, void *data)
{
  basic_block bb;
  tree t = *tp;
  tree fn = (tree) data;
  block_stmt_iterator bsi;
  tree stmt;

  if (TREE_CODE (t) != VAR_DECL && TREE_CODE (t) != PARM_DECL)
    return NULL_TREE;

  FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (fn))
    {
      for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
	{
	  stmt = bsi_stmt (bsi);
	  if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
	    continue;
	  if (TREE_OPERAND (stmt, 0) == t)
	    return stmt;
	}
    }
  *walk_subtrees = 1;
  return NULL_TREE;
}

/* Go backwards in the use-def chains and find out the expression
   represented by the possible SSA name in EXPR, until it is composed
   of only VAR_DECL, PARM_DECL and INT_CST.  In case of phi nodes
   we make sure that all the arguments represent the same subexpression,
   otherwise we fail.  */
static tree
can_calculate_expr_before_stmt (tree expr, sbitmap visited)
{
  tree def_stmt, op1, op2, res;

  switch (TREE_CODE (expr))
    {
    case SSA_NAME:
      /* Case of a loop, we don't know how to represent this expression.  */
      if (TEST_BIT (visited, SSA_NAME_VERSION (expr)))
	return NULL_TREE;

      SET_BIT (visited, SSA_NAME_VERSION (expr));
      def_stmt = SSA_NAME_DEF_STMT (expr);
      res = can_calculate_expr_before_stmt (def_stmt, visited);
      RESET_BIT (visited, SSA_NAME_VERSION (expr));
      return res;
    case VAR_DECL:
    case PARM_DECL:
    case INTEGER_CST:
      return expr;
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
    case MULT_EXPR:
      op1 = TREE_OPERAND (expr, 0);
      op2 = TREE_OPERAND (expr, 1);

      op1 = can_calculate_expr_before_stmt (op1, visited);
      if (!op1)
	return NULL_TREE;
      op2 = can_calculate_expr_before_stmt (op2, visited);
      if (op2)
	return fold_build2 (TREE_CODE (expr), TREE_TYPE (expr), op1, op2);
      return NULL_TREE;
    case GIMPLE_MODIFY_STMT:
      return can_calculate_expr_before_stmt (TREE_OPERAND (expr, 1),
					     visited);
    case PHI_NODE:
      {
	int j;

	res = NULL_TREE;
	/* Make sure all the arguments represent the same value.  */
	for (j = 0; j < PHI_NUM_ARGS (expr); j++)
	  {
	    tree new_res;
	    tree def = PHI_ARG_DEF (expr, j);

	    new_res = can_calculate_expr_before_stmt (def, visited);
	    if (res == NULL_TREE)
	      res = new_res;
	    else if (!new_res || !expressions_equal_p (res, new_res))
	      return NULL_TREE;
	  }
	return res;
      }
    CASE_CONVERT:
      res = can_calculate_expr_before_stmt (TREE_OPERAND (expr, 0), visited);
      if (res != NULL_TREE)
	return build1 (TREE_CODE (expr), TREE_TYPE (expr), res);
      else
	return NULL_TREE;

    default:
      return NULL_TREE;
    }
}

/* There should be only one allocation function for the dimensions
   that don't escape.  Here we check the allocation sites in this
   function.  We must make sure that all the dimensions are allocated
   using malloc and that the malloc size parameter expression could be
   pre-calculated before the call to the malloc of dimension 0.

   Given a candidate matrix for flattening -- MI -- check if it's
   appropriate for flattening -- we analyze the allocation
   sites that we recorded in the previous analysis.  The result of the
   analysis is a level of indirection (matrix dimension) in which the
   flattening is safe.  We check the following conditions:
   1. There is only one allocation site for each dimension.
   2. The allocation sites of all the dimensions are in the same
      function.
   (The above two are being taken care of during the analysis when
   we check the allocation site.)
   3. All the dimensions that we flatten are allocated at once; thus
      the total size must be known before the allocation of the
      dimension 0 (top level) -- we must make sure we represent the
      size of the allocation as an expression of global parameters or
      constants and that those don't change over the function.  */

static int
check_allocation_function (void **slot, void *data ATTRIBUTE_UNUSED)
{
  int level;
  block_stmt_iterator bsi;
  basic_block bb_level_0;
  struct matrix_info *mi = (struct matrix_info *) *slot;
  sbitmap visited;

  if (!mi->malloc_for_level)
    return 1;

  visited = sbitmap_alloc (num_ssa_names);

  /* Do nothing if the current function is not the allocation
     function of MI.  */
  if (mi->allocation_function_decl != current_function_decl
      /* We aren't in the main allocation function yet.  */
      || !mi->malloc_for_level[0])
    return 1;

  for (level = 1; level < mi->max_malloced_level; level++)
    if (!mi->malloc_for_level[level])
      break;

  mark_min_matrix_escape_level (mi, level, NULL_TREE);

  bsi = bsi_for_stmt (mi->malloc_for_level[0]);
  bb_level_0 = bsi.bb;

  /* Check if the expression of the size passed to malloc could be
     pre-calculated before the malloc of level 0.  */
  for (level = 1; level < mi->min_indirect_level_escape; level++)
    {
      tree call_stmt, size;
      struct malloc_call_data mcd;

      call_stmt = mi->malloc_for_level[level];

      /* Find the correct malloc information.  */
      collect_data_for_malloc_call (call_stmt, &mcd);

      /* No need to check anticipation for constants.  */
      if (TREE_CODE (mcd.size_var) == INTEGER_CST)
	{
	  if (!mi->dimension_size)
	    {
	      mi->dimension_size =
		(tree *) xcalloc (mi->min_indirect_level_escape,
				  sizeof (tree));
	      mi->dimension_size_orig =
		(tree *) xcalloc (mi->min_indirect_level_escape,
				  sizeof (tree));
	    }
	  mi->dimension_size[level] = mcd.size_var;
	  mi->dimension_size_orig[level] = mcd.size_var;
	  continue;
	}
      /* ??? Here we should also add the way to calculate the size
         expression not only know that it is anticipated.  */
      sbitmap_zero (visited);
      size = can_calculate_expr_before_stmt (mcd.size_var, visited);
      if (size == NULL_TREE)
	{
	  mark_min_matrix_escape_level (mi, level, call_stmt);
	  if (dump_file)
	    fprintf (dump_file,
		     "Matrix %s: Cannot calculate the size of allocation, escaping at level %d\n",
		     get_name (mi->decl), level);
	  break;
	}
      if (!mi->dimension_size)
	{
	  mi->dimension_size =
	    (tree *) xcalloc (mi->min_indirect_level_escape, sizeof (tree));
	  mi->dimension_size_orig =
	    (tree *) xcalloc (mi->min_indirect_level_escape, sizeof (tree));
	}
      mi->dimension_size[level] = size;
      mi->dimension_size_orig[level] = size;
    }

  /* We don't need those anymore.  */
  for (level = mi->min_indirect_level_escape;
       level < mi->max_malloced_level; level++)
    mi->malloc_for_level[level] = NULL;
  return 1;
}

/* Track all access and allocation sites.  */
static void
find_sites_in_func (bool record)
{
  sbitmap visited_stmts_1;

  block_stmt_iterator bsi;
  tree stmt;
  basic_block bb;
  struct matrix_info tmpmi, *mi;

  visited_stmts_1 = sbitmap_alloc (num_ssa_names);

  FOR_EACH_BB (bb)
    {
      for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
	{
	  stmt = bsi_stmt (bsi);
	  if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT
	      && TREE_CODE (TREE_OPERAND (stmt, 0)) == VAR_DECL)
	    {
	      tmpmi.decl = TREE_OPERAND (stmt, 0);
	      if ((mi = (struct matrix_info *) htab_find (matrices_to_reorg,
							  &tmpmi)))
		{
		  sbitmap_zero (visited_stmts_1);
		  analyze_matrix_allocation_site (mi, stmt, 0,
						  visited_stmts_1);
		}
	    }
	  if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT
	      && TREE_CODE (TREE_OPERAND (stmt, 0)) == SSA_NAME
	      && TREE_CODE (TREE_OPERAND (stmt, 1)) == VAR_DECL)
	    {
	      tmpmi.decl = TREE_OPERAND (stmt, 1);
	      if ((mi = (struct matrix_info *) htab_find (matrices_to_reorg,
							  &tmpmi)))
		{
		  sbitmap_zero (visited_stmts_1);
		  analyze_matrix_accesses (mi,
					   TREE_OPERAND (stmt, 0), 0,
					   false, visited_stmts_1, record);
		}
	    }
	}
    }
  sbitmap_free (visited_stmts_1);
}

/* Traverse the use-def chains to see if there are matrices that
   are passed through pointers and we cannot know how they are accessed.
   For each SSA-name defined by a global variable of our interest,
   we traverse the use-def chains of the SSA and follow the indirections,
   and record in what level of indirection the use of the variable
   escapes.  A use of a pointer escapes when it is passed to a function,
   stored into memory or assigned (except in malloc and free calls).  */

static void
record_all_accesses_in_func (void)
{
  unsigned i;
  sbitmap visited_stmts_1;

  visited_stmts_1 = sbitmap_alloc (num_ssa_names);

  for (i = 0; i < num_ssa_names; i++)
    {
      struct matrix_info tmpmi, *mi;
      tree ssa_var = ssa_name (i);
      tree rhs, lhs;

      if (!ssa_var
	  || TREE_CODE (SSA_NAME_DEF_STMT (ssa_var)) != GIMPLE_MODIFY_STMT)
	continue;
      rhs = TREE_OPERAND (SSA_NAME_DEF_STMT (ssa_var), 1);
      lhs = TREE_OPERAND (SSA_NAME_DEF_STMT (ssa_var), 0);
      if (TREE_CODE (rhs) != VAR_DECL && TREE_CODE (lhs) != VAR_DECL)
	continue;

      /* If the RHS is a matrix that we want to analyze, follow the def-use
         chain for this SSA_VAR and check for escapes or apply the
         flattening.  */
      tmpmi.decl = rhs;
      if ((mi = (struct matrix_info *) htab_find (matrices_to_reorg, &tmpmi)))
	{
	  /* This variable will track the visited PHI nodes, so we can limit
	     its size to the maximum number of SSA names.  */
	  sbitmap_zero (visited_stmts_1);
	  analyze_matrix_accesses (mi, ssa_var,
				   0, false, visited_stmts_1, true);
	}
    }
  sbitmap_free (visited_stmts_1);
}

/* Used when we want to convert the expression:
   RESULT = something * ORIG  to  RESULT = something * NEW.
   If ORIG and NEW are powers of 2, shift operations can be done,
   else division and multiplication.  */
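/* For instance (illustrative): if ORIG == 8 and NEW == 2, both sizes
   are powers of two (x == 3, y == 1), so the conversion below is done
   with a shift by abs (x - y) == 2 bits instead of the general
   division-and-multiplication path.  */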
static tree
compute_offset (HOST_WIDE_INT orig, HOST_WIDE_INT new, tree result)
{
  int x, y;
  tree result1, ratio, log, orig_tree, new_tree;

  x = exact_log2 (orig);
  y = exact_log2 (new);

  if (x != -1 && y != -1)
    {
      if (x == y)
	return result;
      else if (x > y)
	{
	  log = build_int_cst (TREE_TYPE (result), x - y);
	  result1 =
	    fold_build2 (LSHIFT_EXPR, TREE_TYPE (result), result, log);
	  return result1;
	}
      log = build_int_cst (TREE_TYPE (result), y - x);
      result1 = fold_build2 (RSHIFT_EXPR, TREE_TYPE (result), result, log);

      return result1;
    }
  orig_tree = build_int_cst (TREE_TYPE (result), orig);
  new_tree = build_int_cst (TREE_TYPE (result), new);
  ratio = fold_build2 (TRUNC_DIV_EXPR, TREE_TYPE (result), result, orig_tree);
  result1 = fold_build2 (MULT_EXPR, TREE_TYPE (result), ratio, new_tree);

  return result1;
}


/* We know that we are allowed to perform matrix flattening (according to the
   escape analysis), so we traverse the use-def chains of the SSA vars
   defined by the global variables pointing to the matrices of our interest.
   In each use of the SSA we calculate the offset from the base address
   according to the following equation:

     a[I1][I2]...[Ik], where D1..Dk is the length of each dimension and the
     escaping level is m <= k, and a' is the new allocated matrix,
     will be translated to:

       b[I(m+1)]...[Ik]

     where
       b = a' + I1*D2...*Dm + I2*D3...Dm + ... + Im  */
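
/* As an illustrative instance of the equation above: for a
   two-dimensional matrix with k == m == 2, an access a[I1][I2] is
   rewritten as *(a' + I1 * D2 + I2), the usual row-major
   linearization.  */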

static int
transform_access_sites (void **slot, void *data ATTRIBUTE_UNUSED)
{
  block_stmt_iterator bsi;
  struct matrix_info *mi = (struct matrix_info *) *slot;
  int min_escape_l = mi->min_indirect_level_escape;
  struct access_site_info *acc_info;
  int i;

  if (min_escape_l < 2 || !mi->access_l)
    return 1;
  for (i = 0; VEC_iterate (access_site_info_p, mi->access_l, i, acc_info);
       i++)
    {
      tree orig, type;

      /* This is possible because we collect the access sites before
         we determine the final minimum indirection level.  */
      if (acc_info->level >= min_escape_l)
	{
	  free (acc_info);
	  continue;
	}
      if (acc_info->is_alloc)
	{
	  if (acc_info->level >= 0 && gimple_bb (acc_info->stmt))
	    {
	      ssa_op_iter iter;
	      tree def;
	      tree stmt = acc_info->stmt;

	      FOR_EACH_SSA_TREE_OPERAND (def, stmt, iter, SSA_OP_DEF)
		mark_sym_for_renaming (SSA_NAME_VAR (def));
	      bsi = bsi_for_stmt (stmt);
	      gcc_assert (TREE_CODE (acc_info->stmt) == GIMPLE_MODIFY_STMT);
	      if (TREE_CODE (TREE_OPERAND (acc_info->stmt, 0)) ==
		  SSA_NAME && acc_info->level < min_escape_l - 1)
		{
		  imm_use_iterator imm_iter;
		  use_operand_p use_p;
		  tree use_stmt;

		  FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter,
					 TREE_OPERAND (acc_info->stmt,
						       0))
		    FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
		    {
		      tree conv, tmp, stmts;

		      /* Emit convert statement to convert to type of use.  */
		      conv =
			fold_build1 (CONVERT_EXPR,
				     TREE_TYPE (TREE_OPERAND
						(acc_info->stmt, 0)),
				     TREE_OPERAND (TREE_OPERAND
						   (acc_info->stmt, 1), 0));
		      tmp =
			create_tmp_var (TREE_TYPE
					(TREE_OPERAND
					 (acc_info->stmt, 0)), "new");
		      add_referenced_var (tmp);
		      stmts =
			fold_build2 (GIMPLE_MODIFY_STMT,
				     TREE_TYPE (TREE_OPERAND
						(acc_info->stmt, 0)), tmp,
				     conv);
		      tmp = make_ssa_name (tmp, stmts);
		      TREE_OPERAND (stmts, 0) = tmp;
		      bsi = bsi_for_stmt (acc_info->stmt);
		      bsi_insert_after (&bsi, stmts, BSI_SAME_STMT);
		      SET_USE (use_p, tmp);
		    }
		}
	      if (acc_info->level < min_escape_l - 1)
		bsi_remove (&bsi, true);
	    }
	  free (acc_info);
	  continue;
	}
      orig = TREE_OPERAND (acc_info->stmt, 1);
      type = TREE_TYPE (orig);
      if (TREE_CODE (orig) == INDIRECT_REF
	  && acc_info->level < min_escape_l - 1)
	{
	  /* Replace the INDIRECT_REF with NOP (cast) usually we are casting
	     from "pointer to type" to "type".  */
	  orig =
	    build1 (NOP_EXPR, TREE_TYPE (orig),
		    TREE_OPERAND (orig, 0));
	  TREE_OPERAND (acc_info->stmt, 1) = orig;
	}
      else if (TREE_CODE (orig) == POINTER_PLUS_EXPR
	       && acc_info->level < (min_escape_l))
	{
	  imm_use_iterator imm_iter;
	  use_operand_p use_p;

	  tree offset;
	  int k = acc_info->level;
	  tree num_elements, total_elements;
	  tree tmp1;
	  tree d_size = mi->dimension_size[k];

	  /* We already make sure in the analysis that the first operand
	     is the base and the second is the offset.  */
	  offset = acc_info->offset;
	  if (mi->dim_map[k] == min_escape_l - 1)
	    {
	      if (!check_transpose_p || mi->is_transposed_p == false)
		tmp1 = offset;
	      else
		{
		  tree new_offset;
		  tree d_type_size, d_type_size_k;

		  d_type_size = size_int (mi->dimension_type_size[min_escape_l]);
		  d_type_size_k = size_int (mi->dimension_type_size[k + 1]);

		  new_offset =
		    compute_offset (mi->dimension_type_size[min_escape_l],
				    mi->dimension_type_size[k + 1], offset);

		  total_elements = new_offset;
		  if (new_offset != offset)
		    {
		      bsi = bsi_for_stmt (acc_info->stmt);
		      tmp1 = force_gimple_operand_bsi (&bsi, total_elements,
						       true, NULL,
						       true, BSI_SAME_STMT);
		    }
		  else
		    tmp1 = offset;
		}
	    }
	  else
	    {
	      d_size = mi->dimension_size[mi->dim_map[k] + 1];
	      num_elements =
		fold_build2 (MULT_EXPR, sizetype,
			     fold_convert (sizetype, acc_info->index),
			     fold_convert (sizetype, d_size));
	      add_referenced_var (d_size);
	      bsi = bsi_for_stmt (acc_info->stmt);
	      tmp1 = force_gimple_operand_bsi (&bsi, num_elements, true,
					       NULL, true, BSI_SAME_STMT);
	    }
	  /* Replace the offset if needed.  */
	  if (tmp1 != offset)
	    {
	      if (TREE_CODE (offset) == SSA_NAME)
		{
		  tree use_stmt;

		  FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, offset)
		    FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
		      if (use_stmt == acc_info->stmt)
			SET_USE (use_p, tmp1);
		}
	      else
		{
		  gcc_assert (TREE_CODE (offset) == INTEGER_CST);
		  TREE_OPERAND (orig, 1) = tmp1;
		}
	    }
	}
      /* ??? meanwhile this happens because we record the same access
         site more than once; we should be using a hash table to
         avoid this and insert the STMT of the access site only
         once.
	 else
	   gcc_unreachable (); */
      free (acc_info);
    }
  VEC_free (access_site_info_p, heap, mi->access_l);

  update_ssa (TODO_update_ssa);
#ifdef ENABLE_CHECKING
  verify_ssa (true);
#endif
  return 1;
}
/* Sort the array A of counts; arrange DIM_MAP to reflect the new order.  */
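/* For example (illustrative values): with N == 3, A == {30, 10, 20}
   and DIM_MAP == {0, 1, 2}, the bubble sort below leaves
   A == {10, 20, 30} and DIM_MAP == {1, 2, 0}: the hottest dimension
   ends up last, i.e. innermost after the reorganization.  */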
static void
sort_dim_hot_level (gcov_type *a, int *dim_map, int n)
{
  int i, j, tmp1;
  gcov_type tmp;

  for (i = 0; i < n - 1; i++)
    for (j = 0; j < n - 1 - i; j++)
      if (a[j + 1] < a[j])
        {
          /* Swap a[j] and a[j + 1], and keep DIM_MAP in step.  */
          tmp = a[j];
          a[j] = a[j + 1];
          a[j + 1] = tmp;
          tmp1 = dim_map[j];
          dim_map[j] = dim_map[j + 1];
          dim_map[j + 1] = tmp1;
        }
}

/* Replace the multiple mallocs (one for each dimension) with one malloc
   of size DIM1 * DIM2 * ... * DIMN * size_of_element.
   Make sure that we hold the size used at the malloc site in a
   new global variable; this way we ensure that the size doesn't
   change and that it is accessible from all the other functions that
   use the matrix.  Also, the original calls to free are deleted
   and replaced by a new call to free the flattened matrix.  */
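/* Sketch of the resulting allocation code (the global name SIZE_A is
   invented for illustration):

     SIZE_A = DIM_SIZE[0];     <- stored just before the level-0 malloc
     a = malloc (SIZE_A);      <- size argument replaced
     (the mallocs and frees of levels 1 .. N-1 are deleted)

   Holding the size in a global is what keeps it constant and visible
   to every other function that frees or indexes the matrix.  */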
static int
transform_allocation_sites (void **slot, void *data ATTRIBUTE_UNUSED)
{
  int i;
  struct matrix_info *mi;
  tree type, call_stmt_0, malloc_stmt, oldfn, prev_dim_size, use_stmt;
  struct cgraph_node *c_node;
  struct cgraph_edge *e;
  block_stmt_iterator bsi;
  struct malloc_call_data mcd;
  HOST_WIDE_INT element_size;

  imm_use_iterator imm_iter;
  use_operand_p use_p;
  tree old_size_0, tmp;
  int min_escape_l;
  int id;

  mi = (struct matrix_info *) *slot;

  min_escape_l = mi->min_indirect_level_escape;

  if (!mi->malloc_for_level)
    mi->min_indirect_level_escape = 0;

  if (mi->min_indirect_level_escape < 2)
    return 1;

  mi->dim_map = (int *) xcalloc (mi->min_indirect_level_escape, sizeof (int));
  for (i = 0; i < mi->min_indirect_level_escape; i++)
    mi->dim_map[i] = i;
  if (check_transpose_p)
    {
      int i;

      if (dump_file)
        {
          fprintf (dump_file, "Matrix %s:\n", get_name (mi->decl));
          for (i = 0; i < min_escape_l; i++)
            {
              fprintf (dump_file, "dim %d before sort ", i);
              if (mi->dim_hot_level)
                fprintf (dump_file,
                         "count is " HOST_WIDEST_INT_PRINT_DEC " \n",
                         mi->dim_hot_level[i]);
            }
        }
      sort_dim_hot_level (mi->dim_hot_level, mi->dim_map,
                          mi->min_indirect_level_escape);
      if (dump_file)
        for (i = 0; i < min_escape_l; i++)
          {
            fprintf (dump_file, "dim %d after sort\n", i);
            if (mi->dim_hot_level)
              fprintf (dump_file, "count is " HOST_WIDE_INT_PRINT_DEC
                       " \n", (HOST_WIDE_INT) mi->dim_hot_level[i]);
          }
      for (i = 0; i < mi->min_indirect_level_escape; i++)
        {
          if (dump_file)
            fprintf (dump_file, "dim_map[%d] after sort %d\n", i,
                     mi->dim_map[i]);
          if (mi->dim_map[i] != i)
            {
              if (dump_file)
                fprintf (dump_file,
                         "Transposed dimensions: dim %d is now dim %d\n",
                         mi->dim_map[i], i);
              mi->is_transposed_p = true;
            }
        }
    }
  else
    {
      for (i = 0; i < mi->min_indirect_level_escape; i++)
        mi->dim_map[i] = i;
    }
  /* Call statement of the allocation site of level 0.  */
  call_stmt_0 = mi->malloc_for_level[0];

  /* Find the correct malloc information.  */
  collect_data_for_malloc_call (call_stmt_0, &mcd);

  mi->dimension_size[0] = mcd.size_var;
  mi->dimension_size_orig[0] = mcd.size_var;
  /* Make sure that the variables in the size expression for
     all the dimensions (above level 0) aren't modified in
     the allocation function.  */
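  /* E.g. (illustrative): if the size expression of dimension 1 is
     n * sizeof (int) and the allocation function changes n between the
     malloc calls, the value available at the level-0 malloc no longer
     describes dimension 1, so the escape level is lowered below.  */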
  for (i = 1; i < mi->min_indirect_level_escape; i++)
    {
      tree t;

      /* mi->dimension_size must contain the expression of the size
         calculated in check_allocation_function.  */
      gcc_assert (mi->dimension_size[i]);

      t = walk_tree_without_duplicates (&(mi->dimension_size[i]),
                                        check_var_notmodified_p,
                                        mi->allocation_function_decl);
      if (t != NULL_TREE)
        {
          mark_min_matrix_escape_level (mi, i, t);
          break;
        }
    }

  if (mi->min_indirect_level_escape < 2)
    return 1;

  /* Make sure that the size expression is available right
     before the call to the malloc of level 0.  */
  bsi = bsi_for_stmt (call_stmt_0);

  /* Find out the size of each dimension by looking at the malloc
     sites, and create a global variable to hold it.
     We add the assignment to the global before the malloc of level 0.  */

  /* To be able to produce gimple temporaries.  */
  oldfn = current_function_decl;
  current_function_decl = mi->allocation_function_decl;
  push_cfun (DECL_STRUCT_FUNCTION (mi->allocation_function_decl));

  /* Set the dimension sizes as follows:
     DIM_SIZE[i] = DIM_SIZE[i] * DIM_SIZE[i + 1] * ... * DIM_SIZE[n],
     i.e. the suffix product, in bytes, of the original sizes,
     where n is the maximum non-escaping level.  */
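  /* For instance (illustrative numbers): for a 10 x 20 matrix of
     4-byte ints flattened completely, the loop below computes
     DIM_SIZE[1] = 20 * 4 == 80 (the size of one flattened row) and
     DIM_SIZE[0] = 10 * 80 == 800, which becomes the size argument
     of the single remaining malloc.  */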
  element_size = mi->dimension_type_size[mi->min_indirect_level_escape];
  prev_dim_size = NULL_TREE;

  for (i = mi->min_indirect_level_escape - 1; i >= 0; i--)
    {
      tree dim_size, dim_var, tmp;
      tree d_type_size;

      /* Now put the size expression in a global variable and initialize
         it to the size expression before the malloc of level 0.  */
      dim_var =
        add_new_static_var (TREE_TYPE
                            (mi->dimension_size_orig[mi->dim_map[i]]));
      type = TREE_TYPE (mi->dimension_size_orig[mi->dim_map[i]]);

      /* DIM_SIZE = MALLOC_SIZE_PARAM / TYPE_SIZE.  */
      /* Find which dim ID becomes dim I.  */
      for (id = 0; id < mi->min_indirect_level_escape; id++)
        if (mi->dim_map[id] == i)
          break;
      d_type_size =
        build_int_cst (type, mi->dimension_type_size[id + 1]);
      if (!prev_dim_size)
        prev_dim_size = build_int_cst (type, element_size);
      if (!check_transpose_p && i == mi->min_indirect_level_escape - 1)
        {
          dim_size = mi->dimension_size_orig[id];
        }
      else
        {
          dim_size =
            fold_build2 (TRUNC_DIV_EXPR, type, mi->dimension_size_orig[id],
                         d_type_size);

          dim_size = fold_build2 (MULT_EXPR, type, dim_size, prev_dim_size);
        }
      dim_size = force_gimple_operand_bsi (&bsi, dim_size, true, NULL,
                                           true, BSI_SAME_STMT);
      /* GLOBAL_HOLDING_THE_SIZE = DIM_SIZE.  */
      tmp = fold_build2 (GIMPLE_MODIFY_STMT, type, dim_var, dim_size);
      TREE_OPERAND (tmp, 0) = dim_var;
      mark_symbols_for_renaming (tmp);
      bsi_insert_before (&bsi, tmp, BSI_SAME_STMT);

      prev_dim_size = mi->dimension_size[i] = dim_var;
    }
  update_ssa (TODO_update_ssa);
  /* Replace the malloc size argument in the malloc of level 0 to be
     the size of all the dimensions.  */
  malloc_stmt = TREE_OPERAND (call_stmt_0, 1);
  c_node = cgraph_node (mi->allocation_function_decl);
  old_size_0 = CALL_EXPR_ARG (malloc_stmt, 0);
  tmp = force_gimple_operand_bsi (&bsi, mi->dimension_size[0], true,
                                  NULL, true, BSI_SAME_STMT);
  if (TREE_CODE (old_size_0) == SSA_NAME)
    {
      FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, old_size_0)
        FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
          if (use_stmt == call_stmt_0)
            SET_USE (use_p, tmp);
    }
  /* When deleting the calls to malloc we need also to remove the edge
     from the call graph to keep it consistent.  Notice that cgraph_edge
     may create a new node in the call graph if there is no node for the
     given declaration; this shouldn't be the case but currently there
     is no way to check this outside of "cgraph.c".  */
  for (i = 1; i < mi->min_indirect_level_escape; i++)
    {
      block_stmt_iterator bsi;
      tree use_stmt1 = NULL;
      tree call;

      tree call_stmt = mi->malloc_for_level[i];
      call = TREE_OPERAND (call_stmt, 1);
      gcc_assert (TREE_CODE (call) == CALL_EXPR);
      e = cgraph_edge (c_node, call_stmt);
      gcc_assert (e);
      cgraph_remove_edge (e);
      bsi = bsi_for_stmt (call_stmt);
      /* Remove the call stmt.  */
      bsi_remove (&bsi, true);
      /* Remove the type-cast stmt.  */
      FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter,
                             TREE_OPERAND (call_stmt, 0))
        {
          use_stmt1 = use_stmt;
          bsi = bsi_for_stmt (use_stmt);
          bsi_remove (&bsi, true);
        }
      /* Remove the assignment of the allocated area.  */
      FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter,
                             TREE_OPERAND (use_stmt1, 0))
        {
          bsi = bsi_for_stmt (use_stmt);
          bsi_remove (&bsi, true);
        }
    }
  update_ssa (TODO_update_ssa);
#ifdef ENABLE_CHECKING
  verify_ssa (true);
#endif
  /* Delete the calls to free.  */
  for (i = 1; i < mi->min_indirect_level_escape; i++)
    {
      block_stmt_iterator bsi;
      tree call;

      /* ??? wonder why this case is possible but we failed on it once.  */
      if (!mi->free_stmts[i].stmt)
        continue;

      call = TREE_OPERAND (mi->free_stmts[i].stmt, 1);
      c_node = cgraph_node (mi->free_stmts[i].func);

      gcc_assert (TREE_CODE (mi->free_stmts[i].stmt) == CALL_EXPR);
      e = cgraph_edge (c_node, mi->free_stmts[i].stmt);
      gcc_assert (e);
      cgraph_remove_edge (e);
      current_function_decl = mi->free_stmts[i].func;
      set_cfun (DECL_STRUCT_FUNCTION (mi->free_stmts[i].func));
      bsi = bsi_for_stmt (mi->free_stmts[i].stmt);
      bsi_remove (&bsi, true);
    }
  /* Return to the previous situation.  */
  current_function_decl = oldfn;
  pop_cfun ();
  return 1;
}

/* Print out the results of the escape analysis.  */
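/* A dump entry produced below looks roughly like this (illustrative
   matrix name and counts):

     Matrix "a"; Escaping Level: 2, Num Dims: 2, Malloc Dims: 2,
     Flattened 2 dimensions  */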
static int
dump_matrix_reorg_analysis (void **slot, void *data ATTRIBUTE_UNUSED)
{
  struct matrix_info *mi = (struct matrix_info *) *slot;

  if (!dump_file)
    return 1;
  fprintf (dump_file, "Matrix \"%s\"; Escaping Level: %d, Num Dims: %d,",
           get_name (mi->decl), mi->min_indirect_level_escape, mi->num_dims);
  fprintf (dump_file, " Malloc Dims: %d, ", mi->max_malloced_level);
  fprintf (dump_file, "\n");
  if (mi->min_indirect_level_escape >= 2)
    fprintf (dump_file, "Flattened %d dimensions \n",
             mi->min_indirect_level_escape);
  return 1;
}

#endif

/* Perform matrix flattening.  */
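/* Driver overview (summarizing the code below): a first walk over the
   call graph collects the candidate matrices and runs the escape
   analysis (find_matrices_decl, find_sites_in_func,
   check_allocation_function), plus the transposing analysis when
   profile data is available; the allocation sites are then
   transformed, and a second walk rewrites the access sites in every
   analyzed function.  */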
static unsigned int
matrix_reorg (void)
{
#if 0 /* FIXME tuples */
  struct cgraph_node *node;

  if (profile_info)
    check_transpose_p = true;
  else
    check_transpose_p = false;
  /* If there are hand written vectors, we skip this optimization.  */
  for (node = cgraph_nodes; node; node = node->next)
    if (!may_flatten_matrices (node))
      return 0;
  matrices_to_reorg = htab_create (37, mtt_info_hash, mtt_info_eq, mat_free);
  /* Find and record all potential matrices in the program.  */
  find_matrices_decl ();
  /* Analyze the accesses of the matrices (escaping analysis).  */
  for (node = cgraph_nodes; node; node = node->next)
    if (node->analyzed)
      {
        tree temp_fn;

        temp_fn = current_function_decl;
        current_function_decl = node->decl;
        push_cfun (DECL_STRUCT_FUNCTION (node->decl));
        bitmap_obstack_initialize (NULL);
        gimple_register_cfg_hooks ();

        if (!gimple_in_ssa_p (cfun))
          {
            free_dominance_info (CDI_DOMINATORS);
            free_dominance_info (CDI_POST_DOMINATORS);
            pop_cfun ();
            current_function_decl = temp_fn;
            bitmap_obstack_release (NULL);

            return 0;
          }

#ifdef ENABLE_CHECKING
        verify_flow_info ();
#endif

        if (!matrices_to_reorg)
          {
            free_dominance_info (CDI_DOMINATORS);
            free_dominance_info (CDI_POST_DOMINATORS);
            pop_cfun ();
            current_function_decl = temp_fn;
            bitmap_obstack_release (NULL);

            return 0;
          }

        /* Create a hash table for the phi nodes.  */
        htab_mat_acc_phi_nodes = htab_create (37, mat_acc_phi_hash,
                                              mat_acc_phi_eq, free);
        if (!check_transpose_p)
          find_sites_in_func (false);
        else
          {
            find_sites_in_func (true);
            loop_optimizer_init (LOOPS_NORMAL);
            if (current_loops)
              scev_initialize ();
            htab_traverse (matrices_to_reorg, analyze_transpose, NULL);
            if (current_loops)
              {
                scev_finalize ();
                loop_optimizer_finalize ();
                current_loops = NULL;
              }
          }
        /* If the current function is the allocation function for any of
           the matrices, we check its allocation and the escaping level.  */
        htab_traverse (matrices_to_reorg, check_allocation_function, NULL);
        free_dominance_info (CDI_DOMINATORS);
        free_dominance_info (CDI_POST_DOMINATORS);
        pop_cfun ();
        current_function_decl = temp_fn;
        bitmap_obstack_release (NULL);
      }
  htab_traverse (matrices_to_reorg, transform_allocation_sites, NULL);
  /* Now transform the accesses.  */
  for (node = cgraph_nodes; node; node = node->next)
    if (node->analyzed)
      {
        /* Remember that the allocation sites have already been handled.  */
        tree temp_fn;

        temp_fn = current_function_decl;
        current_function_decl = node->decl;
        push_cfun (DECL_STRUCT_FUNCTION (node->decl));
        bitmap_obstack_initialize (NULL);
        gimple_register_cfg_hooks ();
        record_all_accesses_in_func ();
        htab_traverse (matrices_to_reorg, transform_access_sites, NULL);
        free_dominance_info (CDI_DOMINATORS);
        free_dominance_info (CDI_POST_DOMINATORS);
        pop_cfun ();
        current_function_decl = temp_fn;
        bitmap_obstack_release (NULL);
      }
  htab_traverse (matrices_to_reorg, dump_matrix_reorg_analysis, NULL);

  current_function_decl = NULL;
  set_cfun (NULL);
  matrices_to_reorg = NULL;
  return 0;
#else
  gcc_unreachable ();
#endif
}

/* The condition for matrix flattening to be performed.  */
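/* That is, the pass is meant to run only when both -fipa-matrix-reorg
   and -fwhole-program are given; until the FIXME below is resolved for
   the tuples representation, it is disabled unconditionally.  */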
static bool
gate_matrix_reorg (void)
{
  /* FIXME tuples */
#if 0
  return flag_ipa_matrix_reorg && flag_whole_program;
#else
  return false;
#endif
}

struct simple_ipa_opt_pass pass_ipa_matrix_reorg =
{
 {
  SIMPLE_IPA_PASS,
  "matrix-reorg",                       /* name */
  gate_matrix_reorg,                    /* gate */
  matrix_reorg,                         /* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  0,                                    /* tv_id */
  0,                                    /* properties_required */
  PROP_trees,                           /* properties_provided */
  0,                                    /* properties_destroyed */
  0,                                    /* todo_flags_start */
  TODO_dump_cgraph | TODO_dump_func     /* todo_flags_finish */
 }
};