gcc/tree-data-ref.c

   1 /* Data references and dependences detectors.
   2    Copyright (C) 2003-2017 Free Software Foundation, Inc.
   3    Contributed by Sebastian Pop <pop@cri.ensmp.fr>
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 /* This pass walks a given loop structure searching for array
  22    references.  The information about the array accesses is recorded
  23    in DATA_REFERENCE structures.
  24
  25    The basic test for determining the dependences is:
  26    given two access functions chrec1 and chrec2 to a same array, and
  27    x and y two vectors from the iteration domain, the same element of
  28    the array is accessed twice at iterations x and y if and only if:
  29    |             chrec1 (x) == chrec2 (y).
  30
  31    The goals of this analysis are:
  32
  33    - to determine the independence: the relation between two
  34      independent accesses is qualified with the chrec_known (this
  35      information allows a loop parallelization),
  36
  37    - when two data references access the same data, to qualify the
  38      dependence relation with classic dependence representations:
  39
  40        - distance vectors
  41        - direction vectors
  42        - loop carried level dependence
  43        - polyhedron dependence
  44      or with the chains of recurrences based representation,
  45
  46    - to define a knowledge base for storing the data dependence
  47      information,
  48
  49    - to define an interface to access this data.
  50
  51
  52    Definitions:
  53
  54    - subscript: given two array accesses a subscript is the tuple
  55    composed of the access functions for a given dimension.  Example:
  56    Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
  57    (f1, g1), (f2, g2), (f3, g3).
  58
  59    - Diophantine equation: an equation whose coefficients and
  60    solutions are integer constants, for example the equation
  61    |   3*x + 2*y = 1
  62    has an integer solution x = 1 and y = -1.
  63
  64    References:
  65
  66    - "Advanced Compilation for High Performance Computing" by Randy
  67    Allen and Ken Kennedy.
  68    http://citeseer.ist.psu.edu/goff91practical.html
  69
  70    - "Loop Transformations for Restructuring Compilers - The Foundations"
  71    by Utpal Banerjee.
  72
  73
  74 */
  75
  76 #include "config.h"
  77 #include "system.h"
  78 #include "coretypes.h"
  79 #include "backend.h"
  80 #include "rtl.h"
  81 #include "tree.h"
  82 #include "gimple.h"
  83 #include "gimple-pretty-print.h"
  84 #include "alias.h"
  85 #include "fold-const.h"
  86 #include "expr.h"
  87 #include "gimple-iterator.h"
  88 #include "tree-ssa-loop-niter.h"
  89 #include "tree-ssa-loop.h"
  90 #include "tree-ssa.h"
  91 #include "cfgloop.h"
  92 #include "tree-data-ref.h"
  93 #include "tree-scalar-evolution.h"
  94 #include "dumpfile.h"
  95 #include "tree-affine.h"
  96 #include "params.h"
  97 #include "builtins.h"
  98
  99 static struct datadep_stats
 100 {
 101   int num_dependence_tests;
 102   int num_dependence_dependent;
 103   int num_dependence_independent;
 104   int num_dependence_undetermined;
 105
 106   int num_subscript_tests;
 107   int num_subscript_undetermined;
 108   int num_same_subscript_function;
 109
 110   int num_ziv;
 111   int num_ziv_independent;
 112   int num_ziv_dependent;
 113   int num_ziv_unimplemented;
 114
 115   int num_siv;
 116   int num_siv_independent;
 117   int num_siv_dependent;
 118   int num_siv_unimplemented;
 119
 120   int num_miv;
 121   int num_miv_independent;
 122   int num_miv_dependent;
 123   int num_miv_unimplemented;
 124 } dependence_stats;
 125
 126 static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
 127                                            unsigned int, unsigned int,
 128                                            struct loop *);
 129 /* Returns true iff A divides B.  */
 130
 131 static inline bool
 132 tree_fold_divides_p (const_tree a, const_tree b)
 133 {
 134   gcc_assert (TREE_CODE (a) == INTEGER_CST);
 135   gcc_assert (TREE_CODE (b) == INTEGER_CST);
 136   return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
 137 }
 138
 139 /* Returns true iff A divides B.  */
 140
 141 static inline bool
 142 int_divides_p (int a, int b)
 143 {
 144   return ((b % a) == 0);
 145 }
 146
 147 /* Return true if reference REF contains a union access.  */
 148
 149 static bool
 150 ref_contains_union_access_p (tree ref)
 151 {
 152   while (handled_component_p (ref))
 153     {
 154       ref = TREE_OPERAND (ref, 0);
 155       if (TREE_CODE (TREE_TYPE (ref)) == UNION_TYPE
 156           || TREE_CODE (TREE_TYPE (ref)) == QUAL_UNION_TYPE)
 157         return true;
 158     }
 159   return false;
 160 }
 161
 162 \f
 163
 164 /* Dump into FILE all the data references from DATAREFS.  */
 165
 166 static void
 167 dump_data_references (FILE *file, vec<data_reference_p> datarefs)
 168 {
 169   unsigned int i;
 170   struct data_reference *dr;
 171
 172   FOR_EACH_VEC_ELT (datarefs, i, dr)
 173     dump_data_reference (file, dr);
 174 }
 175
 176 /* Unified dump into FILE all the data references from DATAREFS.  */
 177
 178 DEBUG_FUNCTION void
 179 debug (vec<data_reference_p> &ref)
 180 {
 181   dump_data_references (stderr, ref);
 182 }
 183
 184 DEBUG_FUNCTION void
 185 debug (vec<data_reference_p> *ptr)
 186 {
 187   if (ptr)
 188     debug (*ptr);
 189   else
 190     fprintf (stderr, "<nil>\n");
 191 }
 192
 193
 194 /* Dump into STDERR all the data references from DATAREFS.  */
 195
 196 DEBUG_FUNCTION void
 197 debug_data_references (vec<data_reference_p> datarefs)
 198 {
 199   dump_data_references (stderr, datarefs);
 200 }
 201
 202 /* Print to STDERR the data_reference DR.  */
 203
 204 DEBUG_FUNCTION void
 205 debug_data_reference (struct data_reference *dr)
 206 {
 207   dump_data_reference (stderr, dr);
 208 }
 209
 210 /* Dump function for a DATA_REFERENCE structure.  */
 211
 212 void
 213 dump_data_reference (FILE *outf,
 214                      struct data_reference *dr)
 215 {
 216   unsigned int i;
 217
 218   fprintf (outf, "#(Data Ref: \n");
 219   fprintf (outf, "#  bb: %d \n", gimple_bb (DR_STMT (dr))->index);
 220   fprintf (outf, "#  stmt: ");
 221   print_gimple_stmt (outf, DR_STMT (dr), 0);
 222   fprintf (outf, "#  ref: ");
 223   print_generic_stmt (outf, DR_REF (dr));
 224   fprintf (outf, "#  base_object: ");
 225   print_generic_stmt (outf, DR_BASE_OBJECT (dr));
 226
 227   for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
 228     {
 229       fprintf (outf, "#  Access function %d: ", i);
 230       print_generic_stmt (outf, DR_ACCESS_FN (dr, i));
 231     }
 232   fprintf (outf, "#)\n");
 233 }
 234
 235 /* Unified dump function for a DATA_REFERENCE structure.  */
 236
 237 DEBUG_FUNCTION void
 238 debug (data_reference &ref)
 239 {
 240   dump_data_reference (stderr, &ref);
 241 }
 242
 243 DEBUG_FUNCTION void
 244 debug (data_reference *ptr)
 245 {
 246   if (ptr)
 247     debug (*ptr);
 248   else
 249     fprintf (stderr, "<nil>\n");
 250 }
 251
 252
 253 /* Dumps the affine function described by FN to the file OUTF.  */
 254
 255 DEBUG_FUNCTION void
 256 dump_affine_function (FILE *outf, affine_fn fn)
 257 {
 258   unsigned i;
 259   tree coef;
 260
 261   print_generic_expr (outf, fn[0], TDF_SLIM);
 262   for (i = 1; fn.iterate (i, &coef); i++)
 263     {
 264       fprintf (outf, " + ");
 265       print_generic_expr (outf, coef, TDF_SLIM);
 266       fprintf (outf, " * x_%u", i);
 267     }
 268 }
 269
 270 /* Dumps the conflict function CF to the file OUTF.  */
 271
 272 DEBUG_FUNCTION void
 273 dump_conflict_function (FILE *outf, conflict_function *cf)
 274 {
 275   unsigned i;
 276
 277   if (cf->n == NO_DEPENDENCE)
 278     fprintf (outf, "no dependence");
 279   else if (cf->n == NOT_KNOWN)
 280     fprintf (outf, "not known");
 281   else
 282     {
 283       for (i = 0; i < cf->n; i++)
 284         {
 285           if (i != 0)
 286             fprintf (outf, " ");
 287           fprintf (outf, "[");
 288           dump_affine_function (outf, cf->fns[i]);
 289           fprintf (outf, "]");
 290         }
 291     }
 292 }
 293
 294 /* Dump function for a SUBSCRIPT structure.  */
 295
 296 DEBUG_FUNCTION void
 297 dump_subscript (FILE *outf, struct subscript *subscript)
 298 {
 299   conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
 300
 301   fprintf (outf, "\n (subscript \n");
 302   fprintf (outf, "  iterations_that_access_an_element_twice_in_A: ");
 303   dump_conflict_function (outf, cf);
 304   if (CF_NONTRIVIAL_P (cf))
 305     {
 306       tree last_iteration = SUB_LAST_CONFLICT (subscript);
 307       fprintf (outf, "\n  last_conflict: ");
 308       print_generic_expr (outf, last_iteration);
 309     }
 310
 311   cf = SUB_CONFLICTS_IN_B (subscript);
 312   fprintf (outf, "\n  iterations_that_access_an_element_twice_in_B: ");
 313   dump_conflict_function (outf, cf);
 314   if (CF_NONTRIVIAL_P (cf))
 315     {
 316       tree last_iteration = SUB_LAST_CONFLICT (subscript);
 317       fprintf (outf, "\n  last_conflict: ");
 318       print_generic_expr (outf, last_iteration);
 319     }
 320
 321   fprintf (outf, "\n  (Subscript distance: ");
 322   print_generic_expr (outf, SUB_DISTANCE (subscript));
 323   fprintf (outf, " ))\n");
 324 }
 325
 326 /* Print the classic direction vector DIRV to OUTF.  */
 327
 328 DEBUG_FUNCTION void
 329 print_direction_vector (FILE *outf,
 330                         lambda_vector dirv,
 331                         int length)
 332 {
 333   int eq;
 334
 335   for (eq = 0; eq < length; eq++)
 336     {
 337       enum data_dependence_direction dir = ((enum data_dependence_direction)
 338                                             dirv[eq]);
 339
 340       switch (dir)
 341         {
 342         case dir_positive:
 343           fprintf (outf, "    +");
 344           break;
 345         case dir_negative:
 346           fprintf (outf, "    -");
 347           break;
 348         case dir_equal:
 349           fprintf (outf, "    =");
 350           break;
 351         case dir_positive_or_equal:
 352           fprintf (outf, "   +=");
 353           break;
 354         case dir_positive_or_negative:
 355           fprintf (outf, "   +-");
 356           break;
 357         case dir_negative_or_equal:
 358           fprintf (outf, "   -=");
 359           break;
 360         case dir_star:
 361           fprintf (outf, "    *");
 362           break;
 363         default:
 364           fprintf (outf, "indep");
 365           break;
 366         }
 367     }
 368   fprintf (outf, "\n");
 369 }
 370
 371 /* Print a vector of direction vectors.  */
 372
 373 DEBUG_FUNCTION void
 374 print_dir_vectors (FILE *outf, vec<lambda_vector> dir_vects,
 375                    int length)
 376 {
 377   unsigned j;
 378   lambda_vector v;
 379
 380   FOR_EACH_VEC_ELT (dir_vects, j, v)
 381     print_direction_vector (outf, v, length);
 382 }
 383
 384 /* Print out a vector VEC of length N to OUTFILE.  */
 385
 386 DEBUG_FUNCTION void
 387 print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
 388 {
 389   int i;
 390
 391   for (i = 0; i < n; i++)
 392     fprintf (outfile, "%3d ", vector[i]);
 393   fprintf (outfile, "\n");
 394 }
 395
 396 /* Print a vector of distance vectors.  */
 397
 398 DEBUG_FUNCTION void
 399 print_dist_vectors (FILE *outf, vec<lambda_vector> dist_vects,
 400                     int length)
 401 {
 402   unsigned j;
 403   lambda_vector v;
 404
 405   FOR_EACH_VEC_ELT (dist_vects, j, v)
 406     print_lambda_vector (outf, v, length);
 407 }
 408
 409 /* Dump function for a DATA_DEPENDENCE_RELATION structure.  */
 410
 411 DEBUG_FUNCTION void
 412 dump_data_dependence_relation (FILE *outf,
 413                                struct data_dependence_relation *ddr)
 414 {
 415   struct data_reference *dra, *drb;
 416
 417   fprintf (outf, "(Data Dep: \n");
 418
 419   if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
 420     {
 421       if (ddr)
 422         {
 423           dra = DDR_A (ddr);
 424           drb = DDR_B (ddr);
 425           if (dra)
 426             dump_data_reference (outf, dra);
 427           else
 428             fprintf (outf, "    (nil)\n");
 429           if (drb)
 430             dump_data_reference (outf, drb);
 431           else
 432             fprintf (outf, "    (nil)\n");
 433         }
 434       fprintf (outf, "    (don't know)\n)\n");
 435       return;
 436     }
 437
 438   dra = DDR_A (ddr);
 439   drb = DDR_B (ddr);
 440   dump_data_reference (outf, dra);
 441   dump_data_reference (outf, drb);
 442
 443   if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
 444     fprintf (outf, "    (no dependence)\n");
 445
 446   else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
 447     {
 448       unsigned int i;
 449       struct loop *loopi;
 450
 451       subscript *sub;
 452       FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
 453         {
 454           fprintf (outf, "  access_fn_A: ");
 455           print_generic_stmt (outf, SUB_ACCESS_FN (sub, 0));
 456           fprintf (outf, "  access_fn_B: ");
 457           print_generic_stmt (outf, SUB_ACCESS_FN (sub, 1));
 458           dump_subscript (outf, sub);
 459         }
 460
 461       fprintf (outf, "  inner loop index: %d\n", DDR_INNER_LOOP (ddr));
 462       fprintf (outf, "  loop nest: (");
 463       FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr), i, loopi)
 464         fprintf (outf, "%d ", loopi->num);
 465       fprintf (outf, ")\n");
 466
 467       for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
 468         {
 469           fprintf (outf, "  distance_vector: ");
 470           print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
 471                                DDR_NB_LOOPS (ddr));
 472         }
 473
 474       for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
 475         {
 476           fprintf (outf, "  direction_vector: ");
 477           print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
 478                                   DDR_NB_LOOPS (ddr));
 479         }
 480     }
 481
 482   fprintf (outf, ")\n");
 483 }
 484
 485 /* Debug version.  */
 486
 487 DEBUG_FUNCTION void
 488 debug_data_dependence_relation (struct data_dependence_relation *ddr)
 489 {
 490   dump_data_dependence_relation (stderr, ddr);
 491 }
 492
 493 /* Dump into FILE all the dependence relations from DDRS.  */
 494
 495 DEBUG_FUNCTION void
 496 dump_data_dependence_relations (FILE *file,
 497                                 vec<ddr_p> ddrs)
 498 {
 499   unsigned int i;
 500   struct data_dependence_relation *ddr;
 501
 502   FOR_EACH_VEC_ELT (ddrs, i, ddr)
 503     dump_data_dependence_relation (file, ddr);
 504 }
 505
 506 DEBUG_FUNCTION void
 507 debug (vec<ddr_p> &ref)
 508 {
 509   dump_data_dependence_relations (stderr, ref);
 510 }
 511
 512 DEBUG_FUNCTION void
 513 debug (vec<ddr_p> *ptr)
 514 {
 515   if (ptr)
 516     debug (*ptr);
 517   else
 518     fprintf (stderr, "<nil>\n");
 519 }
 520
 521
 522 /* Dump to STDERR all the dependence relations from DDRS.  */
 523
 524 DEBUG_FUNCTION void
 525 debug_data_dependence_relations (vec<ddr_p> ddrs)
 526 {
 527   dump_data_dependence_relations (stderr, ddrs);
 528 }
 529
 530 /* Dumps the distance and direction vectors in FILE.  DDRS contains
 531    the dependence relations, and VECT_SIZE is the size of the
 532    dependence vectors, or in other words the number of loops in the
 533    considered nest.  */
 534
 535 DEBUG_FUNCTION void
 536 dump_dist_dir_vectors (FILE *file, vec<ddr_p> ddrs)
 537 {
 538   unsigned int i, j;
 539   struct data_dependence_relation *ddr;
 540   lambda_vector v;
 541
 542   FOR_EACH_VEC_ELT (ddrs, i, ddr)
 543     if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
 544       {
 545         FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), j, v)
 546           {
 547             fprintf (file, "DISTANCE_V (");
 548             print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
 549             fprintf (file, ")\n");
 550           }
 551
 552         FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), j, v)
 553           {
 554             fprintf (file, "DIRECTION_V (");
 555             print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
 556             fprintf (file, ")\n");
 557           }
 558       }
 559
 560   fprintf (file, "\n\n");
 561 }
 562
 563 /* Dumps the data dependence relations DDRS in FILE.  */
 564
 565 DEBUG_FUNCTION void
 566 dump_ddrs (FILE *file, vec<ddr_p> ddrs)
 567 {
 568   unsigned int i;
 569   struct data_dependence_relation *ddr;
 570
 571   FOR_EACH_VEC_ELT (ddrs, i, ddr)
 572     dump_data_dependence_relation (file, ddr);
 573
 574   fprintf (file, "\n\n");
 575 }
 576
 577 DEBUG_FUNCTION void
 578 debug_ddrs (vec<ddr_p> ddrs)
 579 {
 580   dump_ddrs (stderr, ddrs);
 581 }
 582
 583 /* Helper function for split_constant_offset.  Expresses OP0 CODE OP1
 584    (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
 585    constant of type ssizetype, and returns true.  If we cannot do this
 586    with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
 587    is returned.  */
 588
 589 static bool
 590 split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
 591                          tree *var, tree *off)
 592 {
 593   tree var0, var1;
 594   tree off0, off1;
 595   enum tree_code ocode = code;
 596
 597   *var = NULL_TREE;
 598   *off = NULL_TREE;
 599
 600   switch (code)
 601     {
 602     case INTEGER_CST:
 603       *var = build_int_cst (type, 0);
 604       *off = fold_convert (ssizetype, op0);
 605       return true;
 606
 607     case POINTER_PLUS_EXPR:
 608       ocode = PLUS_EXPR;
 609       /* FALLTHROUGH */
 610     case PLUS_EXPR:
 611     case MINUS_EXPR:
 612       split_constant_offset (op0, &var0, &off0);
 613       split_constant_offset (op1, &var1, &off1);
 614       *var = fold_build2 (code, type, var0, var1);
 615       *off = size_binop (ocode, off0, off1);
 616       return true;
 617
 618     case MULT_EXPR:
 619       if (TREE_CODE (op1) != INTEGER_CST)
 620         return false;
 621
 622       split_constant_offset (op0, &var0, &off0);
 623       *var = fold_build2 (MULT_EXPR, type, var0, op1);
 624       *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
 625       return true;
 626
 627     case ADDR_EXPR:
 628       {
 629         tree base, poffset;
 630         poly_int64 pbitsize, pbitpos, pbytepos;
 631         machine_mode pmode;
 632         int punsignedp, preversep, pvolatilep;
 633
 634         op0 = TREE_OPERAND (op0, 0);
 635         base
 636           = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset, &pmode,
 637                                  &punsignedp, &preversep, &pvolatilep);
 638
 639         if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
 640           return false;
 641         base = build_fold_addr_expr (base);
 642         off0 = ssize_int (pbytepos);
 643
 644         if (poffset)
 645           {
 646             split_constant_offset (poffset, &poffset, &off1);
 647             off0 = size_binop (PLUS_EXPR, off0, off1);
 648             if (POINTER_TYPE_P (TREE_TYPE (base)))
 649               base = fold_build_pointer_plus (base, poffset);
 650             else
 651               base = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base,
 652                                   fold_convert (TREE_TYPE (base), poffset));
 653           }
 654
 655         var0 = fold_convert (type, base);
 656
 657         /* If variable length types are involved, punt, otherwise casts
 658            might be converted into ARRAY_REFs in gimplify_conversion.
 659            To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
 660            possibly no longer appears in current GIMPLE, might resurface.
 661            This perhaps could run
 662            if (CONVERT_EXPR_P (var0))
 663              {
 664                gimplify_conversion (&var0);
 665                // Attempt to fill in any within var0 found ARRAY_REF's
 666                // element size from corresponding op embedded ARRAY_REF,
 667                // if unsuccessful, just punt.
 668              }  */
 669         while (POINTER_TYPE_P (type))
 670           type = TREE_TYPE (type);
 671         if (int_size_in_bytes (type) < 0)
 672           return false;
 673
 674         *var = var0;
 675         *off = off0;
 676         return true;
 677       }
 678
 679     case SSA_NAME:
 680       {
 681         if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op0))
 682           return false;
 683
 684         gimple *def_stmt = SSA_NAME_DEF_STMT (op0);
 685         enum tree_code subcode;
 686
 687         if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
 688           return false;
 689
 690         var0 = gimple_assign_rhs1 (def_stmt);
 691         subcode = gimple_assign_rhs_code (def_stmt);
 692         var1 = gimple_assign_rhs2 (def_stmt);
 693
 694         return split_constant_offset_1 (type, var0, subcode, var1, var, off);
 695       }
 696     CASE_CONVERT:
 697       {
 698         /* We must not introduce undefined overflow, and we must not change the value.
 699            Hence we're okay if the inner type doesn't overflow to start with
 700            (pointer or signed), the outer type also is an integer or pointer
 701            and the outer precision is at least as large as the inner.  */
 702         tree itype = TREE_TYPE (op0);
 703         if ((POINTER_TYPE_P (itype)
 704              || (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_UNDEFINED (itype)))
 705             && TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
 706             && (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
 707           {
 708             split_constant_offset (op0, &var0, off);
 709             *var = fold_convert (type, var0);
 710             return true;
 711           }
 712         return false;
 713       }
 714
 715     default:
 716       return false;
 717     }
 718 }
 719
 720 /* Expresses EXP as VAR + OFF, where off is a constant.  The type of OFF
 721    will be ssizetype.  */
 722
 723 void
 724 split_constant_offset (tree exp, tree *var, tree *off)
 725 {
 726   tree type = TREE_TYPE (exp), otype, op0, op1, e, o;
 727   enum tree_code code;
 728
 729   *var = exp;
 730   *off = ssize_int (0);
 731   STRIP_NOPS (exp);
 732
 733   if (tree_is_chrec (exp)
 734       || get_gimple_rhs_class (TREE_CODE (exp)) == GIMPLE_TERNARY_RHS)
 735     return;
 736
 737   otype = TREE_TYPE (exp);
 738   code = TREE_CODE (exp);
 739   extract_ops_from_tree (exp, &code, &op0, &op1);
 740   if (split_constant_offset_1 (otype, op0, code, op1, &e, &o))
 741     {
 742       *var = fold_convert (type, e);
 743       *off = o;
 744     }
 745 }
 746
 747 /* Returns the address ADDR of an object in a canonical shape (without nop
 748    casts, and with type of pointer to the object).  */
 749
 750 static tree
 751 canonicalize_base_object_address (tree addr)
 752 {
 753   tree orig = addr;
 754
 755   STRIP_NOPS (addr);
 756
 757   /* The base address may be obtained by casting from integer, in that case
 758      keep the cast.  */
 759   if (!POINTER_TYPE_P (TREE_TYPE (addr)))
 760     return orig;
 761
 762   if (TREE_CODE (addr) != ADDR_EXPR)
 763     return addr;
 764
 765   return build_fold_addr_expr (TREE_OPERAND (addr, 0));
 766 }
 767
 768 /* Analyze the behavior of memory reference REF.  There are two modes:
 769
 770    - BB analysis.  In this case we simply split the address into base,
 771      init and offset components, without reference to any containing loop.
 772      The resulting base and offset are general expressions and they can
 773      vary arbitrarily from one iteration of the containing loop to the next.
 774      The step is always zero.
 775
 776    - loop analysis.  In this case we analyze the reference both wrt LOOP
 777      and on the basis that the reference occurs (is "used") in LOOP;
 778      see the comment above analyze_scalar_evolution_in_loop for more
 779      information about this distinction.  The base, init, offset and
 780      step fields are all invariant in LOOP.
 781
 782    Perform BB analysis if LOOP is null, or if LOOP is the function's
 783    dummy outermost loop.  In other cases perform loop analysis.
 784
 785    Return true if the analysis succeeded and store the results in DRB if so.
 786    BB analysis can only fail for bitfield or reversed-storage accesses.  */
 787
 788 bool
 789 dr_analyze_innermost (innermost_loop_behavior *drb, tree ref,
 790                       struct loop *loop)
 791 {
 792   poly_int64 pbitsize, pbitpos;
 793   tree base, poffset;
 794   machine_mode pmode;
 795   int punsignedp, preversep, pvolatilep;
 796   affine_iv base_iv, offset_iv;
 797   tree init, dinit, step;
 798   bool in_loop = (loop && loop->num);
 799
 800   if (dump_file && (dump_flags & TDF_DETAILS))
 801     fprintf (dump_file, "analyze_innermost: ");
 802
 803   base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset, &pmode,
 804                               &punsignedp, &preversep, &pvolatilep);
 805   gcc_assert (base != NULL_TREE);
 806
 807   poly_int64 pbytepos;
 808   if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
 809     {
 810       if (dump_file && (dump_flags & TDF_DETAILS))
 811         fprintf (dump_file, "failed: bit offset alignment.\n");
 812       return false;
 813     }
 814
 815   if (preversep)
 816     {
 817       if (dump_file && (dump_flags & TDF_DETAILS))
 818         fprintf (dump_file, "failed: reverse storage order.\n");
 819       return false;
 820     }
 821
 822   /* Calculate the alignment and misalignment for the inner reference.  */
 823   unsigned int HOST_WIDE_INT bit_base_misalignment;
 824   unsigned int bit_base_alignment;
 825   get_object_alignment_1 (base, &bit_base_alignment, &bit_base_misalignment);
 826
 827   /* There are no bitfield references remaining in BASE, so the values
 828      we got back must be whole bytes.  */
 829   gcc_assert (bit_base_alignment % BITS_PER_UNIT == 0
 830               && bit_base_misalignment % BITS_PER_UNIT == 0);
 831   unsigned int base_alignment = bit_base_alignment / BITS_PER_UNIT;
 832   poly_int64 base_misalignment = bit_base_misalignment / BITS_PER_UNIT;
 833
 834   if (TREE_CODE (base) == MEM_REF)
 835     {
 836       if (!integer_zerop (TREE_OPERAND (base, 1)))
 837         {
 838           /* Subtract MOFF from the base and add it to POFFSET instead.
 839              Adjust the misalignment to reflect the amount we subtracted.  */
 840           poly_offset_int moff = mem_ref_offset (base);
 841           base_misalignment -= moff.force_shwi ();
 842           tree mofft = wide_int_to_tree (sizetype, moff);
 843           if (!poffset)
 844             poffset = mofft;
 845           else
 846             poffset = size_binop (PLUS_EXPR, poffset, mofft);
 847         }
 848       base = TREE_OPERAND (base, 0);
 849     }
 850   else
 851     base = build_fold_addr_expr (base);
 852
 853   if (in_loop)
 854     {
 855       if (!simple_iv (loop, loop, base, &base_iv, true))
 856         {
 857           if (dump_file && (dump_flags & TDF_DETAILS))
 858             fprintf (dump_file, "failed: evolution of base is not affine.\n");
 859           return false;
 860         }
 861     }
 862   else
 863     {
 864       base_iv.base = base;
 865       base_iv.step = ssize_int (0);
 866       base_iv.no_overflow = true;
 867     }
 868
 869   if (!poffset)
 870     {
 871       offset_iv.base = ssize_int (0);
 872       offset_iv.step = ssize_int (0);
 873     }
 874   else
 875     {
 876       if (!in_loop)
 877         {
 878           offset_iv.base = poffset;
 879           offset_iv.step = ssize_int (0);
 880         }
 881       else if (!simple_iv (loop, loop, poffset, &offset_iv, true))
 882         {
 883           if (dump_file && (dump_flags & TDF_DETAILS))
 884             fprintf (dump_file, "failed: evolution of offset is not affine.\n");
 885           return false;
 886         }
 887     }
 888
 889   init = ssize_int (pbytepos);
 890
 891   /* Subtract any constant component from the base and add it to INIT instead.
 892      Adjust the misalignment to reflect the amount we subtracted.  */
 893   split_constant_offset (base_iv.base, &base_iv.base, &dinit);
 894   init = size_binop (PLUS_EXPR, init, dinit);
 895   base_misalignment -= TREE_INT_CST_LOW (dinit);
 896
 897   split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
 898   init = size_binop (PLUS_EXPR, init, dinit);
 899
 900   step = size_binop (PLUS_EXPR,
 901                      fold_convert (ssizetype, base_iv.step),
 902                      fold_convert (ssizetype, offset_iv.step));
 903
 904   base = canonicalize_base_object_address (base_iv.base);
 905
 906   /* See if get_pointer_alignment can guarantee a higher alignment than
 907      the one we calculated above.  */
 908   unsigned int HOST_WIDE_INT alt_misalignment;
 909   unsigned int alt_alignment;
 910   get_pointer_alignment_1 (base, &alt_alignment, &alt_misalignment);
 911
 912   /* As above, these values must be whole bytes.  */
 913   gcc_assert (alt_alignment % BITS_PER_UNIT == 0
 914               && alt_misalignment % BITS_PER_UNIT == 0);
 915   alt_alignment /= BITS_PER_UNIT;
 916   alt_misalignment /= BITS_PER_UNIT;
 917
 918   if (base_alignment < alt_alignment)
 919     {
 920       base_alignment = alt_alignment;
 921       base_misalignment = alt_misalignment;
 922     }
 923
 924   drb->base_address = base;
 925   drb->offset = fold_convert (ssizetype, offset_iv.base);
 926   drb->init = init;
 927   drb->step = step;
 928   if (known_misalignment (base_misalignment, base_alignment,
 929                           &drb->base_misalignment))
 930     drb->base_alignment = base_alignment;
 931   else
 932     {
 933       drb->base_alignment = known_alignment (base_misalignment);
 934       drb->base_misalignment = 0;
 935     }
 936   drb->offset_alignment = highest_pow2_factor (offset_iv.base);
 937   drb->step_alignment = highest_pow2_factor (step);
 938
 939   if (dump_file && (dump_flags & TDF_DETAILS))
 940     fprintf (dump_file, "success.\n");
 941
 942   return true;
 943 }
 944
 945 /* Return true if OP is a valid component reference for a DR access
 946    function.  This accepts a subset of what handled_component_p accepts.  */
 947
 948 static bool
 949 access_fn_component_p (tree op)
 950 {
 951   switch (TREE_CODE (op))
 952     {
 953     case REALPART_EXPR:
 954     case IMAGPART_EXPR:
 955     case ARRAY_REF:
 956       return true;
 957
 958     case COMPONENT_REF:
 959       return TREE_CODE (TREE_TYPE (TREE_OPERAND (op, 0))) == RECORD_TYPE;
 960
 961     default:
 962       return false;
 963     }
 964 }
 965
 966 /* Determines the base object and the list of indices of memory reference
 967    DR, analyzed in LOOP and instantiated before NEST.  */
 968
 969 static void
 970 dr_analyze_indices (struct data_reference *dr, edge nest, loop_p loop)
 971 {
 972   vec<tree> access_fns = vNULL;
 973   tree ref, op;
 974   tree base, off, access_fn;
 975
 976   /* If analyzing a basic-block there are no indices to analyze
 977      and thus no access functions.  */
 978   if (!nest)
 979     {
 980       DR_BASE_OBJECT (dr) = DR_REF (dr);
 981       DR_ACCESS_FNS (dr).create (0);
 982       return;
 983     }
 984
 985   ref = DR_REF (dr);
 986
 987   /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
 988      into a two element array with a constant index.  The base is
 989      then just the immediate underlying object.  */
 990   if (TREE_CODE (ref) == REALPART_EXPR)
 991     {
 992       ref = TREE_OPERAND (ref, 0);
 993       access_fns.safe_push (integer_zero_node);
 994     }
 995   else if (TREE_CODE (ref) == IMAGPART_EXPR)
 996     {
 997       ref = TREE_OPERAND (ref, 0);
 998       access_fns.safe_push (integer_one_node);
 999     }
1000
1001   /* Analyze access functions of dimensions we know to be independent.
1002      The list of component references handled here should be kept in
1003      sync with access_fn_component_p.  */
1004   while (handled_component_p (ref))
1005     {
1006       if (TREE_CODE (ref) == ARRAY_REF)
1007         {
1008           op = TREE_OPERAND (ref, 1);
1009           access_fn = analyze_scalar_evolution (loop, op);
1010           access_fn = instantiate_scev (nest, loop, access_fn);
1011           access_fns.safe_push (access_fn);
1012         }
1013       else if (TREE_CODE (ref) == COMPONENT_REF
1014                && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) == RECORD_TYPE)
1015         {
1016           /* For COMPONENT_REFs of records (but not unions!) use the
1017              FIELD_DECL offset as constant access function so we can
1018              disambiguate a[i].f1 and a[i].f2.  */
1019           tree off = component_ref_field_offset (ref);
1020           off = size_binop (PLUS_EXPR,
1021                             size_binop (MULT_EXPR,
1022                                         fold_convert (bitsizetype, off),
1023                                         bitsize_int (BITS_PER_UNIT)),
1024                             DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref, 1)));
1025           access_fns.safe_push (off);
1026         }
1027       else
1028         /* If we have an unhandled component we could not translate
1029            to an access function stop analyzing.  We have determined
1030            our base object in this case.  */
1031         break;
1032
1033       ref = TREE_OPERAND (ref, 0);
1034     }
1035
1036   /* If the address operand of a MEM_REF base has an evolution in the
1037      analyzed nest, add it as an additional independent access-function.  */
1038   if (TREE_CODE (ref) == MEM_REF)
1039     {
1040       op = TREE_OPERAND (ref, 0);
1041       access_fn = analyze_scalar_evolution (loop, op);
1042       access_fn = instantiate_scev (nest, loop, access_fn);
1043       if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
1044         {
1045           tree orig_type;
1046           tree memoff = TREE_OPERAND (ref, 1);
1047           base = initial_condition (access_fn);
1048           orig_type = TREE_TYPE (base);
1049           STRIP_USELESS_TYPE_CONVERSION (base);
1050           split_constant_offset (base, &base, &off);
1051           STRIP_USELESS_TYPE_CONVERSION (base);
1052           /* Fold the MEM_REF offset into the evolutions initial
1053              value to make more bases comparable.  */
1054           if (!integer_zerop (memoff))
1055             {
1056               off = size_binop (PLUS_EXPR, off,
1057                                 fold_convert (ssizetype, memoff));
1058               memoff = build_int_cst (TREE_TYPE (memoff), 0);
1059             }
1060           /* Adjust the offset so it is a multiple of the access type
1061              size and thus we separate bases that can possibly be used
1062              to produce partial overlaps (which the access_fn machinery
1063              cannot handle).  */
1064           wide_int rem;
1065           if (TYPE_SIZE_UNIT (TREE_TYPE (ref))
1066               && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (ref))) == INTEGER_CST
1067               && !integer_zerop (TYPE_SIZE_UNIT (TREE_TYPE (ref))))
1068             rem = wi::mod_trunc
1069               (wi::to_wide (off),
1070                wi::to_wide (TYPE_SIZE_UNIT (TREE_TYPE (ref))),
1071                SIGNED);
1072           else
1073             /* If we can't compute the remainder simply force the initial
1074                condition to zero.  */
1075             rem = wi::to_wide (off);
1076           off = wide_int_to_tree (ssizetype, wi::to_wide (off) - rem);
1077           memoff = wide_int_to_tree (TREE_TYPE (memoff), rem);
1078           /* And finally replace the initial condition.  */
1079           access_fn = chrec_replace_initial_condition
1080               (access_fn, fold_convert (orig_type, off));
1081           /* ???  This is still not a suitable base object for
1082              dr_may_alias_p - the base object needs to be an
1083              access that covers the object as whole.  With
1084              an evolution in the pointer this cannot be
1085              guaranteed.
1086              As a band-aid, mark the access so we can special-case
1087              it in dr_may_alias_p.  */
1088           tree old = ref;
1089           ref = fold_build2_loc (EXPR_LOCATION (ref),
1090                                  MEM_REF, TREE_TYPE (ref),
1091                                  base, memoff);
1092           MR_DEPENDENCE_CLIQUE (ref) = MR_DEPENDENCE_CLIQUE (old);
1093           MR_DEPENDENCE_BASE (ref) = MR_DEPENDENCE_BASE (old);
1094           DR_UNCONSTRAINED_BASE (dr) = true;
1095           access_fns.safe_push (access_fn);
1096         }
1097     }
1098   else if (DECL_P (ref))
1099     {
1100       /* Canonicalize DR_BASE_OBJECT to MEM_REF form.  */
1101       ref = build2 (MEM_REF, TREE_TYPE (ref),
1102                     build_fold_addr_expr (ref),
1103                     build_int_cst (reference_alias_ptr_type (ref), 0));
1104     }
1105
1106   DR_BASE_OBJECT (dr) = ref;
1107   DR_ACCESS_FNS (dr) = access_fns;
1108 }
1109
1110 /* Extracts the alias analysis information from the memory reference DR.  */
1111
1112 static void
1113 dr_analyze_alias (struct data_reference *dr)
1114 {
1115   tree ref = DR_REF (dr);
1116   tree base = get_base_address (ref), addr;
1117
1118   if (INDIRECT_REF_P (base)
1119       || TREE_CODE (base) == MEM_REF)
1120     {
1121       addr = TREE_OPERAND (base, 0);
1122       if (TREE_CODE (addr) == SSA_NAME)
1123         DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
1124     }
1125 }
1126
1127 /* Frees data reference DR.  */
1128
1129 void
1130 free_data_ref (data_reference_p dr)
1131 {
1132   DR_ACCESS_FNS (dr).release ();
1133   free (dr);
1134 }
1135
1136 /* Analyze memory reference MEMREF, which is accessed in STMT.
1137    The reference is a read if IS_READ is true, otherwise it is a write.
1138    IS_CONDITIONAL_IN_STMT indicates that the reference is conditional
1139    within STMT, i.e. that it might not occur even if STMT is executed
1140    and runs to completion.
1141
1142    Return the data_reference description of MEMREF.  NEST is the outermost
1143    loop in which the reference should be instantiated, LOOP is the loop
1144    in which the data reference should be analyzed.  */
1145
1146 struct data_reference *
1147 create_data_ref (edge nest, loop_p loop, tree memref, gimple *stmt,
1148                  bool is_read, bool is_conditional_in_stmt)
1149 {
1150   struct data_reference *dr;
1151
1152   if (dump_file && (dump_flags & TDF_DETAILS))
1153     {
1154       fprintf (dump_file, "Creating dr for ");
1155       print_generic_expr (dump_file, memref, TDF_SLIM);
1156       fprintf (dump_file, "\n");
1157     }
1158
1159   dr = XCNEW (struct data_reference);
1160   DR_STMT (dr) = stmt;
1161   DR_REF (dr) = memref;
1162   DR_IS_READ (dr) = is_read;
1163   DR_IS_CONDITIONAL_IN_STMT (dr) = is_conditional_in_stmt;
1164
1165   dr_analyze_innermost (&DR_INNERMOST (dr), memref,
1166                         nest != NULL ? loop : NULL);
1167   dr_analyze_indices (dr, nest, loop);
1168   dr_analyze_alias (dr);
1169
1170   if (dump_file && (dump_flags & TDF_DETAILS))
1171     {
1172       unsigned i;
1173       fprintf (dump_file, "\tbase_address: ");
1174       print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
1175       fprintf (dump_file, "\n\toffset from base address: ");
1176       print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
1177       fprintf (dump_file, "\n\tconstant offset from base address: ");
1178       print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
1179       fprintf (dump_file, "\n\tstep: ");
1180       print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
1181       fprintf (dump_file, "\n\tbase alignment: %d", DR_BASE_ALIGNMENT (dr));
1182       fprintf (dump_file, "\n\tbase misalignment: %d",
1183                DR_BASE_MISALIGNMENT (dr));
1184       fprintf (dump_file, "\n\toffset alignment: %d",
1185                DR_OFFSET_ALIGNMENT (dr));
1186       fprintf (dump_file, "\n\tstep alignment: %d", DR_STEP_ALIGNMENT (dr));
1187       fprintf (dump_file, "\n\tbase_object: ");
1188       print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
1189       fprintf (dump_file, "\n");
1190       for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
1191         {
1192           fprintf (dump_file, "\tAccess function %d: ", i);
1193           print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
1194         }
1195     }
1196
1197   return dr;
1198 }
1199
1200 /*  A helper function computes order between two tree epxressions T1 and T2.
1201     This is used in comparator functions sorting objects based on the order
1202     of tree expressions.  The function returns -1, 0, or 1.  */
1203
1204 int
1205 data_ref_compare_tree (tree t1, tree t2)
1206 {
1207   int i, cmp;
1208   enum tree_code code;
1209   char tclass;
1210
1211   if (t1 == t2)
1212     return 0;
1213   if (t1 == NULL)
1214     return -1;
1215   if (t2 == NULL)
1216     return 1;
1217
1218   STRIP_USELESS_TYPE_CONVERSION (t1);
1219   STRIP_USELESS_TYPE_CONVERSION (t2);
1220   if (t1 == t2)
1221     return 0;
1222
1223   if (TREE_CODE (t1) != TREE_CODE (t2)
1224       && ! (CONVERT_EXPR_P (t1) && CONVERT_EXPR_P (t2)))
1225     return TREE_CODE (t1) < TREE_CODE (t2) ? -1 : 1;
1226
1227   code = TREE_CODE (t1);
1228   switch (code)
1229     {
1230     case INTEGER_CST:
1231       return tree_int_cst_compare (t1, t2);
1232
1233     case STRING_CST:
1234       if (TREE_STRING_LENGTH (t1) != TREE_STRING_LENGTH (t2))
1235         return TREE_STRING_LENGTH (t1) < TREE_STRING_LENGTH (t2) ? -1 : 1;
1236       return memcmp (TREE_STRING_POINTER (t1), TREE_STRING_POINTER (t2),
1237                      TREE_STRING_LENGTH (t1));
1238
1239     case SSA_NAME:
1240       if (SSA_NAME_VERSION (t1) != SSA_NAME_VERSION (t2))
1241         return SSA_NAME_VERSION (t1) < SSA_NAME_VERSION (t2) ? -1 : 1;
1242       break;
1243
1244     default:
1245       if (POLY_INT_CST_P (t1))
1246         return compare_sizes_for_sort (wi::to_poly_widest (t1),
1247                                        wi::to_poly_widest (t2));
1248
1249       tclass = TREE_CODE_CLASS (code);
1250
1251       /* For decls, compare their UIDs.  */
1252       if (tclass == tcc_declaration)
1253         {
1254           if (DECL_UID (t1) != DECL_UID (t2))
1255             return DECL_UID (t1) < DECL_UID (t2) ? -1 : 1;
1256           break;
1257         }
1258       /* For expressions, compare their operands recursively.  */
1259       else if (IS_EXPR_CODE_CLASS (tclass))
1260         {
1261           for (i = TREE_OPERAND_LENGTH (t1) - 1; i >= 0; --i)
1262             {
1263               cmp = data_ref_compare_tree (TREE_OPERAND (t1, i),
1264                                            TREE_OPERAND (t2, i));
1265               if (cmp != 0)
1266                 return cmp;
1267             }
1268         }
1269       else
1270         gcc_unreachable ();
1271     }
1272
1273   return 0;
1274 }
1275
1276 /* Return TRUE it's possible to resolve data dependence DDR by runtime alias
1277    check.  */
1278
1279 bool
1280 runtime_alias_check_p (ddr_p ddr, struct loop *loop, bool speed_p)
1281 {
1282   if (dump_enabled_p ())
1283     {
1284       dump_printf (MSG_NOTE, "consider run-time aliasing test between ");
1285       dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (DDR_A (ddr)));
1286       dump_printf (MSG_NOTE,  " and ");
1287       dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (DDR_B (ddr)));
1288       dump_printf (MSG_NOTE, "\n");
1289     }
1290
1291   if (!speed_p)
1292     {
1293       if (dump_enabled_p ())
1294         dump_printf (MSG_MISSED_OPTIMIZATION,
1295                      "runtime alias check not supported when optimizing "
1296                      "for size.\n");
1297       return false;
1298     }
1299
1300   /* FORNOW: We don't support versioning with outer-loop in either
1301      vectorization or loop distribution.  */
1302   if (loop != NULL && loop->inner != NULL)
1303     {
1304       if (dump_enabled_p ())
1305         dump_printf (MSG_MISSED_OPTIMIZATION,
1306                      "runtime alias check not supported for outer loop.\n");
1307       return false;
1308     }
1309
1310   /* FORNOW: We don't support creating runtime alias tests for non-constant
1311      step.  */
1312   if (TREE_CODE (DR_STEP (DDR_A (ddr))) != INTEGER_CST
1313       || TREE_CODE (DR_STEP (DDR_B (ddr))) != INTEGER_CST)
1314     {
1315       if (dump_enabled_p ())
1316         dump_printf (MSG_MISSED_OPTIMIZATION,
1317                      "runtime alias check not supported for non-constant "
1318                      "step\n");
1319       return false;
1320     }
1321
1322   return true;
1323 }
1324
1325 /* Operator == between two dr_with_seg_len objects.
1326
1327    This equality operator is used to make sure two data refs
1328    are the same one so that we will consider to combine the
1329    aliasing checks of those two pairs of data dependent data
1330    refs.  */
1331
1332 static bool
1333 operator == (const dr_with_seg_len& d1,
1334              const dr_with_seg_len& d2)
1335 {
1336   return operand_equal_p (DR_BASE_ADDRESS (d1.dr),
1337                           DR_BASE_ADDRESS (d2.dr), 0)
1338            && data_ref_compare_tree (DR_OFFSET (d1.dr), DR_OFFSET (d2.dr)) == 0
1339            && data_ref_compare_tree (DR_INIT (d1.dr), DR_INIT (d2.dr)) == 0
1340            && data_ref_compare_tree (d1.seg_len, d2.seg_len) == 0;
1341 }
1342
1343 /* Comparison function for sorting objects of dr_with_seg_len_pair_t
1344    so that we can combine aliasing checks in one scan.  */
1345
1346 static int
1347 comp_dr_with_seg_len_pair (const void *pa_, const void *pb_)
1348 {
1349   const dr_with_seg_len_pair_t* pa = (const dr_with_seg_len_pair_t *) pa_;
1350   const dr_with_seg_len_pair_t* pb = (const dr_with_seg_len_pair_t *) pb_;
1351   const dr_with_seg_len &a1 = pa->first, &a2 = pa->second;
1352   const dr_with_seg_len &b1 = pb->first, &b2 = pb->second;
1353
1354   /* For DR pairs (a, b) and (c, d), we only consider to merge the alias checks
1355      if a and c have the same basic address snd step, and b and d have the same
1356      address and step.  Therefore, if any a&c or b&d don't have the same address
1357      and step, we don't care the order of those two pairs after sorting.  */
1358   int comp_res;
1359
1360   if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a1.dr),
1361                                          DR_BASE_ADDRESS (b1.dr))) != 0)
1362     return comp_res;
1363   if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a2.dr),
1364                                          DR_BASE_ADDRESS (b2.dr))) != 0)
1365     return comp_res;
1366   if ((comp_res = data_ref_compare_tree (DR_STEP (a1.dr),
1367                                          DR_STEP (b1.dr))) != 0)
1368     return comp_res;
1369   if ((comp_res = data_ref_compare_tree (DR_STEP (a2.dr),
1370                                          DR_STEP (b2.dr))) != 0)
1371     return comp_res;
1372   if ((comp_res = data_ref_compare_tree (DR_OFFSET (a1.dr),
1373                                          DR_OFFSET (b1.dr))) != 0)
1374     return comp_res;
1375   if ((comp_res = data_ref_compare_tree (DR_INIT (a1.dr),
1376                                          DR_INIT (b1.dr))) != 0)
1377     return comp_res;
1378   if ((comp_res = data_ref_compare_tree (DR_OFFSET (a2.dr),
1379                                          DR_OFFSET (b2.dr))) != 0)
1380     return comp_res;
1381   if ((comp_res = data_ref_compare_tree (DR_INIT (a2.dr),
1382                                          DR_INIT (b2.dr))) != 0)
1383     return comp_res;
1384
1385   return 0;
1386 }
1387
1388 /* Merge alias checks recorded in ALIAS_PAIRS and remove redundant ones.
1389    FACTOR is number of iterations that each data reference is accessed.
1390
1391    Basically, for each pair of dependent data refs store_ptr_0 & load_ptr_0,
1392    we create an expression:
1393
1394    ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
1395    || (load_ptr_0 + load_segment_length_0) <= store_ptr_0))
1396
1397    for aliasing checks.  However, in some cases we can decrease the number
1398    of checks by combining two checks into one.  For example, suppose we have
1399    another pair of data refs store_ptr_0 & load_ptr_1, and if the following
1400    condition is satisfied:
1401
1402    load_ptr_0 < load_ptr_1  &&
1403    load_ptr_1 - load_ptr_0 - load_segment_length_0 < store_segment_length_0
1404
1405    (this condition means, in each iteration of vectorized loop, the accessed
1406    memory of store_ptr_0 cannot be between the memory of load_ptr_0 and
1407    load_ptr_1.)
1408
1409    we then can use only the following expression to finish the alising checks
1410    between store_ptr_0 & load_ptr_0 and store_ptr_0 & load_ptr_1:
1411
1412    ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
1413    || (load_ptr_1 + load_segment_length_1 <= store_ptr_0))
1414
1415    Note that we only consider that load_ptr_0 and load_ptr_1 have the same
1416    basic address.  */
1417
1418 void
1419 prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs,
1420                                poly_uint64 factor)
1421 {
1422   /* Sort the collected data ref pairs so that we can scan them once to
1423      combine all possible aliasing checks.  */
1424   alias_pairs->qsort (comp_dr_with_seg_len_pair);
1425
1426   /* Scan the sorted dr pairs and check if we can combine alias checks
1427      of two neighboring dr pairs.  */
1428   for (size_t i = 1; i < alias_pairs->length (); ++i)
1429     {
1430       /* Deal with two ddrs (dr_a1, dr_b1) and (dr_a2, dr_b2).  */
1431       dr_with_seg_len *dr_a1 = &(*alias_pairs)[i-1].first,
1432                       *dr_b1 = &(*alias_pairs)[i-1].second,
1433                       *dr_a2 = &(*alias_pairs)[i].first,
1434                       *dr_b2 = &(*alias_pairs)[i].second;
1435
1436       /* Remove duplicate data ref pairs.  */
1437       if (*dr_a1 == *dr_a2 && *dr_b1 == *dr_b2)
1438         {
1439           if (dump_enabled_p ())
1440             {
1441               dump_printf (MSG_NOTE, "found equal ranges ");
1442               dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a1->dr));
1443               dump_printf (MSG_NOTE,  ", ");
1444               dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b1->dr));
1445               dump_printf (MSG_NOTE,  " and ");
1446               dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a2->dr));
1447               dump_printf (MSG_NOTE,  ", ");
1448               dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b2->dr));
1449               dump_printf (MSG_NOTE, "\n");
1450             }
1451           alias_pairs->ordered_remove (i--);
1452           continue;
1453         }
1454
1455       if (*dr_a1 == *dr_a2 || *dr_b1 == *dr_b2)
1456         {
1457           /* We consider the case that DR_B1 and DR_B2 are same memrefs,
1458              and DR_A1 and DR_A2 are two consecutive memrefs.  */
1459           if (*dr_a1 == *dr_a2)
1460             {
1461               std::swap (dr_a1, dr_b1);
1462               std::swap (dr_a2, dr_b2);
1463             }
1464
1465           poly_int64 init_a1, init_a2;
1466           if (!operand_equal_p (DR_BASE_ADDRESS (dr_a1->dr),
1467                                 DR_BASE_ADDRESS (dr_a2->dr), 0)
1468               || !operand_equal_p (DR_OFFSET (dr_a1->dr),
1469                                    DR_OFFSET (dr_a2->dr), 0)
1470               || !poly_int_tree_p (DR_INIT (dr_a1->dr), &init_a1)
1471               || !poly_int_tree_p (DR_INIT (dr_a2->dr), &init_a2))
1472             continue;
1473
1474           /* Don't combine if we can't tell which one comes first.  */
1475           if (!ordered_p (init_a1, init_a2))
1476             continue;
1477
1478           /* Make sure dr_a1 starts left of dr_a2.  */
1479           if (maybe_gt (init_a1, init_a2))
1480             {
1481               std::swap (*dr_a1, *dr_a2);
1482               std::swap (init_a1, init_a2);
1483             }
1484
1485           /* Only merge const step data references.  */
1486           poly_int64 step_a1, step_a2;
1487           if (!poly_int_tree_p (DR_STEP (dr_a1->dr), &step_a1)
1488               || !poly_int_tree_p (DR_STEP (dr_a2->dr), &step_a2))
1489             continue;
1490
1491           bool neg_step = maybe_lt (step_a1, 0) || maybe_lt (step_a2, 0);
1492
1493           /* DR_A1 and DR_A2 must go in the same direction.  */
1494           if (neg_step && (maybe_gt (step_a1, 0) || maybe_gt (step_a2, 0)))
1495             continue;
1496
1497           poly_uint64 seg_len_a1 = 0, seg_len_a2 = 0;
1498           bool const_seg_len_a1 = poly_int_tree_p (dr_a1->seg_len,
1499                                                    &seg_len_a1);
1500           bool const_seg_len_a2 = poly_int_tree_p (dr_a2->seg_len,
1501                                                    &seg_len_a2);
1502
1503           /* We need to compute merged segment length at compilation time for
1504              dr_a1 and dr_a2, which is impossible if either one has non-const
1505              segment length.  */
1506           if ((!const_seg_len_a1 || !const_seg_len_a2)
1507               && maybe_ne (step_a1, step_a2))
1508             continue;
1509
1510           bool do_remove = false;
1511           poly_uint64 diff = init_a2 - init_a1;
1512           poly_uint64 min_seg_len_b;
1513           tree new_seg_len;
1514
1515           if (!poly_int_tree_p (dr_b1->seg_len, &min_seg_len_b))
1516             {
1517               tree step_b = DR_STEP (dr_b1->dr);
1518               if (!tree_fits_shwi_p (step_b))
1519                 continue;
1520               min_seg_len_b = factor * abs_hwi (tree_to_shwi (step_b));
1521             }
1522
1523           /* Now we try to merge alias check dr_a1 & dr_b and dr_a2 & dr_b.
1524
1525              Case A:
1526                check if the following condition is satisfied:
1527
1528                DIFF - SEGMENT_LENGTH_A < SEGMENT_LENGTH_B
1529
1530                where DIFF = DR_A2_INIT - DR_A1_INIT.  However,
1531                SEGMENT_LENGTH_A or SEGMENT_LENGTH_B may not be constant so we
1532                have to make a best estimation.  We can get the minimum value
1533                of SEGMENT_LENGTH_B as a constant, represented by MIN_SEG_LEN_B,
1534                then either of the following two conditions can guarantee the
1535                one above:
1536
1537                1: DIFF <= MIN_SEG_LEN_B
1538                2: DIFF - SEGMENT_LENGTH_A < MIN_SEG_LEN_B
1539                   Because DIFF - SEGMENT_LENGTH_A is done in sizetype, we need
1540                   to take care of wrapping behavior in it.
1541
1542              Case B:
1543                If the left segment does not extend beyond the start of the
1544                right segment the new segment length is that of the right
1545                plus the segment distance.  The condition is like:
1546
1547                DIFF >= SEGMENT_LENGTH_A   ;SEGMENT_LENGTH_A is a constant.
1548
1549              Note 1: Case A.2 and B combined together effectively merges every
1550              dr_a1 & dr_b and dr_a2 & dr_b when SEGMENT_LENGTH_A is const.
1551
1552              Note 2: Above description is based on positive DR_STEP, we need to
1553              take care of negative DR_STEP for wrapping behavior.  See PR80815
1554              for more information.  */
1555           if (neg_step)
1556             {
1557               /* Adjust diff according to access size of both references.  */
1558               diff += tree_to_poly_uint64
1559                 (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_a2->dr))));
1560               diff -= tree_to_poly_uint64
1561                 (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_a1->dr))));
1562               /* Case A.1.  */
1563               if (known_le (diff, min_seg_len_b)
1564                   /* Case A.2 and B combined.  */
1565                   || const_seg_len_a2)
1566                 {
1567                   if (const_seg_len_a1 || const_seg_len_a2)
1568                     new_seg_len
1569                       = build_int_cstu (sizetype,
1570                                         lower_bound (seg_len_a1 - diff,
1571                                                      seg_len_a2));
1572                   else
1573                     new_seg_len
1574                       = size_binop (MINUS_EXPR, dr_a2->seg_len,
1575                                     build_int_cstu (sizetype, diff));
1576
1577                   dr_a2->seg_len = new_seg_len;
1578                   do_remove = true;
1579                 }
1580             }
1581           else
1582             {
1583               /* Case A.1.  */
1584               if (known_le (diff, min_seg_len_b)
1585                   /* Case A.2 and B combined.  */
1586                   || const_seg_len_a1)
1587                 {
1588                   if (const_seg_len_a1 && const_seg_len_a2)
1589                     new_seg_len
1590                       = build_int_cstu (sizetype,
1591                                         upper_bound (seg_len_a2 + diff,
1592                                                      seg_len_a1));
1593                   else
1594                     new_seg_len
1595                       = size_binop (PLUS_EXPR, dr_a2->seg_len,
1596                                     build_int_cstu (sizetype, diff));
1597
1598                   dr_a1->seg_len = new_seg_len;
1599                   do_remove = true;
1600                 }
1601             }
1602
1603           if (do_remove)
1604             {
1605               if (dump_enabled_p ())
1606                 {
1607                   dump_printf (MSG_NOTE, "merging ranges for ");
1608                   dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a1->dr));
1609                   dump_printf (MSG_NOTE,  ", ");
1610                   dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b1->dr));
1611                   dump_printf (MSG_NOTE,  " and ");
1612                   dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a2->dr));
1613                   dump_printf (MSG_NOTE,  ", ");
1614                   dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b2->dr));
1615                   dump_printf (MSG_NOTE, "\n");
1616                 }
1617               alias_pairs->ordered_remove (neg_step ? i - 1 : i);
1618               i--;
1619             }
1620         }
1621     }
1622 }
1623
1624 /* Given LOOP's two data references and segment lengths described by DR_A
1625    and DR_B, create expression checking if the two addresses ranges intersect
1626    with each other based on index of the two addresses.  This can only be
1627    done if DR_A and DR_B referring to the same (array) object and the index
1628    is the only difference.  For example:
1629
1630                        DR_A                           DR_B
1631       data-ref         arr[i]                         arr[j]
1632       base_object      arr                            arr
1633       index            {i_0, +, 1}_loop               {j_0, +, 1}_loop
1634
1635    The addresses and their index are like:
1636
1637         |<- ADDR_A    ->|          |<- ADDR_B    ->|
1638      ------------------------------------------------------->
1639         |   |   |   |   |          |   |   |   |   |
1640      ------------------------------------------------------->
1641         i_0 ...         i_0+4      j_0 ...         j_0+4
1642
1643    We can create expression based on index rather than address:
1644
1645      (i_0 + 4 < j_0 || j_0 + 4 < i_0)
1646
1647    Note evolution step of index needs to be considered in comparison.  */
1648
1649 static bool
1650 create_intersect_range_checks_index (struct loop *loop, tree *cond_expr,
1651                                      const dr_with_seg_len& dr_a,
1652                                      const dr_with_seg_len& dr_b)
1653 {
1654   if (integer_zerop (DR_STEP (dr_a.dr))
1655       || integer_zerop (DR_STEP (dr_b.dr))
1656       || DR_NUM_DIMENSIONS (dr_a.dr) != DR_NUM_DIMENSIONS (dr_b.dr))
1657     return false;
1658
1659   if (!tree_fits_uhwi_p (dr_a.seg_len) || !tree_fits_uhwi_p (dr_b.seg_len))
1660     return false;
1661
1662   if (!tree_fits_shwi_p (DR_STEP (dr_a.dr)))
1663     return false;
1664
1665   if (!operand_equal_p (DR_BASE_OBJECT (dr_a.dr), DR_BASE_OBJECT (dr_b.dr), 0))
1666     return false;
1667
1668   if (!operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0))
1669     return false;
1670
1671   gcc_assert (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST);
1672
1673   bool neg_step = tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0;
1674   unsigned HOST_WIDE_INT abs_step
1675     = absu_hwi (tree_to_shwi (DR_STEP (dr_a.dr)));
1676
1677   unsigned HOST_WIDE_INT seg_len1 = tree_to_uhwi (dr_a.seg_len);
1678   unsigned HOST_WIDE_INT seg_len2 = tree_to_uhwi (dr_b.seg_len);
1679   /* Infer the number of iterations with which the memory segment is accessed
1680      by DR.  In other words, alias is checked if memory segment accessed by
1681      DR_A in some iterations intersect with memory segment accessed by DR_B
1682      in the same amount iterations.
1683      Note segnment length is a linear function of number of iterations with
1684      DR_STEP as the coefficient.  */
1685   unsigned HOST_WIDE_INT niter_len1 = (seg_len1 + abs_step - 1) / abs_step;
1686   unsigned HOST_WIDE_INT niter_len2 = (seg_len2 + abs_step - 1) / abs_step;
1687
1688   unsigned int i;
1689   for (i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
1690     {
1691       tree access1 = DR_ACCESS_FN (dr_a.dr, i);
1692       tree access2 = DR_ACCESS_FN (dr_b.dr, i);
1693       /* Two indices must be the same if they are not scev, or not scev wrto
1694          current loop being vecorized.  */
1695       if (TREE_CODE (access1) != POLYNOMIAL_CHREC
1696           || TREE_CODE (access2) != POLYNOMIAL_CHREC
1697           || CHREC_VARIABLE (access1) != (unsigned)loop->num
1698           || CHREC_VARIABLE (access2) != (unsigned)loop->num)
1699         {
1700           if (operand_equal_p (access1, access2, 0))
1701             continue;
1702
1703           return false;
1704         }
1705       /* The two indices must have the same step.  */
1706       if (!operand_equal_p (CHREC_RIGHT (access1), CHREC_RIGHT (access2), 0))
1707         return false;
1708
1709       tree idx_step = CHREC_RIGHT (access1);
1710       /* Index must have const step, otherwise DR_STEP won't be constant.  */
1711       gcc_assert (TREE_CODE (idx_step) == INTEGER_CST);
1712       /* Index must evaluate in the same direction as DR.  */
1713       gcc_assert (!neg_step || tree_int_cst_sign_bit (idx_step) == 1);
1714
1715       tree min1 = CHREC_LEFT (access1);
1716       tree min2 = CHREC_LEFT (access2);
1717       if (!types_compatible_p (TREE_TYPE (min1), TREE_TYPE (min2)))
1718         return false;
1719
1720       /* Ideally, alias can be checked against loop's control IV, but we
1721          need to prove linear mapping between control IV and reference
1722          index.  Although that should be true, we check against (array)
1723          index of data reference.  Like segment length, index length is
1724          linear function of the number of iterations with index_step as
1725          the coefficient, i.e, niter_len * idx_step.  */
1726       tree idx_len1 = fold_build2 (MULT_EXPR, TREE_TYPE (min1), idx_step,
1727                                    build_int_cst (TREE_TYPE (min1),
1728                                                   niter_len1));
1729       tree idx_len2 = fold_build2 (MULT_EXPR, TREE_TYPE (min2), idx_step,
1730                                    build_int_cst (TREE_TYPE (min2),
1731                                                   niter_len2));
1732       tree max1 = fold_build2 (PLUS_EXPR, TREE_TYPE (min1), min1, idx_len1);
1733       tree max2 = fold_build2 (PLUS_EXPR, TREE_TYPE (min2), min2, idx_len2);
1734       /* Adjust ranges for negative step.  */
1735       if (neg_step)
1736         {
1737           min1 = fold_build2 (MINUS_EXPR, TREE_TYPE (min1), max1, idx_step);
1738           max1 = fold_build2 (MINUS_EXPR, TREE_TYPE (min1),
1739                               CHREC_LEFT (access1), idx_step);
1740           min2 = fold_build2 (MINUS_EXPR, TREE_TYPE (min2), max2, idx_step);
1741           max2 = fold_build2 (MINUS_EXPR, TREE_TYPE (min2),
1742                               CHREC_LEFT (access2), idx_step);
1743         }
1744       tree part_cond_expr
1745         = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1746             fold_build2 (LE_EXPR, boolean_type_node, max1, min2),
1747             fold_build2 (LE_EXPR, boolean_type_node, max2, min1));
1748       if (*cond_expr)
1749         *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1750                                   *cond_expr, part_cond_expr);
1751       else
1752         *cond_expr = part_cond_expr;
1753     }
1754   return true;
1755 }
1756
1757 /* Given two data references and segment lengths described by DR_A and DR_B,
1758    create expression checking if the two addresses ranges intersect with
1759    each other:
1760
1761      ((DR_A_addr_0 + DR_A_segment_length_0) <= DR_B_addr_0)
1762      || (DR_B_addr_0 + DER_B_segment_length_0) <= DR_A_addr_0))  */
1763
1764 static void
1765 create_intersect_range_checks (struct loop *loop, tree *cond_expr,
1766                                const dr_with_seg_len& dr_a,
1767                                const dr_with_seg_len& dr_b)
1768 {
1769   *cond_expr = NULL_TREE;
1770   if (create_intersect_range_checks_index (loop, cond_expr, dr_a, dr_b))
1771     return;
1772
1773   tree segment_length_a = dr_a.seg_len;
1774   tree segment_length_b = dr_b.seg_len;
1775   tree addr_base_a = DR_BASE_ADDRESS (dr_a.dr);
1776   tree addr_base_b = DR_BASE_ADDRESS (dr_b.dr);
1777   tree offset_a = DR_OFFSET (dr_a.dr), offset_b = DR_OFFSET (dr_b.dr);
1778
1779   offset_a = fold_build2 (PLUS_EXPR, TREE_TYPE (offset_a),
1780                           offset_a, DR_INIT (dr_a.dr));
1781   offset_b = fold_build2 (PLUS_EXPR, TREE_TYPE (offset_b),
1782                           offset_b, DR_INIT (dr_b.dr));
1783   addr_base_a = fold_build_pointer_plus (addr_base_a, offset_a);
1784   addr_base_b = fold_build_pointer_plus (addr_base_b, offset_b);
1785
1786   tree seg_a_min = addr_base_a;
1787   tree seg_a_max = fold_build_pointer_plus (addr_base_a, segment_length_a);
1788   /* For negative step, we need to adjust address range by TYPE_SIZE_UNIT
1789      bytes, e.g., int a[3] -> a[1] range is [a+4, a+16) instead of
1790      [a, a+12) */
1791   if (tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0)
1792     {
1793       tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_a.dr)));
1794       seg_a_min = fold_build_pointer_plus (seg_a_max, unit_size);
1795       seg_a_max = fold_build_pointer_plus (addr_base_a, unit_size);
1796     }
1797
1798   tree seg_b_min = addr_base_b;
1799   tree seg_b_max = fold_build_pointer_plus (addr_base_b, segment_length_b);
1800   if (tree_int_cst_compare (DR_STEP (dr_b.dr), size_zero_node) < 0)
1801     {
1802       tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_b.dr)));
1803       seg_b_min = fold_build_pointer_plus (seg_b_max, unit_size);
1804       seg_b_max = fold_build_pointer_plus (addr_base_b, unit_size);
1805     }
1806   *cond_expr
1807     = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1808         fold_build2 (LE_EXPR, boolean_type_node, seg_a_max, seg_b_min),
1809         fold_build2 (LE_EXPR, boolean_type_node, seg_b_max, seg_a_min));
1810 }
1811
1812 /* Create a conditional expression that represents the run-time checks for
1813    overlapping of address ranges represented by a list of data references
1814    pairs passed in ALIAS_PAIRS.  Data references are in LOOP.  The returned
1815    COND_EXPR is the conditional expression to be used in the if statement
1816    that controls which version of the loop gets executed at runtime.  */
1817
1818 void
1819 create_runtime_alias_checks (struct loop *loop,
1820                              vec<dr_with_seg_len_pair_t> *alias_pairs,
1821                              tree * cond_expr)
1822 {
1823   tree part_cond_expr;
1824
1825   for (size_t i = 0, s = alias_pairs->length (); i < s; ++i)
1826     {
1827       const dr_with_seg_len& dr_a = (*alias_pairs)[i].first;
1828       const dr_with_seg_len& dr_b = (*alias_pairs)[i].second;
1829
1830       if (dump_enabled_p ())
1831         {
1832           dump_printf (MSG_NOTE, "create runtime check for data references ");
1833           dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a.dr));
1834           dump_printf (MSG_NOTE, " and ");
1835           dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b.dr));
1836           dump_printf (MSG_NOTE, "\n");
1837         }
1838
1839       /* Create condition expression for each pair data references.  */
1840       create_intersect_range_checks (loop, &part_cond_expr, dr_a, dr_b);
1841       if (*cond_expr)
1842         *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1843                                   *cond_expr, part_cond_expr);
1844       else
1845         *cond_expr = part_cond_expr;
1846     }
1847 }
1848
1849 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
1850    expressions.  */
1851 static bool
1852 dr_equal_offsets_p1 (tree offset1, tree offset2)
1853 {
1854   bool res;
1855
1856   STRIP_NOPS (offset1);
1857   STRIP_NOPS (offset2);
1858
1859   if (offset1 == offset2)
1860     return true;
1861
1862   if (TREE_CODE (offset1) != TREE_CODE (offset2)
1863       || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
1864     return false;
1865
1866   res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
1867                              TREE_OPERAND (offset2, 0));
1868
1869   if (!res || !BINARY_CLASS_P (offset1))
1870     return res;
1871
1872   res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
1873                              TREE_OPERAND (offset2, 1));
1874
1875   return res;
1876 }
1877
1878 /* Check if DRA and DRB have equal offsets.  */
1879 bool
1880 dr_equal_offsets_p (struct data_reference *dra,
1881                     struct data_reference *drb)
1882 {
1883   tree offset1, offset2;
1884
1885   offset1 = DR_OFFSET (dra);
1886   offset2 = DR_OFFSET (drb);
1887
1888   return dr_equal_offsets_p1 (offset1, offset2);
1889 }
1890
1891 /* Returns true if FNA == FNB.  */
1892
1893 static bool
1894 affine_function_equal_p (affine_fn fna, affine_fn fnb)
1895 {
1896   unsigned i, n = fna.length ();
1897
1898   if (n != fnb.length ())
1899     return false;
1900
1901   for (i = 0; i < n; i++)
1902     if (!operand_equal_p (fna[i], fnb[i], 0))
1903       return false;
1904
1905   return true;
1906 }
1907
1908 /* If all the functions in CF are the same, returns one of them,
1909    otherwise returns NULL.  */
1910
1911 static affine_fn
1912 common_affine_function (conflict_function *cf)
1913 {
1914   unsigned i;
1915   affine_fn comm;
1916
1917   if (!CF_NONTRIVIAL_P (cf))
1918     return affine_fn ();
1919
1920   comm = cf->fns[0];
1921
1922   for (i = 1; i < cf->n; i++)
1923     if (!affine_function_equal_p (comm, cf->fns[i]))
1924       return affine_fn ();
1925
1926   return comm;
1927 }
1928
1929 /* Returns the base of the affine function FN.  */
1930
1931 static tree
1932 affine_function_base (affine_fn fn)
1933 {
1934   return fn[0];
1935 }
1936
1937 /* Returns true if FN is a constant.  */
1938
1939 static bool
1940 affine_function_constant_p (affine_fn fn)
1941 {
1942   unsigned i;
1943   tree coef;
1944
1945   for (i = 1; fn.iterate (i, &coef); i++)
1946     if (!integer_zerop (coef))
1947       return false;
1948
1949   return true;
1950 }
1951
1952 /* Returns true if FN is the zero constant function.  */
1953
1954 static bool
1955 affine_function_zero_p (affine_fn fn)
1956 {
1957   return (integer_zerop (affine_function_base (fn))
1958           && affine_function_constant_p (fn));
1959 }
1960
1961 /* Returns a signed integer type with the largest precision from TA
1962    and TB.  */
1963
1964 static tree
1965 signed_type_for_types (tree ta, tree tb)
1966 {
1967   if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
1968     return signed_type_for (ta);
1969   else
1970     return signed_type_for (tb);
1971 }
1972
1973 /* Applies operation OP on affine functions FNA and FNB, and returns the
1974    result.  */
1975
1976 static affine_fn
1977 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
1978 {
1979   unsigned i, n, m;
1980   affine_fn ret;
1981   tree coef;
1982
1983   if (fnb.length () > fna.length ())
1984     {
1985       n = fna.length ();
1986       m = fnb.length ();
1987     }
1988   else
1989     {
1990       n = fnb.length ();
1991       m = fna.length ();
1992     }
1993
1994   ret.create (m);
1995   for (i = 0; i < n; i++)
1996     {
1997       tree type = signed_type_for_types (TREE_TYPE (fna[i]),
1998                                          TREE_TYPE (fnb[i]));
1999       ret.quick_push (fold_build2 (op, type, fna[i], fnb[i]));
2000     }
2001
2002   for (; fna.iterate (i, &coef); i++)
2003     ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2004                                  coef, integer_zero_node));
2005   for (; fnb.iterate (i, &coef); i++)
2006     ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2007                                  integer_zero_node, coef));
2008
2009   return ret;
2010 }
2011
2012 /* Returns the sum of affine functions FNA and FNB.  */
2013
2014 static affine_fn
2015 affine_fn_plus (affine_fn fna, affine_fn fnb)
2016 {
2017   return affine_fn_op (PLUS_EXPR, fna, fnb);
2018 }
2019
2020 /* Returns the difference of affine functions FNA and FNB.  */
2021
2022 static affine_fn
2023 affine_fn_minus (affine_fn fna, affine_fn fnb)
2024 {
2025   return affine_fn_op (MINUS_EXPR, fna, fnb);
2026 }
2027
2028 /* Frees affine function FN.  */
2029
2030 static void
2031 affine_fn_free (affine_fn fn)
2032 {
2033   fn.release ();
2034 }
2035
2036 /* Determine for each subscript in the data dependence relation DDR
2037    the distance.  */
2038
2039 static void
2040 compute_subscript_distance (struct data_dependence_relation *ddr)
2041 {
2042   conflict_function *cf_a, *cf_b;
2043   affine_fn fn_a, fn_b, diff;
2044
2045   if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
2046     {
2047       unsigned int i;
2048
2049       for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2050         {
2051           struct subscript *subscript;
2052
2053           subscript = DDR_SUBSCRIPT (ddr, i);
2054           cf_a = SUB_CONFLICTS_IN_A (subscript);
2055           cf_b = SUB_CONFLICTS_IN_B (subscript);
2056
2057           fn_a = common_affine_function (cf_a);
2058           fn_b = common_affine_function (cf_b);
2059           if (!fn_a.exists () || !fn_b.exists ())
2060             {
2061               SUB_DISTANCE (subscript) = chrec_dont_know;
2062               return;
2063             }
2064           diff = affine_fn_minus (fn_a, fn_b);
2065
2066           if (affine_function_constant_p (diff))
2067             SUB_DISTANCE (subscript) = affine_function_base (diff);
2068           else
2069             SUB_DISTANCE (subscript) = chrec_dont_know;
2070
2071           affine_fn_free (diff);
2072         }
2073     }
2074 }
2075
2076 /* Returns the conflict function for "unknown".  */
2077
2078 static conflict_function *
2079 conflict_fn_not_known (void)
2080 {
2081   conflict_function *fn = XCNEW (conflict_function);
2082   fn->n = NOT_KNOWN;
2083
2084   return fn;
2085 }
2086
2087 /* Returns the conflict function for "independent".  */
2088
2089 static conflict_function *
2090 conflict_fn_no_dependence (void)
2091 {
2092   conflict_function *fn = XCNEW (conflict_function);
2093   fn->n = NO_DEPENDENCE;
2094
2095   return fn;
2096 }
2097
2098 /* Returns true if the address of OBJ is invariant in LOOP.  */
2099
2100 static bool
2101 object_address_invariant_in_loop_p (const struct loop *loop, const_tree obj)
2102 {
2103   while (handled_component_p (obj))
2104     {
2105       if (TREE_CODE (obj) == ARRAY_REF)
2106         {
2107           /* Index of the ARRAY_REF was zeroed in analyze_indices, thus we only
2108              need to check the stride and the lower bound of the reference.  */
2109           if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
2110                                                       loop->num)
2111               || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 3),
2112                                                          loop->num))
2113             return false;
2114         }
2115       else if (TREE_CODE (obj) == COMPONENT_REF)
2116         {
2117           if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
2118                                                       loop->num))
2119             return false;
2120         }
2121       obj = TREE_OPERAND (obj, 0);
2122     }
2123
2124   if (!INDIRECT_REF_P (obj)
2125       && TREE_CODE (obj) != MEM_REF)
2126     return true;
2127
2128   return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
2129                                                   loop->num);
2130 }
2131
2132 /* Returns false if we can prove that data references A and B do not alias,
2133    true otherwise.  If LOOP_NEST is false no cross-iteration aliases are
2134    considered.  */
2135
2136 bool
2137 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
2138                 bool loop_nest)
2139 {
2140   tree addr_a = DR_BASE_OBJECT (a);
2141   tree addr_b = DR_BASE_OBJECT (b);
2142
2143   /* If we are not processing a loop nest but scalar code we
2144      do not need to care about possible cross-iteration dependences
2145      and thus can process the full original reference.  Do so,
2146      similar to how loop invariant motion applies extra offset-based
2147      disambiguation.  */
2148   if (!loop_nest)
2149     {
2150       aff_tree off1, off2;
2151       poly_widest_int size1, size2;
2152       get_inner_reference_aff (DR_REF (a), &off1, &size1);
2153       get_inner_reference_aff (DR_REF (b), &off2, &size2);
2154       aff_combination_scale (&off1, -1);
2155       aff_combination_add (&off2, &off1);
2156       if (aff_comb_cannot_overlap_p (&off2, size1, size2))
2157         return false;
2158     }
2159
2160   if ((TREE_CODE (addr_a) == MEM_REF || TREE_CODE (addr_a) == TARGET_MEM_REF)
2161       && (TREE_CODE (addr_b) == MEM_REF || TREE_CODE (addr_b) == TARGET_MEM_REF)
2162       && MR_DEPENDENCE_CLIQUE (addr_a) == MR_DEPENDENCE_CLIQUE (addr_b)
2163       && MR_DEPENDENCE_BASE (addr_a) != MR_DEPENDENCE_BASE (addr_b))
2164     return false;
2165
2166   /* If we had an evolution in a pointer-based MEM_REF BASE_OBJECT we
2167      do not know the size of the base-object.  So we cannot do any
2168      offset/overlap based analysis but have to rely on points-to
2169      information only.  */
2170   if (TREE_CODE (addr_a) == MEM_REF
2171       && (DR_UNCONSTRAINED_BASE (a)
2172           || TREE_CODE (TREE_OPERAND (addr_a, 0)) == SSA_NAME))
2173     {
2174       /* For true dependences we can apply TBAA.  */
2175       if (flag_strict_aliasing
2176           && DR_IS_WRITE (a) && DR_IS_READ (b)
2177           && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
2178                                      get_alias_set (DR_REF (b))))
2179         return false;
2180       if (TREE_CODE (addr_b) == MEM_REF)
2181         return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
2182                                        TREE_OPERAND (addr_b, 0));
2183       else
2184         return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
2185                                        build_fold_addr_expr (addr_b));
2186     }
2187   else if (TREE_CODE (addr_b) == MEM_REF
2188            && (DR_UNCONSTRAINED_BASE (b)
2189                || TREE_CODE (TREE_OPERAND (addr_b, 0)) == SSA_NAME))
2190     {
2191       /* For true dependences we can apply TBAA.  */
2192       if (flag_strict_aliasing
2193           && DR_IS_WRITE (a) && DR_IS_READ (b)
2194           && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
2195                                      get_alias_set (DR_REF (b))))
2196         return false;
2197       if (TREE_CODE (addr_a) == MEM_REF)
2198         return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
2199                                        TREE_OPERAND (addr_b, 0));
2200       else
2201         return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
2202                                        TREE_OPERAND (addr_b, 0));
2203     }
2204
2205   /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
2206      that is being subsetted in the loop nest.  */
2207   if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
2208     return refs_output_dependent_p (addr_a, addr_b);
2209   else if (DR_IS_READ (a) && DR_IS_WRITE (b))
2210     return refs_anti_dependent_p (addr_a, addr_b);
2211   return refs_may_alias_p (addr_a, addr_b);
2212 }
2213
2214 /* REF_A and REF_B both satisfy access_fn_component_p.  Return true
2215    if it is meaningful to compare their associated access functions
2216    when checking for dependencies.  */
2217
2218 static bool
2219 access_fn_components_comparable_p (tree ref_a, tree ref_b)
2220 {
2221   /* Allow pairs of component refs from the following sets:
2222
2223        { REALPART_EXPR, IMAGPART_EXPR }
2224        { COMPONENT_REF }
2225        { ARRAY_REF }.  */
2226   tree_code code_a = TREE_CODE (ref_a);
2227   tree_code code_b = TREE_CODE (ref_b);
2228   if (code_a == IMAGPART_EXPR)
2229     code_a = REALPART_EXPR;
2230   if (code_b == IMAGPART_EXPR)
2231     code_b = REALPART_EXPR;
2232   if (code_a != code_b)
2233     return false;
2234
2235   if (TREE_CODE (ref_a) == COMPONENT_REF)
2236     /* ??? We cannot simply use the type of operand #0 of the refs here as
2237        the Fortran compiler smuggles type punning into COMPONENT_REFs.
2238        Use the DECL_CONTEXT of the FIELD_DECLs instead.  */
2239     return (DECL_CONTEXT (TREE_OPERAND (ref_a, 1))
2240             == DECL_CONTEXT (TREE_OPERAND (ref_b, 1)));
2241
2242   return types_compatible_p (TREE_TYPE (TREE_OPERAND (ref_a, 0)),
2243                              TREE_TYPE (TREE_OPERAND (ref_b, 0)));
2244 }
2245
2246 /* Initialize a data dependence relation between data accesses A and
2247    B.  NB_LOOPS is the number of loops surrounding the references: the
2248    size of the classic distance/direction vectors.  */
2249
2250 struct data_dependence_relation *
2251 initialize_data_dependence_relation (struct data_reference *a,
2252                                      struct data_reference *b,
2253                                      vec<loop_p> loop_nest)
2254 {
2255   struct data_dependence_relation *res;
2256   unsigned int i;
2257
2258   res = XCNEW (struct data_dependence_relation);
2259   DDR_A (res) = a;
2260   DDR_B (res) = b;
2261   DDR_LOOP_NEST (res).create (0);
2262   DDR_SUBSCRIPTS (res).create (0);
2263   DDR_DIR_VECTS (res).create (0);
2264   DDR_DIST_VECTS (res).create (0);
2265
2266   if (a == NULL || b == NULL)
2267     {
2268       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
2269       return res;
2270     }
2271
2272   /* If the data references do not alias, then they are independent.  */
2273   if (!dr_may_alias_p (a, b, loop_nest.exists ()))
2274     {
2275       DDR_ARE_DEPENDENT (res) = chrec_known;
2276       return res;
2277     }
2278
2279   unsigned int num_dimensions_a = DR_NUM_DIMENSIONS (a);
2280   unsigned int num_dimensions_b = DR_NUM_DIMENSIONS (b);
2281   if (num_dimensions_a == 0 || num_dimensions_b == 0)
2282     {
2283       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
2284       return res;
2285     }
2286
2287   /* For unconstrained bases, the root (highest-indexed) subscript
2288      describes a variation in the base of the original DR_REF rather
2289      than a component access.  We have no type that accurately describes
2290      the new DR_BASE_OBJECT (whose TREE_TYPE describes the type *after*
2291      applying this subscript) so limit the search to the last real
2292      component access.
2293
2294      E.g. for:
2295
2296         void
2297         f (int a[][8], int b[][8])
2298         {
2299           for (int i = 0; i < 8; ++i)
2300             a[i * 2][0] = b[i][0];
2301         }
2302
2303      the a and b accesses have a single ARRAY_REF component reference [0]
2304      but have two subscripts.  */
2305   if (DR_UNCONSTRAINED_BASE (a))
2306     num_dimensions_a -= 1;
2307   if (DR_UNCONSTRAINED_BASE (b))
2308     num_dimensions_b -= 1;
2309
2310   /* These structures describe sequences of component references in
2311      DR_REF (A) and DR_REF (B).  Each component reference is tied to a
2312      specific access function.  */
2313   struct {
2314     /* The sequence starts at DR_ACCESS_FN (A, START_A) of A and
2315        DR_ACCESS_FN (B, START_B) of B (inclusive) and extends to higher
2316        indices.  In C notation, these are the indices of the rightmost
2317        component references; e.g. for a sequence .b.c.d, the start
2318        index is for .d.  */
2319     unsigned int start_a;
2320     unsigned int start_b;
2321
2322     /* The sequence contains LENGTH consecutive access functions from
2323        each DR.  */
2324     unsigned int length;
2325
2326     /* The enclosing objects for the A and B sequences respectively,
2327        i.e. the objects to which DR_ACCESS_FN (A, START_A + LENGTH - 1)
2328        and DR_ACCESS_FN (B, START_B + LENGTH - 1) are applied.  */
2329     tree object_a;
2330     tree object_b;
2331   } full_seq = {}, struct_seq = {};
2332
2333   /* Before each iteration of the loop:
2334
2335      - REF_A is what you get after applying DR_ACCESS_FN (A, INDEX_A) and
2336      - REF_B is what you get after applying DR_ACCESS_FN (B, INDEX_B).  */
2337   unsigned int index_a = 0;
2338   unsigned int index_b = 0;
2339   tree ref_a = DR_REF (a);
2340   tree ref_b = DR_REF (b);
2341
2342   /* Now walk the component references from the final DR_REFs back up to
2343      the enclosing base objects.  Each component reference corresponds
2344      to one access function in the DR, with access function 0 being for
2345      the final DR_REF and the highest-indexed access function being the
2346      one that is applied to the base of the DR.
2347
2348      Look for a sequence of component references whose access functions
2349      are comparable (see access_fn_components_comparable_p).  If more
2350      than one such sequence exists, pick the one nearest the base
2351      (which is the leftmost sequence in C notation).  Store this sequence
2352      in FULL_SEQ.
2353
2354      For example, if we have:
2355
2356         struct foo { struct bar s; ... } (*a)[10], (*b)[10];
2357
2358         A: a[0][i].s.c.d
2359         B: __real b[0][i].s.e[i].f
2360
2361      (where d is the same type as the real component of f) then the access
2362      functions would be:
2363
2364                          0   1   2   3
2365         A:              .d  .c  .s [i]
2366
2367                  0   1   2   3   4   5
2368         B:  __real  .f [i]  .e  .s [i]
2369
2370      The A0/B2 column isn't comparable, since .d is a COMPONENT_REF
2371      and [i] is an ARRAY_REF.  However, the A1/B3 column contains two
2372      COMPONENT_REF accesses for struct bar, so is comparable.  Likewise
2373      the A2/B4 column contains two COMPONENT_REF accesses for struct foo,
2374      so is comparable.  The A3/B5 column contains two ARRAY_REFs that
2375      index foo[10] arrays, so is again comparable.  The sequence is
2376      therefore:
2377
2378         A: [1, 3]  (i.e. [i].s.c)
2379         B: [3, 5]  (i.e. [i].s.e)
2380
2381      Also look for sequences of component references whose access
2382      functions are comparable and whose enclosing objects have the same
2383      RECORD_TYPE.  Store this sequence in STRUCT_SEQ.  In the above
2384      example, STRUCT_SEQ would be:
2385
2386         A: [1, 2]  (i.e. s.c)
2387         B: [3, 4]  (i.e. s.e)  */
2388   while (index_a < num_dimensions_a && index_b < num_dimensions_b)
2389     {
2390       /* REF_A and REF_B must be one of the component access types
2391          allowed by dr_analyze_indices.  */
2392       gcc_checking_assert (access_fn_component_p (ref_a));
2393       gcc_checking_assert (access_fn_component_p (ref_b));
2394
2395       /* Get the immediately-enclosing objects for REF_A and REF_B,
2396          i.e. the references *before* applying DR_ACCESS_FN (A, INDEX_A)
2397          and DR_ACCESS_FN (B, INDEX_B).  */
2398       tree object_a = TREE_OPERAND (ref_a, 0);
2399       tree object_b = TREE_OPERAND (ref_b, 0);
2400
2401       tree type_a = TREE_TYPE (object_a);
2402       tree type_b = TREE_TYPE (object_b);
2403       if (access_fn_components_comparable_p (ref_a, ref_b))
2404         {
2405           /* This pair of component accesses is comparable for dependence
2406              analysis, so we can include DR_ACCESS_FN (A, INDEX_A) and
2407              DR_ACCESS_FN (B, INDEX_B) in the sequence.  */
2408           if (full_seq.start_a + full_seq.length != index_a
2409               || full_seq.start_b + full_seq.length != index_b)
2410             {
2411               /* The accesses don't extend the current sequence,
2412                  so start a new one here.  */
2413               full_seq.start_a = index_a;
2414               full_seq.start_b = index_b;
2415               full_seq.length = 0;
2416             }
2417
2418           /* Add this pair of references to the sequence.  */
2419           full_seq.length += 1;
2420           full_seq.object_a = object_a;
2421           full_seq.object_b = object_b;
2422
2423           /* If the enclosing objects are structures (and thus have the
2424              same RECORD_TYPE), record the new sequence in STRUCT_SEQ.  */
2425           if (TREE_CODE (type_a) == RECORD_TYPE)
2426             struct_seq = full_seq;
2427
2428           /* Move to the next containing reference for both A and B.  */
2429           ref_a = object_a;
2430           ref_b = object_b;
2431           index_a += 1;
2432           index_b += 1;
2433           continue;
2434         }
2435
2436       /* Try to approach equal type sizes.  */
2437       if (!COMPLETE_TYPE_P (type_a)
2438           || !COMPLETE_TYPE_P (type_b)
2439           || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_a))
2440           || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_b)))
2441         break;
2442
2443       unsigned HOST_WIDE_INT size_a = tree_to_uhwi (TYPE_SIZE_UNIT (type_a));
2444       unsigned HOST_WIDE_INT size_b = tree_to_uhwi (TYPE_SIZE_UNIT (type_b));
2445       if (size_a <= size_b)
2446         {
2447           index_a += 1;
2448           ref_a = object_a;
2449         }
2450       if (size_b <= size_a)
2451         {
2452           index_b += 1;
2453           ref_b = object_b;
2454         }
2455     }
2456
2457   /* See whether FULL_SEQ ends at the base and whether the two bases
2458      are equal.  We do not care about TBAA or alignment info so we can
2459      use OEP_ADDRESS_OF to avoid false negatives.  */
2460   tree base_a = DR_BASE_OBJECT (a);
2461   tree base_b = DR_BASE_OBJECT (b);
2462   bool same_base_p = (full_seq.start_a + full_seq.length == num_dimensions_a
2463                       && full_seq.start_b + full_seq.length == num_dimensions_b
2464                       && DR_UNCONSTRAINED_BASE (a) == DR_UNCONSTRAINED_BASE (b)
2465                       && operand_equal_p (base_a, base_b, OEP_ADDRESS_OF)
2466                       && types_compatible_p (TREE_TYPE (base_a),
2467                                              TREE_TYPE (base_b))
2468                       && (!loop_nest.exists ()
2469                           || (object_address_invariant_in_loop_p
2470                               (loop_nest[0], base_a))));
2471
2472   /* If the bases are the same, we can include the base variation too.
2473      E.g. the b accesses in:
2474
2475        for (int i = 0; i < n; ++i)
2476          b[i + 4][0] = b[i][0];
2477
2478      have a definite dependence distance of 4, while for:
2479
2480        for (int i = 0; i < n; ++i)
2481          a[i + 4][0] = b[i][0];
2482
2483      the dependence distance depends on the gap between a and b.
2484
2485      If the bases are different then we can only rely on the sequence
2486      rooted at a structure access, since arrays are allowed to overlap
2487      arbitrarily and change shape arbitrarily.  E.g. we treat this as
2488      valid code:
2489
2490        int a[256];
2491        ...
2492        ((int (*)[4][3]) &a[1])[i][0] += ((int (*)[4][3]) &a[2])[i][0];
2493
2494      where two lvalues with the same int[4][3] type overlap, and where
2495      both lvalues are distinct from the object's declared type.  */
2496   if (same_base_p)
2497     {
2498       if (DR_UNCONSTRAINED_BASE (a))
2499         full_seq.length += 1;
2500     }
2501   else
2502     full_seq = struct_seq;
2503
2504   /* Punt if we didn't find a suitable sequence.  */
2505   if (full_seq.length == 0)
2506     {
2507       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
2508       return res;
2509     }
2510
2511   if (!same_base_p)
2512     {
2513       /* Partial overlap is possible for different bases when strict aliasing
2514          is not in effect.  It's also possible if either base involves a union
2515          access; e.g. for:
2516
2517            struct s1 { int a[2]; };
2518            struct s2 { struct s1 b; int c; };
2519            struct s3 { int d; struct s1 e; };
2520            union u { struct s2 f; struct s3 g; } *p, *q;
2521
2522          the s1 at "p->f.b" (base "p->f") partially overlaps the s1 at
2523          "p->g.e" (base "p->g") and might partially overlap the s1 at
2524          "q->g.e" (base "q->g").  */
2525       if (!flag_strict_aliasing
2526           || ref_contains_union_access_p (full_seq.object_a)
2527           || ref_contains_union_access_p (full_seq.object_b))
2528         {
2529           DDR_ARE_DEPENDENT (res) = chrec_dont_know;
2530           return res;
2531         }
2532
2533       DDR_COULD_BE_INDEPENDENT_P (res) = true;
2534       if (!loop_nest.exists ()
2535           || (object_address_invariant_in_loop_p (loop_nest[0],
2536                                                   full_seq.object_a)
2537               && object_address_invariant_in_loop_p (loop_nest[0],
2538                                                      full_seq.object_b)))
2539         {
2540           DDR_OBJECT_A (res) = full_seq.object_a;
2541           DDR_OBJECT_B (res) = full_seq.object_b;
2542         }
2543     }
2544
2545   DDR_AFFINE_P (res) = true;
2546   DDR_ARE_DEPENDENT (res) = NULL_TREE;
2547   DDR_SUBSCRIPTS (res).create (full_seq.length);
2548   DDR_LOOP_NEST (res) = loop_nest;
2549   DDR_INNER_LOOP (res) = 0;
2550   DDR_SELF_REFERENCE (res) = false;
2551
2552   for (i = 0; i < full_seq.length; ++i)
2553     {
2554       struct subscript *subscript;
2555
2556       subscript = XNEW (struct subscript);
2557       SUB_ACCESS_FN (subscript, 0) = DR_ACCESS_FN (a, full_seq.start_a + i);
2558       SUB_ACCESS_FN (subscript, 1) = DR_ACCESS_FN (b, full_seq.start_b + i);
2559       SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
2560       SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
2561       SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
2562       SUB_DISTANCE (subscript) = chrec_dont_know;
2563       DDR_SUBSCRIPTS (res).safe_push (subscript);
2564     }
2565
2566   return res;
2567 }
2568
2569 /* Frees memory used by the conflict function F.  */
2570
2571 static void
2572 free_conflict_function (conflict_function *f)
2573 {
2574   unsigned i;
2575
2576   if (CF_NONTRIVIAL_P (f))
2577     {
2578       for (i = 0; i < f->n; i++)
2579         affine_fn_free (f->fns[i]);
2580     }
2581   free (f);
2582 }
2583
2584 /* Frees memory used by SUBSCRIPTS.  */
2585
2586 static void
2587 free_subscripts (vec<subscript_p> subscripts)
2588 {
2589   unsigned i;
2590   subscript_p s;
2591
2592   FOR_EACH_VEC_ELT (subscripts, i, s)
2593     {
2594       free_conflict_function (s->conflicting_iterations_in_a);
2595       free_conflict_function (s->conflicting_iterations_in_b);
2596       free (s);
2597     }
2598   subscripts.release ();
2599 }
2600
2601 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
2602    description.  */
2603
2604 static inline void
2605 finalize_ddr_dependent (struct data_dependence_relation *ddr,
2606                         tree chrec)
2607 {
2608   DDR_ARE_DEPENDENT (ddr) = chrec;
2609   free_subscripts (DDR_SUBSCRIPTS (ddr));
2610   DDR_SUBSCRIPTS (ddr).create (0);
2611 }
2612
2613 /* The dependence relation DDR cannot be represented by a distance
2614    vector.  */
2615
2616 static inline void
2617 non_affine_dependence_relation (struct data_dependence_relation *ddr)
2618 {
2619   if (dump_file && (dump_flags & TDF_DETAILS))
2620     fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
2621
2622   DDR_AFFINE_P (ddr) = false;
2623 }
2624
2625 \f
2626
2627 /* This section contains the classic Banerjee tests.  */
2628
2629 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
2630    variables, i.e., if the ZIV (Zero Index Variable) test is true.  */
2631
2632 static inline bool
2633 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
2634 {
2635   return (evolution_function_is_constant_p (chrec_a)
2636           && evolution_function_is_constant_p (chrec_b));
2637 }
2638
2639 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
2640    variable, i.e., if the SIV (Single Index Variable) test is true.  */
2641
2642 static bool
2643 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
2644 {
2645   if ((evolution_function_is_constant_p (chrec_a)
2646        && evolution_function_is_univariate_p (chrec_b))
2647       || (evolution_function_is_constant_p (chrec_b)
2648           && evolution_function_is_univariate_p (chrec_a)))
2649     return true;
2650
2651   if (evolution_function_is_univariate_p (chrec_a)
2652       && evolution_function_is_univariate_p (chrec_b))
2653     {
2654       switch (TREE_CODE (chrec_a))
2655         {
2656         case POLYNOMIAL_CHREC:
2657           switch (TREE_CODE (chrec_b))
2658             {
2659             case POLYNOMIAL_CHREC:
2660               if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
2661                 return false;
2662               /* FALLTHRU */
2663
2664             default:
2665               return true;
2666             }
2667
2668         default:
2669           return true;
2670         }
2671     }
2672
2673   return false;
2674 }
2675
2676 /* Creates a conflict function with N dimensions.  The affine functions
2677    in each dimension follow.  */
2678
2679 static conflict_function *
2680 conflict_fn (unsigned n, ...)
2681 {
2682   unsigned i;
2683   conflict_function *ret = XCNEW (conflict_function);
2684   va_list ap;
2685
2686   gcc_assert (n > 0 && n <= MAX_DIM);
2687   va_start (ap, n);
2688
2689   ret->n = n;
2690   for (i = 0; i < n; i++)
2691     ret->fns[i] = va_arg (ap, affine_fn);
2692   va_end (ap);
2693
2694   return ret;
2695 }
2696
2697 /* Returns constant affine function with value CST.  */
2698
2699 static affine_fn
2700 affine_fn_cst (tree cst)
2701 {
2702   affine_fn fn;
2703   fn.create (1);
2704   fn.quick_push (cst);
2705   return fn;
2706 }
2707
2708 /* Returns affine function with single variable, CST + COEF * x_DIM.  */
2709
2710 static affine_fn
2711 affine_fn_univar (tree cst, unsigned dim, tree coef)
2712 {
2713   affine_fn fn;
2714   fn.create (dim + 1);
2715   unsigned i;
2716
2717   gcc_assert (dim > 0);
2718   fn.quick_push (cst);
2719   for (i = 1; i < dim; i++)
2720     fn.quick_push (integer_zero_node);
2721   fn.quick_push (coef);
2722   return fn;
2723 }
2724
2725 /* Analyze a ZIV (Zero Index Variable) subscript.  *OVERLAPS_A and
2726    *OVERLAPS_B are initialized to the functions that describe the
2727    relation between the elements accessed twice by CHREC_A and
2728    CHREC_B.  For k >= 0, the following property is verified:
2729
2730    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
2731
2732 static void
2733 analyze_ziv_subscript (tree chrec_a,
2734                        tree chrec_b,
2735                        conflict_function **overlaps_a,
2736                        conflict_function **overlaps_b,
2737                        tree *last_conflicts)
2738 {
2739   tree type, difference;
2740   dependence_stats.num_ziv++;
2741
2742   if (dump_file && (dump_flags & TDF_DETAILS))
2743     fprintf (dump_file, "(analyze_ziv_subscript \n");
2744
2745   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
2746   chrec_a = chrec_convert (type, chrec_a, NULL);
2747   chrec_b = chrec_convert (type, chrec_b, NULL);
2748   difference = chrec_fold_minus (type, chrec_a, chrec_b);
2749
2750   switch (TREE_CODE (difference))
2751     {
2752     case INTEGER_CST:
2753       if (integer_zerop (difference))
2754         {
2755           /* The difference is equal to zero: the accessed index
2756              overlaps for each iteration in the loop.  */
2757           *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2758           *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2759           *last_conflicts = chrec_dont_know;
2760           dependence_stats.num_ziv_dependent++;
2761         }
2762       else
2763         {
2764           /* The accesses do not overlap.  */
2765           *overlaps_a = conflict_fn_no_dependence ();
2766           *overlaps_b = conflict_fn_no_dependence ();
2767           *last_conflicts = integer_zero_node;
2768           dependence_stats.num_ziv_independent++;
2769         }
2770       break;
2771
2772     default:
2773       /* We're not sure whether the indexes overlap.  For the moment,
2774          conservatively answer "don't know".  */
2775       if (dump_file && (dump_flags & TDF_DETAILS))
2776         fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
2777
2778       *overlaps_a = conflict_fn_not_known ();
2779       *overlaps_b = conflict_fn_not_known ();
2780       *last_conflicts = chrec_dont_know;
2781       dependence_stats.num_ziv_unimplemented++;
2782       break;
2783     }
2784
2785   if (dump_file && (dump_flags & TDF_DETAILS))
2786     fprintf (dump_file, ")\n");
2787 }
2788
2789 /* Similar to max_stmt_executions_int, but returns the bound as a tree,
2790    and only if it fits to the int type.  If this is not the case, or the
2791    bound  on the number of iterations of LOOP could not be derived, returns
2792    chrec_dont_know.  */
2793
2794 static tree
2795 max_stmt_executions_tree (struct loop *loop)
2796 {
2797   widest_int nit;
2798
2799   if (!max_stmt_executions (loop, &nit))
2800     return chrec_dont_know;
2801
2802   if (!wi::fits_to_tree_p (nit, unsigned_type_node))
2803     return chrec_dont_know;
2804
2805   return wide_int_to_tree (unsigned_type_node, nit);
2806 }
2807
2808 /* Determine whether the CHREC is always positive/negative.  If the expression
2809    cannot be statically analyzed, return false, otherwise set the answer into
2810    VALUE.  */
2811
2812 static bool
2813 chrec_is_positive (tree chrec, bool *value)
2814 {
2815   bool value0, value1, value2;
2816   tree end_value, nb_iter;
2817
2818   switch (TREE_CODE (chrec))
2819     {
2820     case POLYNOMIAL_CHREC:
2821       if (!chrec_is_positive (CHREC_LEFT (chrec), &value0)
2822           || !chrec_is_positive (CHREC_RIGHT (chrec), &value1))
2823         return false;
2824
2825       /* FIXME -- overflows.  */
2826       if (value0 == value1)
2827         {
2828           *value = value0;
2829           return true;
2830         }
2831
2832       /* Otherwise the chrec is under the form: "{-197, +, 2}_1",
2833          and the proof consists in showing that the sign never
2834          changes during the execution of the loop, from 0 to
2835          loop->nb_iterations.  */
2836       if (!evolution_function_is_affine_p (chrec))
2837         return false;
2838
2839       nb_iter = number_of_latch_executions (get_chrec_loop (chrec));
2840       if (chrec_contains_undetermined (nb_iter))
2841         return false;
2842
2843 #if 0
2844       /* TODO -- If the test is after the exit, we may decrease the number of
2845          iterations by one.  */
2846       if (after_exit)
2847         nb_iter = chrec_fold_minus (type, nb_iter, build_int_cst (type, 1));
2848 #endif
2849
2850       end_value = chrec_apply (CHREC_VARIABLE (chrec), chrec, nb_iter);
2851
2852       if (!chrec_is_positive (end_value, &value2))
2853         return false;
2854
2855       *value = value0;
2856       return value0 == value1;
2857
2858     case INTEGER_CST:
2859       switch (tree_int_cst_sgn (chrec))
2860         {
2861         case -1:
2862           *value = false;
2863           break;
2864         case 1:
2865           *value = true;
2866           break;
2867         default:
2868           return false;
2869         }
2870       return true;
2871
2872     default:
2873       return false;
2874     }
2875 }
2876
2877
2878 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
2879    constant, and CHREC_B is an affine function.  *OVERLAPS_A and
2880    *OVERLAPS_B are initialized to the functions that describe the
2881    relation between the elements accessed twice by CHREC_A and
2882    CHREC_B.  For k >= 0, the following property is verified:
2883
2884    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
2885
2886 static void
2887 analyze_siv_subscript_cst_affine (tree chrec_a,
2888                                   tree chrec_b,
2889                                   conflict_function **overlaps_a,
2890                                   conflict_function **overlaps_b,
2891                                   tree *last_conflicts)
2892 {
2893   bool value0, value1, value2;
2894   tree type, difference, tmp;
2895
2896   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
2897   chrec_a = chrec_convert (type, chrec_a, NULL);
2898   chrec_b = chrec_convert (type, chrec_b, NULL);
2899   difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
2900
2901   /* Special case overlap in the first iteration.  */
2902   if (integer_zerop (difference))
2903     {
2904       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2905       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2906       *last_conflicts = integer_one_node;
2907       return;
2908     }
2909
2910   if (!chrec_is_positive (initial_condition (difference), &value0))
2911     {
2912       if (dump_file && (dump_flags & TDF_DETAILS))
2913         fprintf (dump_file, "siv test failed: chrec is not positive.\n");
2914
2915       dependence_stats.num_siv_unimplemented++;
2916       *overlaps_a = conflict_fn_not_known ();
2917       *overlaps_b = conflict_fn_not_known ();
2918       *last_conflicts = chrec_dont_know;
2919       return;
2920     }
2921   else
2922     {
2923       if (value0 == false)
2924         {
2925           if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
2926             {
2927               if (dump_file && (dump_flags & TDF_DETAILS))
2928                 fprintf (dump_file, "siv test failed: chrec not positive.\n");
2929
2930               *overlaps_a = conflict_fn_not_known ();
2931               *overlaps_b = conflict_fn_not_known ();
2932               *last_conflicts = chrec_dont_know;
2933               dependence_stats.num_siv_unimplemented++;
2934               return;
2935             }
2936           else
2937             {
2938               if (value1 == true)
2939                 {
2940                   /* Example:
2941                      chrec_a = 12
2942                      chrec_b = {10, +, 1}
2943                   */
2944
2945                   if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
2946                     {
2947                       HOST_WIDE_INT numiter;
2948                       struct loop *loop = get_chrec_loop (chrec_b);
2949
2950                       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2951                       tmp = fold_build2 (EXACT_DIV_EXPR, type,
2952                                          fold_build1 (ABS_EXPR, type, difference),
2953                                          CHREC_RIGHT (chrec_b));
2954                       *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
2955                       *last_conflicts = integer_one_node;
2956
2957
2958                       /* Perform weak-zero siv test to see if overlap is
2959                          outside the loop bounds.  */
2960                       numiter = max_stmt_executions_int (loop);
2961
2962                       if (numiter >= 0
2963                           && compare_tree_int (tmp, numiter) > 0)
2964                         {
2965                           free_conflict_function (*overlaps_a);
2966                           free_conflict_function (*overlaps_b);
2967                           *overlaps_a = conflict_fn_no_dependence ();
2968                           *overlaps_b = conflict_fn_no_dependence ();
2969                           *last_conflicts = integer_zero_node;
2970                           dependence_stats.num_siv_independent++;
2971                           return;
2972                         }
2973                       dependence_stats.num_siv_dependent++;
2974                       return;
2975                     }
2976
2977                   /* When the step does not divide the difference, there are
2978                      no overlaps.  */
2979                   else
2980                     {
2981                       *overlaps_a = conflict_fn_no_dependence ();
2982                       *overlaps_b = conflict_fn_no_dependence ();
2983                       *last_conflicts = integer_zero_node;
2984                       dependence_stats.num_siv_independent++;
2985                       return;
2986                     }
2987                 }
2988
2989               else
2990                 {
2991                   /* Example:
2992                      chrec_a = 12
2993                      chrec_b = {10, +, -1}
2994
2995                      In this case, chrec_a will not overlap with chrec_b.  */
2996                   *overlaps_a = conflict_fn_no_dependence ();
2997                   *overlaps_b = conflict_fn_no_dependence ();
2998                   *last_conflicts = integer_zero_node;
2999                   dependence_stats.num_siv_independent++;
3000                   return;
3001                 }
3002             }
3003         }
3004       else
3005         {
3006           if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
3007             {
3008               if (dump_file && (dump_flags & TDF_DETAILS))
3009                 fprintf (dump_file, "siv test failed: chrec not positive.\n");
3010
3011               *overlaps_a = conflict_fn_not_known ();
3012               *overlaps_b = conflict_fn_not_known ();
3013               *last_conflicts = chrec_dont_know;
3014               dependence_stats.num_siv_unimplemented++;
3015               return;
3016             }
3017           else
3018             {
3019               if (value2 == false)
3020                 {
3021                   /* Example:
3022                      chrec_a = 3
3023                      chrec_b = {10, +, -1}
3024                   */
3025                   if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
3026                     {
3027                       HOST_WIDE_INT numiter;
3028                       struct loop *loop = get_chrec_loop (chrec_b);
3029
3030                       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3031                       tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
3032                                          CHREC_RIGHT (chrec_b));
3033                       *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
3034                       *last_conflicts = integer_one_node;
3035
3036                       /* Perform weak-zero siv test to see if overlap is
3037                          outside the loop bounds.  */
3038                       numiter = max_stmt_executions_int (loop);
3039
3040                       if (numiter >= 0
3041                           && compare_tree_int (tmp, numiter) > 0)
3042                         {
3043                           free_conflict_function (*overlaps_a);
3044                           free_conflict_function (*overlaps_b);
3045                           *overlaps_a = conflict_fn_no_dependence ();
3046                           *overlaps_b = conflict_fn_no_dependence ();
3047                           *last_conflicts = integer_zero_node;
3048                           dependence_stats.num_siv_independent++;
3049                           return;
3050                         }
3051                       dependence_stats.num_siv_dependent++;
3052                       return;
3053                     }
3054
3055                   /* When the step does not divide the difference, there
3056                      are no overlaps.  */
3057                   else
3058                     {
3059                       *overlaps_a = conflict_fn_no_dependence ();
3060                       *overlaps_b = conflict_fn_no_dependence ();
3061                       *last_conflicts = integer_zero_node;
3062                       dependence_stats.num_siv_independent++;
3063                       return;
3064                     }
3065                 }
3066               else
3067                 {
3068                   /* Example:
3069                      chrec_a = 3
3070                      chrec_b = {4, +, 1}
3071
3072                      In this case, chrec_a will not overlap with chrec_b.  */
3073                   *overlaps_a = conflict_fn_no_dependence ();
3074                   *overlaps_b = conflict_fn_no_dependence ();
3075                   *last_conflicts = integer_zero_node;
3076                   dependence_stats.num_siv_independent++;
3077                   return;
3078                 }
3079             }
3080         }
3081     }
3082 }
3083
3084 /* Helper recursive function for initializing the matrix A.  Returns
3085    the initial value of CHREC.  */
3086
3087 static tree
3088 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
3089 {
3090   gcc_assert (chrec);
3091
3092   switch (TREE_CODE (chrec))
3093     {
3094     case POLYNOMIAL_CHREC:
3095       A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
3096       return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
3097
3098     case PLUS_EXPR:
3099     case MULT_EXPR:
3100     case MINUS_EXPR:
3101       {
3102         tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
3103         tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
3104
3105         return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
3106       }
3107
3108     CASE_CONVERT:
3109       {
3110         tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
3111         return chrec_convert (chrec_type (chrec), op, NULL);
3112       }
3113
3114     case BIT_NOT_EXPR:
3115       {
3116         /* Handle ~X as -1 - X.  */
3117         tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
3118         return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
3119                               build_int_cst (TREE_TYPE (chrec), -1), op);
3120       }
3121
3122     case INTEGER_CST:
3123       return chrec;
3124
3125     default:
3126       gcc_unreachable ();
3127       return NULL_TREE;
3128     }
3129 }
3130
3131 #define FLOOR_DIV(x,y) ((x) / (y))
3132
3133 /* Solves the special case of the Diophantine equation:
3134    | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
3135
3136    Computes the descriptions OVERLAPS_A and OVERLAPS_B.  NITER is the
3137    number of iterations that loops X and Y run.  The overlaps will be
3138    constructed as evolutions in dimension DIM.  */
3139
3140 static void
3141 compute_overlap_steps_for_affine_univar (HOST_WIDE_INT niter,
3142                                          HOST_WIDE_INT step_a,
3143                                          HOST_WIDE_INT step_b,
3144                                          affine_fn *overlaps_a,
3145                                          affine_fn *overlaps_b,
3146                                          tree *last_conflicts, int dim)
3147 {
3148   if (((step_a > 0 && step_b > 0)
3149        || (step_a < 0 && step_b < 0)))
3150     {
3151       HOST_WIDE_INT step_overlaps_a, step_overlaps_b;
3152       HOST_WIDE_INT gcd_steps_a_b, last_conflict, tau2;
3153
3154       gcd_steps_a_b = gcd (step_a, step_b);
3155       step_overlaps_a = step_b / gcd_steps_a_b;
3156       step_overlaps_b = step_a / gcd_steps_a_b;
3157
3158       if (niter > 0)
3159         {
3160           tau2 = FLOOR_DIV (niter, step_overlaps_a);
3161           tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
3162           last_conflict = tau2;
3163           *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
3164         }
3165       else
3166         *last_conflicts = chrec_dont_know;
3167
3168       *overlaps_a = affine_fn_univar (integer_zero_node, dim,
3169                                       build_int_cst (NULL_TREE,
3170                                                      step_overlaps_a));
3171       *overlaps_b = affine_fn_univar (integer_zero_node, dim,
3172                                       build_int_cst (NULL_TREE,
3173                                                      step_overlaps_b));
3174     }
3175
3176   else
3177     {
3178       *overlaps_a = affine_fn_cst (integer_zero_node);
3179       *overlaps_b = affine_fn_cst (integer_zero_node);
3180       *last_conflicts = integer_zero_node;
3181     }
3182 }
3183
3184 /* Solves the special case of a Diophantine equation where CHREC_A is
3185    an affine bivariate function, and CHREC_B is an affine univariate
3186    function.  For example,
3187
3188    | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
3189
3190    has the following overlapping functions:
3191
3192    | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
3193    | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
3194    | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
3195
3196    FORNOW: This is a specialized implementation for a case occurring in
3197    a common benchmark.  Implement the general algorithm.  */
3198
3199 static void
3200 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
3201                                       conflict_function **overlaps_a,
3202                                       conflict_function **overlaps_b,
3203                                       tree *last_conflicts)
3204 {
3205   bool xz_p, yz_p, xyz_p;
3206   HOST_WIDE_INT step_x, step_y, step_z;
3207   HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
3208   affine_fn overlaps_a_xz, overlaps_b_xz;
3209   affine_fn overlaps_a_yz, overlaps_b_yz;
3210   affine_fn overlaps_a_xyz, overlaps_b_xyz;
3211   affine_fn ova1, ova2, ovb;
3212   tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
3213
3214   step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
3215   step_y = int_cst_value (CHREC_RIGHT (chrec_a));
3216   step_z = int_cst_value (CHREC_RIGHT (chrec_b));
3217
3218   niter_x = max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)));
3219   niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a));
3220   niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b));
3221
3222   if (niter_x < 0 || niter_y < 0 || niter_z < 0)
3223     {
3224       if (dump_file && (dump_flags & TDF_DETAILS))
3225         fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
3226
3227       *overlaps_a = conflict_fn_not_known ();
3228       *overlaps_b = conflict_fn_not_known ();
3229       *last_conflicts = chrec_dont_know;
3230       return;
3231     }
3232
3233   niter = MIN (niter_x, niter_z);
3234   compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
3235                                            &overlaps_a_xz,
3236                                            &overlaps_b_xz,
3237                                            &last_conflicts_xz, 1);
3238   niter = MIN (niter_y, niter_z);
3239   compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
3240                                            &overlaps_a_yz,
3241                                            &overlaps_b_yz,
3242                                            &last_conflicts_yz, 2);
3243   niter = MIN (niter_x, niter_z);
3244   niter = MIN (niter_y, niter);
3245   compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
3246                                            &overlaps_a_xyz,
3247                                            &overlaps_b_xyz,
3248                                            &last_conflicts_xyz, 3);
3249
3250   xz_p = !integer_zerop (last_conflicts_xz);
3251   yz_p = !integer_zerop (last_conflicts_yz);
3252   xyz_p = !integer_zerop (last_conflicts_xyz);
3253
3254   if (xz_p || yz_p || xyz_p)
3255     {
3256       ova1 = affine_fn_cst (integer_zero_node);
3257       ova2 = affine_fn_cst (integer_zero_node);
3258       ovb = affine_fn_cst (integer_zero_node);
3259       if (xz_p)
3260         {
3261           affine_fn t0 = ova1;
3262           affine_fn t2 = ovb;
3263
3264           ova1 = affine_fn_plus (ova1, overlaps_a_xz);
3265           ovb = affine_fn_plus (ovb, overlaps_b_xz);
3266           affine_fn_free (t0);
3267           affine_fn_free (t2);
3268           *last_conflicts = last_conflicts_xz;
3269         }
3270       if (yz_p)
3271         {
3272           affine_fn t0 = ova2;
3273           affine_fn t2 = ovb;
3274
3275           ova2 = affine_fn_plus (ova2, overlaps_a_yz);
3276           ovb = affine_fn_plus (ovb, overlaps_b_yz);
3277           affine_fn_free (t0);
3278           affine_fn_free (t2);
3279           *last_conflicts = last_conflicts_yz;
3280         }
3281       if (xyz_p)
3282         {
3283           affine_fn t0 = ova1;
3284           affine_fn t2 = ova2;
3285           affine_fn t4 = ovb;
3286
3287           ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
3288           ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
3289           ovb = affine_fn_plus (ovb, overlaps_b_xyz);
3290           affine_fn_free (t0);
3291           affine_fn_free (t2);
3292           affine_fn_free (t4);
3293           *last_conflicts = last_conflicts_xyz;
3294         }
3295       *overlaps_a = conflict_fn (2, ova1, ova2);
3296       *overlaps_b = conflict_fn (1, ovb);
3297     }
3298   else
3299     {
3300       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3301       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3302       *last_conflicts = integer_zero_node;
3303     }
3304
3305   affine_fn_free (overlaps_a_xz);
3306   affine_fn_free (overlaps_b_xz);
3307   affine_fn_free (overlaps_a_yz);
3308   affine_fn_free (overlaps_b_yz);
3309   affine_fn_free (overlaps_a_xyz);
3310   affine_fn_free (overlaps_b_xyz);
3311 }
3312
3313 /* Copy the elements of vector VEC1 with length SIZE to VEC2.  */
3314
3315 static void
3316 lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
3317                     int size)
3318 {
3319   memcpy (vec2, vec1, size * sizeof (*vec1));
3320 }
3321
3322 /* Copy the elements of M x N matrix MAT1 to MAT2.  */
3323
3324 static void
3325 lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
3326                     int m, int n)
3327 {
3328   int i;
3329
3330   for (i = 0; i < m; i++)
3331     lambda_vector_copy (mat1[i], mat2[i], n);
3332 }
3333
3334 /* Store the N x N identity matrix in MAT.  */
3335
3336 static void
3337 lambda_matrix_id (lambda_matrix mat, int size)
3338 {
3339   int i, j;
3340
3341   for (i = 0; i < size; i++)
3342     for (j = 0; j < size; j++)
3343       mat[i][j] = (i == j) ? 1 : 0;
3344 }
3345
3346 /* Return the first nonzero element of vector VEC1 between START and N.
3347    We must have START <= N.   Returns N if VEC1 is the zero vector.  */
3348
3349 static int
3350 lambda_vector_first_nz (lambda_vector vec1, int n, int start)
3351 {
3352   int j = start;
3353   while (j < n && vec1[j] == 0)
3354     j++;
3355   return j;
3356 }
3357
3358 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
3359    R2 = R2 + CONST1 * R1.  */
3360
3361 static void
3362 lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2, int const1)
3363 {
3364   int i;
3365
3366   if (const1 == 0)
3367     return;
3368
3369   for (i = 0; i < n; i++)
3370     mat[r2][i] += const1 * mat[r1][i];
3371 }
3372
3373 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
3374    and store the result in VEC2.  */
3375
3376 static void
3377 lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
3378                           int size, int const1)
3379 {
3380   int i;
3381
3382   if (const1 == 0)
3383     lambda_vector_clear (vec2, size);
3384   else
3385     for (i = 0; i < size; i++)
3386       vec2[i] = const1 * vec1[i];
3387 }
3388
3389 /* Negate vector VEC1 with length SIZE and store it in VEC2.  */
3390
3391 static void
3392 lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
3393                       int size)
3394 {
3395   lambda_vector_mult_const (vec1, vec2, size, -1);
3396 }
3397
3398 /* Negate row R1 of matrix MAT which has N columns.  */
3399
3400 static void
3401 lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
3402 {
3403   lambda_vector_negate (mat[r1], mat[r1], n);
3404 }
3405
3406 /* Return true if two vectors are equal.  */
3407
3408 static bool
3409 lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
3410 {
3411   int i;
3412   for (i = 0; i < size; i++)
3413     if (vec1[i] != vec2[i])
3414       return false;
3415   return true;
3416 }
3417
3418 /* Given an M x N integer matrix A, this function determines an M x
3419    M unimodular matrix U, and an M x N echelon matrix S such that
3420    "U.A = S".  This decomposition is also known as "right Hermite".
3421
3422    Ref: Algorithm 2.1 page 33 in "Loop Transformations for
3423    Restructuring Compilers" Utpal Banerjee.  */
3424
3425 static void
3426 lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
3427                              lambda_matrix S, lambda_matrix U)
3428 {
3429   int i, j, i0 = 0;
3430
3431   lambda_matrix_copy (A, S, m, n);
3432   lambda_matrix_id (U, m);
3433
3434   for (j = 0; j < n; j++)
3435     {
3436       if (lambda_vector_first_nz (S[j], m, i0) < m)
3437         {
3438           ++i0;
3439           for (i = m - 1; i >= i0; i--)
3440             {
3441               while (S[i][j] != 0)
3442                 {
3443                   int sigma, factor, a, b;
3444
3445                   a = S[i-1][j];
3446                   b = S[i][j];
3447                   sigma = (a * b < 0) ? -1: 1;
3448                   a = abs (a);
3449                   b = abs (b);
3450                   factor = sigma * (a / b);
3451
3452                   lambda_matrix_row_add (S, n, i, i-1, -factor);
3453                   std::swap (S[i], S[i-1]);
3454
3455                   lambda_matrix_row_add (U, m, i, i-1, -factor);
3456                   std::swap (U[i], U[i-1]);
3457                 }
3458             }
3459         }
3460     }
3461 }
3462
3463 /* Determines the overlapping elements due to accesses CHREC_A and
3464    CHREC_B, that are affine functions.  This function cannot handle
3465    symbolic evolution functions, ie. when initial conditions are
3466    parameters, because it uses lambda matrices of integers.  */
3467
3468 static void
3469 analyze_subscript_affine_affine (tree chrec_a,
3470                                  tree chrec_b,
3471                                  conflict_function **overlaps_a,
3472                                  conflict_function **overlaps_b,
3473                                  tree *last_conflicts)
3474 {
3475   unsigned nb_vars_a, nb_vars_b, dim;
3476   HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta;
3477   lambda_matrix A, U, S;
3478   struct obstack scratch_obstack;
3479
3480   if (eq_evolutions_p (chrec_a, chrec_b))
3481     {
3482       /* The accessed index overlaps for each iteration in the
3483          loop.  */
3484       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3485       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3486       *last_conflicts = chrec_dont_know;
3487       return;
3488     }
3489   if (dump_file && (dump_flags & TDF_DETAILS))
3490     fprintf (dump_file, "(analyze_subscript_affine_affine \n");
3491
3492   /* For determining the initial intersection, we have to solve a
3493      Diophantine equation.  This is the most time consuming part.
3494
3495      For answering to the question: "Is there a dependence?" we have
3496      to prove that there exists a solution to the Diophantine
3497      equation, and that the solution is in the iteration domain,
3498      i.e. the solution is positive or zero, and that the solution
3499      happens before the upper bound loop.nb_iterations.  Otherwise
3500      there is no dependence.  This function outputs a description of
3501      the iterations that hold the intersections.  */
3502
3503   nb_vars_a = nb_vars_in_chrec (chrec_a);
3504   nb_vars_b = nb_vars_in_chrec (chrec_b);
3505
3506   gcc_obstack_init (&scratch_obstack);
3507
3508   dim = nb_vars_a + nb_vars_b;
3509   U = lambda_matrix_new (dim, dim, &scratch_obstack);
3510   A = lambda_matrix_new (dim, 1, &scratch_obstack);
3511   S = lambda_matrix_new (dim, 1, &scratch_obstack);
3512
3513   init_a = int_cst_value (initialize_matrix_A (A, chrec_a, 0, 1));
3514   init_b = int_cst_value (initialize_matrix_A (A, chrec_b, nb_vars_a, -1));
3515   gamma = init_b - init_a;
3516
3517   /* Don't do all the hard work of solving the Diophantine equation
3518      when we already know the solution: for example,
3519      | {3, +, 1}_1
3520      | {3, +, 4}_2
3521      | gamma = 3 - 3 = 0.
3522      Then the first overlap occurs during the first iterations:
3523      | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
3524   */
3525   if (gamma == 0)
3526     {
3527       if (nb_vars_a == 1 && nb_vars_b == 1)
3528         {
3529           HOST_WIDE_INT step_a, step_b;
3530           HOST_WIDE_INT niter, niter_a, niter_b;
3531           affine_fn ova, ovb;
3532
3533           niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a));
3534           niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b));
3535           niter = MIN (niter_a, niter_b);
3536           step_a = int_cst_value (CHREC_RIGHT (chrec_a));
3537           step_b = int_cst_value (CHREC_RIGHT (chrec_b));
3538
3539           compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
3540                                                    &ova, &ovb,
3541                                                    last_conflicts, 1);
3542           *overlaps_a = conflict_fn (1, ova);
3543           *overlaps_b = conflict_fn (1, ovb);
3544         }
3545
3546       else if (nb_vars_a == 2 && nb_vars_b == 1)
3547         compute_overlap_steps_for_affine_1_2
3548           (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
3549
3550       else if (nb_vars_a == 1 && nb_vars_b == 2)
3551         compute_overlap_steps_for_affine_1_2
3552           (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
3553
3554       else
3555         {
3556           if (dump_file && (dump_flags & TDF_DETAILS))
3557             fprintf (dump_file, "affine-affine test failed: too many variables.\n");
3558           *overlaps_a = conflict_fn_not_known ();
3559           *overlaps_b = conflict_fn_not_known ();
3560           *last_conflicts = chrec_dont_know;
3561         }
3562       goto end_analyze_subs_aa;
3563     }
3564
3565   /* U.A = S */
3566   lambda_matrix_right_hermite (A, dim, 1, S, U);
3567
3568   if (S[0][0] < 0)
3569     {
3570       S[0][0] *= -1;
3571       lambda_matrix_row_negate (U, dim, 0);
3572     }
3573   gcd_alpha_beta = S[0][0];
3574
3575   /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
3576      but that is a quite strange case.  Instead of ICEing, answer
3577      don't know.  */
3578   if (gcd_alpha_beta == 0)
3579     {
3580       *overlaps_a = conflict_fn_not_known ();
3581       *overlaps_b = conflict_fn_not_known ();
3582       *last_conflicts = chrec_dont_know;
3583       goto end_analyze_subs_aa;
3584     }
3585
3586   /* The classic "gcd-test".  */
3587   if (!int_divides_p (gcd_alpha_beta, gamma))
3588     {
3589       /* The "gcd-test" has determined that there is no integer
3590          solution, i.e. there is no dependence.  */
3591       *overlaps_a = conflict_fn_no_dependence ();
3592       *overlaps_b = conflict_fn_no_dependence ();
3593       *last_conflicts = integer_zero_node;
3594     }
3595
3596   /* Both access functions are univariate.  This includes SIV and MIV cases.  */
3597   else if (nb_vars_a == 1 && nb_vars_b == 1)
3598     {
3599       /* Both functions should have the same evolution sign.  */
3600       if (((A[0][0] > 0 && -A[1][0] > 0)
3601            || (A[0][0] < 0 && -A[1][0] < 0)))
3602         {
3603           /* The solutions are given by:
3604              |
3605              | [GAMMA/GCD_ALPHA_BETA  t].[u11 u12]  = [x0]
3606              |                           [u21 u22]    [y0]
3607
3608              For a given integer t.  Using the following variables,
3609
3610              | i0 = u11 * gamma / gcd_alpha_beta
3611              | j0 = u12 * gamma / gcd_alpha_beta
3612              | i1 = u21
3613              | j1 = u22
3614
3615              the solutions are:
3616
3617              | x0 = i0 + i1 * t,
3618              | y0 = j0 + j1 * t.  */
3619           HOST_WIDE_INT i0, j0, i1, j1;
3620
3621           i0 = U[0][0] * gamma / gcd_alpha_beta;
3622           j0 = U[0][1] * gamma / gcd_alpha_beta;
3623           i1 = U[1][0];
3624           j1 = U[1][1];
3625
3626           if ((i1 == 0 && i0 < 0)
3627               || (j1 == 0 && j0 < 0))
3628             {
3629               /* There is no solution.
3630                  FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
3631                  falls in here, but for the moment we don't look at the
3632                  upper bound of the iteration domain.  */
3633               *overlaps_a = conflict_fn_no_dependence ();
3634               *overlaps_b = conflict_fn_no_dependence ();
3635               *last_conflicts = integer_zero_node;
3636               goto end_analyze_subs_aa;
3637             }
3638
3639           if (i1 > 0 && j1 > 0)
3640             {
3641               HOST_WIDE_INT niter_a
3642                 = max_stmt_executions_int (get_chrec_loop (chrec_a));
3643               HOST_WIDE_INT niter_b
3644                 = max_stmt_executions_int (get_chrec_loop (chrec_b));
3645               HOST_WIDE_INT niter = MIN (niter_a, niter_b);
3646
3647               /* (X0, Y0) is a solution of the Diophantine equation:
3648                  "chrec_a (X0) = chrec_b (Y0)".  */
3649               HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
3650                                         CEIL (-j0, j1));
3651               HOST_WIDE_INT x0 = i1 * tau1 + i0;
3652               HOST_WIDE_INT y0 = j1 * tau1 + j0;
3653
3654               /* (X1, Y1) is the smallest positive solution of the eq
3655                  "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
3656                  first conflict occurs.  */
3657               HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
3658               HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
3659               HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
3660
3661               if (niter > 0)
3662                 {
3663                   HOST_WIDE_INT tau2 = MIN (FLOOR_DIV (niter_a - i0, i1),
3664                                             FLOOR_DIV (niter_b - j0, j1));
3665                   HOST_WIDE_INT last_conflict = tau2 - (x1 - i0)/i1;
3666
3667                   /* If the overlap occurs outside of the bounds of the
3668                      loop, there is no dependence.  */
3669                   if (x1 >= niter_a || y1 >= niter_b)
3670                     {
3671                       *overlaps_a = conflict_fn_no_dependence ();
3672                       *overlaps_b = conflict_fn_no_dependence ();
3673                       *last_conflicts = integer_zero_node;
3674                       goto end_analyze_subs_aa;
3675                     }
3676                   else
3677                     *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
3678                 }
3679               else
3680                 *last_conflicts = chrec_dont_know;
3681
3682               *overlaps_a
3683                 = conflict_fn (1,
3684                                affine_fn_univar (build_int_cst (NULL_TREE, x1),
3685                                                  1,
3686                                                  build_int_cst (NULL_TREE, i1)));
3687               *overlaps_b
3688                 = conflict_fn (1,
3689                                affine_fn_univar (build_int_cst (NULL_TREE, y1),
3690                                                  1,
3691                                                  build_int_cst (NULL_TREE, j1)));
3692             }
3693           else
3694             {
3695               /* FIXME: For the moment, the upper bound of the
3696                  iteration domain for i and j is not checked.  */
3697               if (dump_file && (dump_flags & TDF_DETAILS))
3698                 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
3699               *overlaps_a = conflict_fn_not_known ();
3700               *overlaps_b = conflict_fn_not_known ();
3701               *last_conflicts = chrec_dont_know;
3702             }
3703         }
3704       else
3705         {
3706           if (dump_file && (dump_flags & TDF_DETAILS))
3707             fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
3708           *overlaps_a = conflict_fn_not_known ();
3709           *overlaps_b = conflict_fn_not_known ();
3710           *last_conflicts = chrec_dont_know;
3711         }
3712     }
3713   else
3714     {
3715       if (dump_file && (dump_flags & TDF_DETAILS))
3716         fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
3717       *overlaps_a = conflict_fn_not_known ();
3718       *overlaps_b = conflict_fn_not_known ();
3719       *last_conflicts = chrec_dont_know;
3720     }
3721
3722 end_analyze_subs_aa:
3723   obstack_free (&scratch_obstack, NULL);
3724   if (dump_file && (dump_flags & TDF_DETAILS))
3725     {
3726       fprintf (dump_file, "  (overlaps_a = ");
3727       dump_conflict_function (dump_file, *overlaps_a);
3728       fprintf (dump_file, ")\n  (overlaps_b = ");
3729       dump_conflict_function (dump_file, *overlaps_b);
3730       fprintf (dump_file, "))\n");
3731     }
3732 }
3733
3734 /* Returns true when analyze_subscript_affine_affine can be used for
3735    determining the dependence relation between chrec_a and chrec_b,
3736    that contain symbols.  This function modifies chrec_a and chrec_b
3737    such that the analysis result is the same, and such that they don't
3738    contain symbols, and then can safely be passed to the analyzer.
3739
3740    Example: The analysis of the following tuples of evolutions produce
3741    the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
3742    vs. {0, +, 1}_1
3743
3744    {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
3745    {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
3746 */
3747
3748 static bool
3749 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
3750 {
3751   tree diff, type, left_a, left_b, right_b;
3752
3753   if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
3754       || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
3755     /* FIXME: For the moment not handled.  Might be refined later.  */
3756     return false;
3757
3758   type = chrec_type (*chrec_a);
3759   left_a = CHREC_LEFT (*chrec_a);
3760   left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
3761   diff = chrec_fold_minus (type, left_a, left_b);
3762
3763   if (!evolution_function_is_constant_p (diff))
3764     return false;
3765
3766   if (dump_file && (dump_flags & TDF_DETAILS))
3767     fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
3768
3769   *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
3770                                      diff, CHREC_RIGHT (*chrec_a));
3771   right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
3772   *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
3773                                      build_int_cst (type, 0),
3774                                      right_b);
3775   return true;
3776 }
3777
3778 /* Analyze a SIV (Single Index Variable) subscript.  *OVERLAPS_A and
3779    *OVERLAPS_B are initialized to the functions that describe the
3780    relation between the elements accessed twice by CHREC_A and
3781    CHREC_B.  For k >= 0, the following property is verified:
3782
3783    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
3784
3785 static void
3786 analyze_siv_subscript (tree chrec_a,
3787                        tree chrec_b,
3788                        conflict_function **overlaps_a,
3789                        conflict_function **overlaps_b,
3790                        tree *last_conflicts,
3791                        int loop_nest_num)
3792 {
3793   dependence_stats.num_siv++;
3794
3795   if (dump_file && (dump_flags & TDF_DETAILS))
3796     fprintf (dump_file, "(analyze_siv_subscript \n");
3797
3798   if (evolution_function_is_constant_p (chrec_a)
3799       && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
3800     analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
3801                                       overlaps_a, overlaps_b, last_conflicts);
3802
3803   else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
3804            && evolution_function_is_constant_p (chrec_b))
3805     analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
3806                                       overlaps_b, overlaps_a, last_conflicts);
3807
3808   else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
3809            && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
3810     {
3811       if (!chrec_contains_symbols (chrec_a)
3812           && !chrec_contains_symbols (chrec_b))
3813         {
3814           analyze_subscript_affine_affine (chrec_a, chrec_b,
3815                                            overlaps_a, overlaps_b,
3816                                            last_conflicts);
3817
3818           if (CF_NOT_KNOWN_P (*overlaps_a)
3819               || CF_NOT_KNOWN_P (*overlaps_b))
3820             dependence_stats.num_siv_unimplemented++;
3821           else if (CF_NO_DEPENDENCE_P (*overlaps_a)
3822                    || CF_NO_DEPENDENCE_P (*overlaps_b))
3823             dependence_stats.num_siv_independent++;
3824           else
3825             dependence_stats.num_siv_dependent++;
3826         }
3827       else if (can_use_analyze_subscript_affine_affine (&chrec_a,
3828                                                         &chrec_b))
3829         {
3830           analyze_subscript_affine_affine (chrec_a, chrec_b,
3831                                            overlaps_a, overlaps_b,
3832                                            last_conflicts);
3833
3834           if (CF_NOT_KNOWN_P (*overlaps_a)
3835               || CF_NOT_KNOWN_P (*overlaps_b))
3836             dependence_stats.num_siv_unimplemented++;
3837           else if (CF_NO_DEPENDENCE_P (*overlaps_a)
3838                    || CF_NO_DEPENDENCE_P (*overlaps_b))
3839             dependence_stats.num_siv_independent++;
3840           else
3841             dependence_stats.num_siv_dependent++;
3842         }
3843       else
3844         goto siv_subscript_dontknow;
3845     }
3846
3847   else
3848     {
3849     siv_subscript_dontknow:;
3850       if (dump_file && (dump_flags & TDF_DETAILS))
3851         fprintf (dump_file, "  siv test failed: unimplemented");
3852       *overlaps_a = conflict_fn_not_known ();
3853       *overlaps_b = conflict_fn_not_known ();
3854       *last_conflicts = chrec_dont_know;
3855       dependence_stats.num_siv_unimplemented++;
3856     }
3857
3858   if (dump_file && (dump_flags & TDF_DETAILS))
3859     fprintf (dump_file, ")\n");
3860 }
3861
3862 /* Returns false if we can prove that the greatest common divisor of the steps
3863    of CHREC does not divide CST, false otherwise.  */
3864
3865 static bool
3866 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
3867 {
3868   HOST_WIDE_INT cd = 0, val;
3869   tree step;
3870
3871   if (!tree_fits_shwi_p (cst))
3872     return true;
3873   val = tree_to_shwi (cst);
3874
3875   while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
3876     {
3877       step = CHREC_RIGHT (chrec);
3878       if (!tree_fits_shwi_p (step))
3879         return true;
3880       cd = gcd (cd, tree_to_shwi (step));
3881       chrec = CHREC_LEFT (chrec);
3882     }
3883
3884   return val % cd == 0;
3885 }
3886
3887 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
3888    LOOP_NEST.  *OVERLAPS_A and *OVERLAPS_B are initialized to the
3889    functions that describe the relation between the elements accessed
3890    twice by CHREC_A and CHREC_B.  For k >= 0, the following property
3891    is verified:
3892
3893    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
3894
3895 static void
3896 analyze_miv_subscript (tree chrec_a,
3897                        tree chrec_b,
3898                        conflict_function **overlaps_a,
3899                        conflict_function **overlaps_b,
3900                        tree *last_conflicts,
3901                        struct loop *loop_nest)
3902 {
3903   tree type, difference;
3904
3905   dependence_stats.num_miv++;
3906   if (dump_file && (dump_flags & TDF_DETAILS))
3907     fprintf (dump_file, "(analyze_miv_subscript \n");
3908
3909   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
3910   chrec_a = chrec_convert (type, chrec_a, NULL);
3911   chrec_b = chrec_convert (type, chrec_b, NULL);
3912   difference = chrec_fold_minus (type, chrec_a, chrec_b);
3913
3914   if (eq_evolutions_p (chrec_a, chrec_b))
3915     {
3916       /* Access functions are the same: all the elements are accessed
3917          in the same order.  */
3918       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3919       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3920       *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
3921       dependence_stats.num_miv_dependent++;
3922     }
3923
3924   else if (evolution_function_is_constant_p (difference)
3925            /* For the moment, the following is verified:
3926               evolution_function_is_affine_multivariate_p (chrec_a,
3927               loop_nest->num) */
3928            && !gcd_of_steps_may_divide_p (chrec_a, difference))
3929     {
3930       /* testsuite/.../ssa-chrec-33.c
3931          {{21, +, 2}_1, +, -2}_2  vs.  {{20, +, 2}_1, +, -2}_2
3932
3933          The difference is 1, and all the evolution steps are multiples
3934          of 2, consequently there are no overlapping elements.  */
3935       *overlaps_a = conflict_fn_no_dependence ();
3936       *overlaps_b = conflict_fn_no_dependence ();
3937       *last_conflicts = integer_zero_node;
3938       dependence_stats.num_miv_independent++;
3939     }
3940
3941   else if (evolution_function_is_affine_multivariate_p (chrec_a, loop_nest->num)
3942            && !chrec_contains_symbols (chrec_a)
3943            && evolution_function_is_affine_multivariate_p (chrec_b, loop_nest->num)
3944            && !chrec_contains_symbols (chrec_b))
3945     {
3946       /* testsuite/.../ssa-chrec-35.c
3947          {0, +, 1}_2  vs.  {0, +, 1}_3
3948          the overlapping elements are respectively located at iterations:
3949          {0, +, 1}_x and {0, +, 1}_x,
3950          in other words, we have the equality:
3951          {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
3952
3953          Other examples:
3954          {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
3955          {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
3956
3957          {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
3958          {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
3959       */
3960       analyze_subscript_affine_affine (chrec_a, chrec_b,
3961                                        overlaps_a, overlaps_b, last_conflicts);
3962
3963       if (CF_NOT_KNOWN_P (*overlaps_a)
3964           || CF_NOT_KNOWN_P (*overlaps_b))
3965         dependence_stats.num_miv_unimplemented++;
3966       else if (CF_NO_DEPENDENCE_P (*overlaps_a)
3967                || CF_NO_DEPENDENCE_P (*overlaps_b))
3968         dependence_stats.num_miv_independent++;
3969       else
3970         dependence_stats.num_miv_dependent++;
3971     }
3972
3973   else
3974     {
3975       /* When the analysis is too difficult, answer "don't know".  */
3976       if (dump_file && (dump_flags & TDF_DETAILS))
3977         fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
3978
3979       *overlaps_a = conflict_fn_not_known ();
3980       *overlaps_b = conflict_fn_not_known ();
3981       *last_conflicts = chrec_dont_know;
3982       dependence_stats.num_miv_unimplemented++;
3983     }
3984
3985   if (dump_file && (dump_flags & TDF_DETAILS))
3986     fprintf (dump_file, ")\n");
3987 }
3988
3989 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
3990    with respect to LOOP_NEST.  OVERLAP_ITERATIONS_A and
3991    OVERLAP_ITERATIONS_B are initialized with two functions that
3992    describe the iterations that contain conflicting elements.
3993
3994    Remark: For an integer k >= 0, the following equality is true:
3995
3996    CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
3997 */
3998
3999 static void
4000 analyze_overlapping_iterations (tree chrec_a,
4001                                 tree chrec_b,
4002                                 conflict_function **overlap_iterations_a,
4003                                 conflict_function **overlap_iterations_b,
4004                                 tree *last_conflicts, struct loop *loop_nest)
4005 {
4006   unsigned int lnn = loop_nest->num;
4007
4008   dependence_stats.num_subscript_tests++;
4009
4010   if (dump_file && (dump_flags & TDF_DETAILS))
4011     {
4012       fprintf (dump_file, "(analyze_overlapping_iterations \n");
4013       fprintf (dump_file, "  (chrec_a = ");
4014       print_generic_expr (dump_file, chrec_a);
4015       fprintf (dump_file, ")\n  (chrec_b = ");
4016       print_generic_expr (dump_file, chrec_b);
4017       fprintf (dump_file, ")\n");
4018     }
4019
4020   if (chrec_a == NULL_TREE
4021       || chrec_b == NULL_TREE
4022       || chrec_contains_undetermined (chrec_a)
4023       || chrec_contains_undetermined (chrec_b))
4024     {
4025       dependence_stats.num_subscript_undetermined++;
4026
4027       *overlap_iterations_a = conflict_fn_not_known ();
4028       *overlap_iterations_b = conflict_fn_not_known ();
4029     }
4030
4031   /* If they are the same chrec, and are affine, they overlap
4032      on every iteration.  */
4033   else if (eq_evolutions_p (chrec_a, chrec_b)
4034            && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
4035                || operand_equal_p (chrec_a, chrec_b, 0)))
4036     {
4037       dependence_stats.num_same_subscript_function++;
4038       *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4039       *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4040       *last_conflicts = chrec_dont_know;
4041     }
4042
4043   /* If they aren't the same, and aren't affine, we can't do anything
4044      yet.  */
4045   else if ((chrec_contains_symbols (chrec_a)
4046             || chrec_contains_symbols (chrec_b))
4047            && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
4048                || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
4049     {
4050       dependence_stats.num_subscript_undetermined++;
4051       *overlap_iterations_a = conflict_fn_not_known ();
4052       *overlap_iterations_b = conflict_fn_not_known ();
4053     }
4054
4055   else if (ziv_subscript_p (chrec_a, chrec_b))
4056     analyze_ziv_subscript (chrec_a, chrec_b,
4057                            overlap_iterations_a, overlap_iterations_b,
4058                            last_conflicts);
4059
4060   else if (siv_subscript_p (chrec_a, chrec_b))
4061     analyze_siv_subscript (chrec_a, chrec_b,
4062                            overlap_iterations_a, overlap_iterations_b,
4063                            last_conflicts, lnn);
4064
4065   else
4066     analyze_miv_subscript (chrec_a, chrec_b,
4067                            overlap_iterations_a, overlap_iterations_b,
4068                            last_conflicts, loop_nest);
4069
4070   if (dump_file && (dump_flags & TDF_DETAILS))
4071     {
4072       fprintf (dump_file, "  (overlap_iterations_a = ");
4073       dump_conflict_function (dump_file, *overlap_iterations_a);
4074       fprintf (dump_file, ")\n  (overlap_iterations_b = ");
4075       dump_conflict_function (dump_file, *overlap_iterations_b);
4076       fprintf (dump_file, "))\n");
4077     }
4078 }
4079
4080 /* Helper function for uniquely inserting distance vectors.  */
4081
4082 static void
4083 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
4084 {
4085   unsigned i;
4086   lambda_vector v;
4087
4088   FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, v)
4089     if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
4090       return;
4091
4092   DDR_DIST_VECTS (ddr).safe_push (dist_v);
4093 }
4094
4095 /* Helper function for uniquely inserting direction vectors.  */
4096
4097 static void
4098 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
4099 {
4100   unsigned i;
4101   lambda_vector v;
4102
4103   FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), i, v)
4104     if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
4105       return;
4106
4107   DDR_DIR_VECTS (ddr).safe_push (dir_v);
4108 }
4109
4110 /* Add a distance of 1 on all the loops outer than INDEX.  If we
4111    haven't yet determined a distance for this outer loop, push a new
4112    distance vector composed of the previous distance, and a distance
4113    of 1 for this outer loop.  Example:
4114
4115    | loop_1
4116    |   loop_2
4117    |     A[10]
4118    |   endloop_2
4119    | endloop_1
4120
4121    Saved vectors are of the form (dist_in_1, dist_in_2).  First, we
4122    save (0, 1), then we have to save (1, 0).  */
4123
4124 static void
4125 add_outer_distances (struct data_dependence_relation *ddr,
4126                      lambda_vector dist_v, int index)
4127 {
4128   /* For each outer loop where init_v is not set, the accesses are
4129      in dependence of distance 1 in the loop.  */
4130   while (--index >= 0)
4131     {
4132       lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4133       lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
4134       save_v[index] = 1;
4135       save_dist_v (ddr, save_v);
4136     }
4137 }
4138
4139 /* Return false when fail to represent the data dependence as a
4140    distance vector.  A_INDEX is the index of the first reference
4141    (0 for DDR_A, 1 for DDR_B) and B_INDEX is the index of the
4142    second reference.  INIT_B is set to true when a component has been
4143    added to the distance vector DIST_V.  INDEX_CARRY is then set to
4144    the index in DIST_V that carries the dependence.  */
4145
4146 static bool
4147 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
4148                              unsigned int a_index, unsigned int b_index,
4149                              lambda_vector dist_v, bool *init_b,
4150                              int *index_carry)
4151 {
4152   unsigned i;
4153   lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4154
4155   for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
4156     {
4157       tree access_fn_a, access_fn_b;
4158       struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
4159
4160       if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
4161         {
4162           non_affine_dependence_relation (ddr);
4163           return false;
4164         }
4165
4166       access_fn_a = SUB_ACCESS_FN (subscript, a_index);
4167       access_fn_b = SUB_ACCESS_FN (subscript, b_index);
4168
4169       if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
4170           && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
4171         {
4172           HOST_WIDE_INT dist;
4173           int index;
4174           int var_a = CHREC_VARIABLE (access_fn_a);
4175           int var_b = CHREC_VARIABLE (access_fn_b);
4176
4177           if (var_a != var_b
4178               || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
4179             {
4180               non_affine_dependence_relation (ddr);
4181               return false;
4182             }
4183
4184           dist = int_cst_value (SUB_DISTANCE (subscript));
4185           index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
4186           *index_carry = MIN (index, *index_carry);
4187
4188           /* This is the subscript coupling test.  If we have already
4189              recorded a distance for this loop (a distance coming from
4190              another subscript), it should be the same.  For example,
4191              in the following code, there is no dependence:
4192
4193              | loop i = 0, N, 1
4194              |   T[i+1][i] = ...
4195              |   ... = T[i][i]
4196              | endloop
4197           */
4198           if (init_v[index] != 0 && dist_v[index] != dist)
4199             {
4200               finalize_ddr_dependent (ddr, chrec_known);
4201               return false;
4202             }
4203
4204           dist_v[index] = dist;
4205           init_v[index] = 1;
4206           *init_b = true;
4207         }
4208       else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
4209         {
4210           /* This can be for example an affine vs. constant dependence
4211              (T[i] vs. T[3]) that is not an affine dependence and is
4212              not representable as a distance vector.  */
4213           non_affine_dependence_relation (ddr);
4214           return false;
4215         }
4216     }
4217
4218   return true;
4219 }
4220
4221 /* Return true when the DDR contains only constant access functions.  */
4222
4223 static bool
4224 constant_access_functions (const struct data_dependence_relation *ddr)
4225 {
4226   unsigned i;
4227   subscript *sub;
4228
4229   FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
4230     if (!evolution_function_is_constant_p (SUB_ACCESS_FN (sub, 0))
4231         || !evolution_function_is_constant_p (SUB_ACCESS_FN (sub, 1)))
4232       return false;
4233
4234   return true;
4235 }
4236
4237 /* Helper function for the case where DDR_A and DDR_B are the same
4238    multivariate access function with a constant step.  For an example
4239    see pr34635-1.c.  */
4240
4241 static void
4242 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
4243 {
4244   int x_1, x_2;
4245   tree c_1 = CHREC_LEFT (c_2);
4246   tree c_0 = CHREC_LEFT (c_1);
4247   lambda_vector dist_v;
4248   HOST_WIDE_INT v1, v2, cd;
4249
4250   /* Polynomials with more than 2 variables are not handled yet.  When
4251      the evolution steps are parameters, it is not possible to
4252      represent the dependence using classical distance vectors.  */
4253   if (TREE_CODE (c_0) != INTEGER_CST
4254       || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
4255       || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
4256     {
4257       DDR_AFFINE_P (ddr) = false;
4258       return;
4259     }
4260
4261   x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
4262   x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
4263
4264   /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2).  */
4265   dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4266   v1 = int_cst_value (CHREC_RIGHT (c_1));
4267   v2 = int_cst_value (CHREC_RIGHT (c_2));
4268   cd = gcd (v1, v2);
4269   v1 /= cd;
4270   v2 /= cd;
4271
4272   if (v2 < 0)
4273     {
4274       v2 = -v2;
4275       v1 = -v1;
4276     }
4277
4278   dist_v[x_1] = v2;
4279   dist_v[x_2] = -v1;
4280   save_dist_v (ddr, dist_v);
4281
4282   add_outer_distances (ddr, dist_v, x_1);
4283 }
4284
4285 /* Helper function for the case where DDR_A and DDR_B are the same
4286    access functions.  */
4287
4288 static void
4289 add_other_self_distances (struct data_dependence_relation *ddr)
4290 {
4291   lambda_vector dist_v;
4292   unsigned i;
4293   int index_carry = DDR_NB_LOOPS (ddr);
4294   subscript *sub;
4295
4296   FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
4297     {
4298       tree access_fun = SUB_ACCESS_FN (sub, 0);
4299
4300       if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
4301         {
4302           if (!evolution_function_is_univariate_p (access_fun))
4303             {
4304               if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
4305                 {
4306                   DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
4307                   return;
4308                 }
4309
4310               access_fun = SUB_ACCESS_FN (DDR_SUBSCRIPT (ddr, 0), 0);
4311
4312               if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
4313                 add_multivariate_self_dist (ddr, access_fun);
4314               else
4315                 /* The evolution step is not constant: it varies in
4316                    the outer loop, so this cannot be represented by a
4317                    distance vector.  For example in pr34635.c the
4318                    evolution is {0, +, {0, +, 4}_1}_2.  */
4319                 DDR_AFFINE_P (ddr) = false;
4320
4321               return;
4322             }
4323
4324           index_carry = MIN (index_carry,
4325                              index_in_loop_nest (CHREC_VARIABLE (access_fun),
4326                                                  DDR_LOOP_NEST (ddr)));
4327         }
4328     }
4329
4330   dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4331   add_outer_distances (ddr, dist_v, index_carry);
4332 }
4333
4334 static void
4335 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
4336 {
4337   lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4338
4339   dist_v[DDR_INNER_LOOP (ddr)] = 1;
4340   save_dist_v (ddr, dist_v);
4341 }
4342
4343 /* Adds a unit distance vector to DDR when there is a 0 overlap.  This
4344    is the case for example when access functions are the same and
4345    equal to a constant, as in:
4346
4347    | loop_1
4348    |   A[3] = ...
4349    |   ... = A[3]
4350    | endloop_1
4351
4352    in which case the distance vectors are (0) and (1).  */
4353
4354 static void
4355 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
4356 {
4357   unsigned i, j;
4358
4359   for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
4360     {
4361       subscript_p sub = DDR_SUBSCRIPT (ddr, i);
4362       conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
4363       conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
4364
4365       for (j = 0; j < ca->n; j++)
4366         if (affine_function_zero_p (ca->fns[j]))
4367           {
4368             insert_innermost_unit_dist_vector (ddr);
4369             return;
4370           }
4371
4372       for (j = 0; j < cb->n; j++)
4373         if (affine_function_zero_p (cb->fns[j]))
4374           {
4375             insert_innermost_unit_dist_vector (ddr);
4376             return;
4377           }
4378     }
4379 }
4380
4381 /* Return true when the DDR contains two data references that have the
4382    same access functions.  */
4383
4384 static inline bool
4385 same_access_functions (const struct data_dependence_relation *ddr)
4386 {
4387   unsigned i;
4388   subscript *sub;
4389
4390   FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
4391     if (!eq_evolutions_p (SUB_ACCESS_FN (sub, 0),
4392                           SUB_ACCESS_FN (sub, 1)))
4393       return false;
4394
4395   return true;
4396 }
4397
4398 /* Compute the classic per loop distance vector.  DDR is the data
4399    dependence relation to build a vector from.  Return false when fail
4400    to represent the data dependence as a distance vector.  */
4401
4402 static bool
4403 build_classic_dist_vector (struct data_dependence_relation *ddr,
4404                            struct loop *loop_nest)
4405 {
4406   bool init_b = false;
4407   int index_carry = DDR_NB_LOOPS (ddr);
4408   lambda_vector dist_v;
4409
4410   if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
4411     return false;
4412
4413   if (same_access_functions (ddr))
4414     {
4415       /* Save the 0 vector.  */
4416       dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4417       save_dist_v (ddr, dist_v);
4418
4419       if (constant_access_functions (ddr))
4420         add_distance_for_zero_overlaps (ddr);
4421
4422       if (DDR_NB_LOOPS (ddr) > 1)
4423         add_other_self_distances (ddr);
4424
4425       return true;
4426     }
4427
4428   dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4429   if (!build_classic_dist_vector_1 (ddr, 0, 1, dist_v, &init_b, &index_carry))
4430     return false;
4431
4432   /* Save the distance vector if we initialized one.  */
4433   if (init_b)
4434     {
4435       /* Verify a basic constraint: classic distance vectors should
4436          always be lexicographically positive.
4437
4438          Data references are collected in the order of execution of
4439          the program, thus for the following loop
4440
4441          | for (i = 1; i < 100; i++)
4442          |   for (j = 1; j < 100; j++)
4443          |     {
4444          |       t = T[j+1][i-1];  // A
4445          |       T[j][i] = t + 2;  // B
4446          |     }
4447
4448          references are collected following the direction of the wind:
4449          A then B.  The data dependence tests are performed also
4450          following this order, such that we're looking at the distance
4451          separating the elements accessed by A from the elements later
4452          accessed by B.  But in this example, the distance returned by
4453          test_dep (A, B) is lexicographically negative (-1, 1), that
4454          means that the access A occurs later than B with respect to
4455          the outer loop, ie. we're actually looking upwind.  In this
4456          case we solve test_dep (B, A) looking downwind to the
4457          lexicographically positive solution, that returns the
4458          distance vector (1, -1).  */
4459       if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
4460         {
4461           lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4462           if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
4463             return false;
4464           compute_subscript_distance (ddr);
4465           if (!build_classic_dist_vector_1 (ddr, 1, 0, save_v, &init_b,
4466                                             &index_carry))
4467             return false;
4468           save_dist_v (ddr, save_v);
4469           DDR_REVERSED_P (ddr) = true;
4470
4471           /* In this case there is a dependence forward for all the
4472              outer loops:
4473
4474              | for (k = 1; k < 100; k++)
4475              |  for (i = 1; i < 100; i++)
4476              |   for (j = 1; j < 100; j++)
4477              |     {
4478              |       t = T[j+1][i-1];  // A
4479              |       T[j][i] = t + 2;  // B
4480              |     }
4481
4482              the vectors are:
4483              (0,  1, -1)
4484              (1,  1, -1)
4485              (1, -1,  1)
4486           */
4487           if (DDR_NB_LOOPS (ddr) > 1)
4488             {
4489               add_outer_distances (ddr, save_v, index_carry);
4490               add_outer_distances (ddr, dist_v, index_carry);
4491             }
4492         }
4493       else
4494         {
4495           lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4496           lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
4497
4498           if (DDR_NB_LOOPS (ddr) > 1)
4499             {
4500               lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4501
4502               if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
4503                 return false;
4504               compute_subscript_distance (ddr);
4505               if (!build_classic_dist_vector_1 (ddr, 1, 0, opposite_v, &init_b,
4506                                                 &index_carry))
4507                 return false;
4508
4509               save_dist_v (ddr, save_v);
4510               add_outer_distances (ddr, dist_v, index_carry);
4511               add_outer_distances (ddr, opposite_v, index_carry);
4512             }
4513           else
4514             save_dist_v (ddr, save_v);
4515         }
4516     }
4517   else
4518     {
4519       /* There is a distance of 1 on all the outer loops: Example:
4520          there is a dependence of distance 1 on loop_1 for the array A.
4521
4522          | loop_1
4523          |   A[5] = ...
4524          | endloop
4525       */
4526       add_outer_distances (ddr, dist_v,
4527                            lambda_vector_first_nz (dist_v,
4528                                                    DDR_NB_LOOPS (ddr), 0));
4529     }
4530
4531   if (dump_file && (dump_flags & TDF_DETAILS))
4532     {
4533       unsigned i;
4534
4535       fprintf (dump_file, "(build_classic_dist_vector\n");
4536       for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
4537         {
4538           fprintf (dump_file, "  dist_vector = (");
4539           print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
4540                                DDR_NB_LOOPS (ddr));
4541           fprintf (dump_file, "  )\n");
4542         }
4543       fprintf (dump_file, ")\n");
4544     }
4545
4546   return true;
4547 }
4548
4549 /* Return the direction for a given distance.
4550    FIXME: Computing dir this way is suboptimal, since dir can catch
4551    cases that dist is unable to represent.  */
4552
4553 static inline enum data_dependence_direction
4554 dir_from_dist (int dist)
4555 {
4556   if (dist > 0)
4557     return dir_positive;
4558   else if (dist < 0)
4559     return dir_negative;
4560   else
4561     return dir_equal;
4562 }
4563
4564 /* Compute the classic per loop direction vector.  DDR is the data
4565    dependence relation to build a vector from.  */
4566
4567 static void
4568 build_classic_dir_vector (struct data_dependence_relation *ddr)
4569 {
4570   unsigned i, j;
4571   lambda_vector dist_v;
4572
4573   FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
4574     {
4575       lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4576
4577       for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
4578         dir_v[j] = dir_from_dist (dist_v[j]);
4579
4580       save_dir_v (ddr, dir_v);
4581     }
4582 }
4583
4584 /* Helper function.  Returns true when there is a dependence between the
4585    data references.  A_INDEX is the index of the first reference (0 for
4586    DDR_A, 1 for DDR_B) and B_INDEX is the index of the second reference.  */
4587
4588 static bool
4589 subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
4590                                unsigned int a_index, unsigned int b_index,
4591                                struct loop *loop_nest)
4592 {
4593   unsigned int i;
4594   tree last_conflicts;
4595   struct subscript *subscript;
4596   tree res = NULL_TREE;
4597
4598   for (i = 0; DDR_SUBSCRIPTS (ddr).iterate (i, &subscript); i++)
4599     {
4600       conflict_function *overlaps_a, *overlaps_b;
4601
4602       analyze_overlapping_iterations (SUB_ACCESS_FN (subscript, a_index),
4603                                       SUB_ACCESS_FN (subscript, b_index),
4604                                       &overlaps_a, &overlaps_b,
4605                                       &last_conflicts, loop_nest);
4606
4607       if (SUB_CONFLICTS_IN_A (subscript))
4608         free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
4609       if (SUB_CONFLICTS_IN_B (subscript))
4610         free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
4611
4612       SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
4613       SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
4614       SUB_LAST_CONFLICT (subscript) = last_conflicts;
4615
4616       /* If there is any undetermined conflict function we have to
4617          give a conservative answer in case we cannot prove that
4618          no dependence exists when analyzing another subscript.  */
4619       if (CF_NOT_KNOWN_P (overlaps_a)
4620           || CF_NOT_KNOWN_P (overlaps_b))
4621         {
4622           res = chrec_dont_know;
4623           continue;
4624         }
4625
4626       /* When there is a subscript with no dependence we can stop.  */
4627       else if (CF_NO_DEPENDENCE_P (overlaps_a)
4628                || CF_NO_DEPENDENCE_P (overlaps_b))
4629         {
4630           res = chrec_known;
4631           break;
4632         }
4633     }
4634
4635   if (res == NULL_TREE)
4636     return true;
4637
4638   if (res == chrec_known)
4639     dependence_stats.num_dependence_independent++;
4640   else
4641     dependence_stats.num_dependence_undetermined++;
4642   finalize_ddr_dependent (ddr, res);
4643   return false;
4644 }
4645
4646 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR.  */
4647
4648 static void
4649 subscript_dependence_tester (struct data_dependence_relation *ddr,
4650                              struct loop *loop_nest)
4651 {
4652   if (subscript_dependence_tester_1 (ddr, 0, 1, loop_nest))
4653     dependence_stats.num_dependence_dependent++;
4654
4655   compute_subscript_distance (ddr);
4656   if (build_classic_dist_vector (ddr, loop_nest))
4657     build_classic_dir_vector (ddr);
4658 }
4659
4660 /* Returns true when all the access functions of A are affine or
4661    constant with respect to LOOP_NEST.  */
4662
4663 static bool
4664 access_functions_are_affine_or_constant_p (const struct data_reference *a,
4665                                            const struct loop *loop_nest)
4666 {
4667   unsigned int i;
4668   vec<tree> fns = DR_ACCESS_FNS (a);
4669   tree t;
4670
4671   FOR_EACH_VEC_ELT (fns, i, t)
4672     if (!evolution_function_is_invariant_p (t, loop_nest->num)
4673         && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
4674       return false;
4675
4676   return true;
4677 }
4678
4679 /* This computes the affine dependence relation between A and B with
4680    respect to LOOP_NEST.  CHREC_KNOWN is used for representing the
4681    independence between two accesses, while CHREC_DONT_KNOW is used
4682    for representing the unknown relation.
4683
4684    Note that it is possible to stop the computation of the dependence
4685    relation the first time we detect a CHREC_KNOWN element for a given
4686    subscript.  */
4687
4688 void
4689 compute_affine_dependence (struct data_dependence_relation *ddr,
4690                            struct loop *loop_nest)
4691 {
4692   struct data_reference *dra = DDR_A (ddr);
4693   struct data_reference *drb = DDR_B (ddr);
4694
4695   if (dump_file && (dump_flags & TDF_DETAILS))
4696     {
4697       fprintf (dump_file, "(compute_affine_dependence\n");
4698       fprintf (dump_file, "  stmt_a: ");
4699       print_gimple_stmt (dump_file, DR_STMT (dra), 0, TDF_SLIM);
4700       fprintf (dump_file, "  stmt_b: ");
4701       print_gimple_stmt (dump_file, DR_STMT (drb), 0, TDF_SLIM);
4702     }
4703
4704   /* Analyze only when the dependence relation is not yet known.  */
4705   if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4706     {
4707       dependence_stats.num_dependence_tests++;
4708
4709       if (access_functions_are_affine_or_constant_p (dra, loop_nest)
4710           && access_functions_are_affine_or_constant_p (drb, loop_nest))
4711         subscript_dependence_tester (ddr, loop_nest);
4712
4713       /* As a last case, if the dependence cannot be determined, or if
4714          the dependence is considered too difficult to determine, answer
4715          "don't know".  */
4716       else
4717         {
4718           dependence_stats.num_dependence_undetermined++;
4719
4720           if (dump_file && (dump_flags & TDF_DETAILS))
4721             {
4722               fprintf (dump_file, "Data ref a:\n");
4723               dump_data_reference (dump_file, dra);
4724               fprintf (dump_file, "Data ref b:\n");
4725               dump_data_reference (dump_file, drb);
4726               fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
4727             }
4728           finalize_ddr_dependent (ddr, chrec_dont_know);
4729         }
4730     }
4731
4732   if (dump_file && (dump_flags & TDF_DETAILS))
4733     {
4734       if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4735         fprintf (dump_file, ") -> no dependence\n");
4736       else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
4737         fprintf (dump_file, ") -> dependence analysis failed\n");
4738       else
4739         fprintf (dump_file, ")\n");
4740     }
4741 }
4742
4743 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
4744    the data references in DATAREFS, in the LOOP_NEST.  When
4745    COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
4746    relations.  Return true when successful, i.e. data references number
4747    is small enough to be handled.  */
4748
4749 bool
4750 compute_all_dependences (vec<data_reference_p> datarefs,
4751                          vec<ddr_p> *dependence_relations,
4752                          vec<loop_p> loop_nest,
4753                          bool compute_self_and_rr)
4754 {
4755   struct data_dependence_relation *ddr;
4756   struct data_reference *a, *b;
4757   unsigned int i, j;
4758
4759   if ((int) datarefs.length ()
4760       > PARAM_VALUE (PARAM_LOOP_MAX_DATAREFS_FOR_DATADEPS))
4761     {
4762       struct data_dependence_relation *ddr;
4763
4764       /* Insert a single relation into dependence_relations:
4765          chrec_dont_know.  */
4766       ddr = initialize_data_dependence_relation (NULL, NULL, loop_nest);
4767       dependence_relations->safe_push (ddr);
4768       return false;
4769     }
4770
4771   FOR_EACH_VEC_ELT (datarefs, i, a)
4772     for (j = i + 1; datarefs.iterate (j, &b); j++)
4773       if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
4774         {
4775           ddr = initialize_data_dependence_relation (a, b, loop_nest);
4776           dependence_relations->safe_push (ddr);
4777           if (loop_nest.exists ())
4778             compute_affine_dependence (ddr, loop_nest[0]);
4779         }
4780
4781   if (compute_self_and_rr)
4782     FOR_EACH_VEC_ELT (datarefs, i, a)
4783       {
4784         ddr = initialize_data_dependence_relation (a, a, loop_nest);
4785         dependence_relations->safe_push (ddr);
4786         if (loop_nest.exists ())
4787           compute_affine_dependence (ddr, loop_nest[0]);
4788       }
4789
4790   return true;
4791 }
4792
4793 /* Describes a location of a memory reference.  */
4794
4795 struct data_ref_loc
4796 {
4797   /* The memory reference.  */
4798   tree ref;
4799
4800   /* True if the memory reference is read.  */
4801   bool is_read;
4802
4803   /* True if the data reference is conditional within the containing
4804      statement, i.e. if it might not occur even when the statement
4805      is executed and runs to completion.  */
4806   bool is_conditional_in_stmt;
4807 };
4808
4809
4810 /* Stores the locations of memory references in STMT to REFERENCES.  Returns
4811    true if STMT clobbers memory, false otherwise.  */
4812
4813 static bool
4814 get_references_in_stmt (gimple *stmt, vec<data_ref_loc, va_heap> *references)
4815 {
4816   bool clobbers_memory = false;
4817   data_ref_loc ref;
4818   tree op0, op1;
4819   enum gimple_code stmt_code = gimple_code (stmt);
4820
4821   /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
4822      As we cannot model data-references to not spelled out
4823      accesses give up if they may occur.  */
4824   if (stmt_code == GIMPLE_CALL
4825       && !(gimple_call_flags (stmt) & ECF_CONST))
4826     {
4827       /* Allow IFN_GOMP_SIMD_LANE in their own loops.  */
4828       if (gimple_call_internal_p (stmt))
4829         switch (gimple_call_internal_fn (stmt))
4830           {
4831           case IFN_GOMP_SIMD_LANE:
4832             {
4833               struct loop *loop = gimple_bb (stmt)->loop_father;
4834               tree uid = gimple_call_arg (stmt, 0);
4835               gcc_assert (TREE_CODE (uid) == SSA_NAME);
4836               if (loop == NULL
4837                   || loop->simduid != SSA_NAME_VAR (uid))
4838                 clobbers_memory = true;
4839               break;
4840             }
4841           case IFN_MASK_LOAD:
4842           case IFN_MASK_STORE:
4843             break;
4844           default:
4845             clobbers_memory = true;
4846             break;
4847           }
4848       else
4849         clobbers_memory = true;
4850     }
4851   else if (stmt_code == GIMPLE_ASM
4852            && (gimple_asm_volatile_p (as_a <gasm *> (stmt))
4853                || gimple_vuse (stmt)))
4854     clobbers_memory = true;
4855
4856   if (!gimple_vuse (stmt))
4857     return clobbers_memory;
4858
4859   if (stmt_code == GIMPLE_ASSIGN)
4860     {
4861       tree base;
4862       op0 = gimple_assign_lhs (stmt);
4863       op1 = gimple_assign_rhs1 (stmt);
4864
4865       if (DECL_P (op1)
4866           || (REFERENCE_CLASS_P (op1)
4867               && (base = get_base_address (op1))
4868               && TREE_CODE (base) != SSA_NAME
4869               && !is_gimple_min_invariant (base)))
4870         {
4871           ref.ref = op1;
4872           ref.is_read = true;
4873           ref.is_conditional_in_stmt = false;
4874           references->safe_push (ref);
4875         }
4876     }
4877   else if (stmt_code == GIMPLE_CALL)
4878     {
4879       unsigned i, n;
4880       tree ptr, type;
4881       unsigned int align;
4882
4883       ref.is_read = false;
4884       if (gimple_call_internal_p (stmt))
4885         switch (gimple_call_internal_fn (stmt))
4886           {
4887           case IFN_MASK_LOAD:
4888             if (gimple_call_lhs (stmt) == NULL_TREE)
4889               break;
4890             ref.is_read = true;
4891             /* FALLTHRU */
4892           case IFN_MASK_STORE:
4893             ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)), 0);
4894             align = tree_to_shwi (gimple_call_arg (stmt, 1));
4895             if (ref.is_read)
4896               type = TREE_TYPE (gimple_call_lhs (stmt));
4897             else
4898               type = TREE_TYPE (gimple_call_arg (stmt, 3));
4899             if (TYPE_ALIGN (type) != align)
4900               type = build_aligned_type (type, align);
4901             ref.is_conditional_in_stmt = true;
4902             ref.ref = fold_build2 (MEM_REF, type, gimple_call_arg (stmt, 0),
4903                                    ptr);
4904             references->safe_push (ref);
4905             return false;
4906           default:
4907             break;
4908           }
4909
4910       op0 = gimple_call_lhs (stmt);
4911       n = gimple_call_num_args (stmt);
4912       for (i = 0; i < n; i++)
4913         {
4914           op1 = gimple_call_arg (stmt, i);
4915
4916           if (DECL_P (op1)
4917               || (REFERENCE_CLASS_P (op1) && get_base_address (op1)))
4918             {
4919               ref.ref = op1;
4920               ref.is_read = true;
4921               ref.is_conditional_in_stmt = false;
4922               references->safe_push (ref);
4923             }
4924         }
4925     }
4926   else
4927     return clobbers_memory;
4928
4929   if (op0
4930       && (DECL_P (op0)
4931           || (REFERENCE_CLASS_P (op0) && get_base_address (op0))))
4932     {
4933       ref.ref = op0;
4934       ref.is_read = false;
4935       ref.is_conditional_in_stmt = false;
4936       references->safe_push (ref);
4937     }
4938   return clobbers_memory;
4939 }
4940
4941
4942 /* Returns true if the loop-nest has any data reference.  */
4943
4944 bool
4945 loop_nest_has_data_refs (loop_p loop)
4946 {
4947   basic_block *bbs = get_loop_body (loop);
4948   auto_vec<data_ref_loc, 3> references;
4949
4950   for (unsigned i = 0; i < loop->num_nodes; i++)
4951     {
4952       basic_block bb = bbs[i];
4953       gimple_stmt_iterator bsi;
4954
4955       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4956         {
4957           gimple *stmt = gsi_stmt (bsi);
4958           get_references_in_stmt (stmt, &references);
4959           if (references.length ())
4960             {
4961               free (bbs);
4962               return true;
4963             }
4964         }
4965     }
4966   free (bbs);
4967   return false;
4968 }
4969
4970 /* Stores the data references in STMT to DATAREFS.  If there is an unanalyzable
4971    reference, returns false, otherwise returns true.  NEST is the outermost
4972    loop of the loop nest in which the references should be analyzed.  */
4973
4974 bool
4975 find_data_references_in_stmt (struct loop *nest, gimple *stmt,
4976                               vec<data_reference_p> *datarefs)
4977 {
4978   unsigned i;
4979   auto_vec<data_ref_loc, 2> references;
4980   data_ref_loc *ref;
4981   bool ret = true;
4982   data_reference_p dr;
4983
4984   if (get_references_in_stmt (stmt, &references))
4985     return false;
4986
4987   FOR_EACH_VEC_ELT (references, i, ref)
4988     {
4989       dr = create_data_ref (nest ? loop_preheader_edge (nest) : NULL,
4990                             loop_containing_stmt (stmt), ref->ref,
4991                             stmt, ref->is_read, ref->is_conditional_in_stmt);
4992       gcc_assert (dr != NULL);
4993       datarefs->safe_push (dr);
4994     }
4995
4996   return ret;
4997 }
4998
4999 /* Stores the data references in STMT to DATAREFS.  If there is an
5000    unanalyzable reference, returns false, otherwise returns true.
5001    NEST is the outermost loop of the loop nest in which the references
5002    should be instantiated, LOOP is the loop in which the references
5003    should be analyzed.  */
5004
5005 bool
5006 graphite_find_data_references_in_stmt (edge nest, loop_p loop, gimple *stmt,
5007                                        vec<data_reference_p> *datarefs)
5008 {
5009   unsigned i;
5010   auto_vec<data_ref_loc, 2> references;
5011   data_ref_loc *ref;
5012   bool ret = true;
5013   data_reference_p dr;
5014
5015   if (get_references_in_stmt (stmt, &references))
5016     return false;
5017
5018   FOR_EACH_VEC_ELT (references, i, ref)
5019     {
5020       dr = create_data_ref (nest, loop, ref->ref, stmt, ref->is_read,
5021                             ref->is_conditional_in_stmt);
5022       gcc_assert (dr != NULL);
5023       datarefs->safe_push (dr);
5024     }
5025
5026   return ret;
5027 }
5028
5029 /* Search the data references in LOOP, and record the information into
5030    DATAREFS.  Returns chrec_dont_know when failing to analyze a
5031    difficult case, returns NULL_TREE otherwise.  */
5032
5033 tree
5034 find_data_references_in_bb (struct loop *loop, basic_block bb,
5035                             vec<data_reference_p> *datarefs)
5036 {
5037   gimple_stmt_iterator bsi;
5038
5039   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5040     {
5041       gimple *stmt = gsi_stmt (bsi);
5042
5043       if (!find_data_references_in_stmt (loop, stmt, datarefs))
5044         {
5045           struct data_reference *res;
5046           res = XCNEW (struct data_reference);
5047           datarefs->safe_push (res);
5048
5049           return chrec_dont_know;
5050         }
5051     }
5052
5053   return NULL_TREE;
5054 }
5055
5056 /* Search the data references in LOOP, and record the information into
5057    DATAREFS.  Returns chrec_dont_know when failing to analyze a
5058    difficult case, returns NULL_TREE otherwise.
5059
5060    TODO: This function should be made smarter so that it can handle address
5061    arithmetic as if they were array accesses, etc.  */
5062
5063 tree
5064 find_data_references_in_loop (struct loop *loop,
5065                               vec<data_reference_p> *datarefs)
5066 {
5067   basic_block bb, *bbs;
5068   unsigned int i;
5069
5070   bbs = get_loop_body_in_dom_order (loop);
5071
5072   for (i = 0; i < loop->num_nodes; i++)
5073     {
5074       bb = bbs[i];
5075
5076       if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
5077         {
5078           free (bbs);
5079           return chrec_dont_know;
5080         }
5081     }
5082   free (bbs);
5083
5084   return NULL_TREE;
5085 }
5086
5087 /* Return the alignment in bytes that DRB is guaranteed to have at all
5088    times.  */
5089
5090 unsigned int
5091 dr_alignment (innermost_loop_behavior *drb)
5092 {
5093   /* Get the alignment of BASE_ADDRESS + INIT.  */
5094   unsigned int alignment = drb->base_alignment;
5095   unsigned int misalignment = (drb->base_misalignment
5096                                + TREE_INT_CST_LOW (drb->init));
5097   if (misalignment != 0)
5098     alignment = MIN (alignment, misalignment & -misalignment);
5099
5100   /* Cap it to the alignment of OFFSET.  */
5101   if (!integer_zerop (drb->offset))
5102     alignment = MIN (alignment, drb->offset_alignment);
5103
5104   /* Cap it to the alignment of STEP.  */
5105   if (!integer_zerop (drb->step))
5106     alignment = MIN (alignment, drb->step_alignment);
5107
5108   return alignment;
5109 }
5110
5111 /* Recursive helper function.  */
5112
5113 static bool
5114 find_loop_nest_1 (struct loop *loop, vec<loop_p> *loop_nest)
5115 {
5116   /* Inner loops of the nest should not contain siblings.  Example:
5117      when there are two consecutive loops,
5118
5119      | loop_0
5120      |   loop_1
5121      |     A[{0, +, 1}_1]
5122      |   endloop_1
5123      |   loop_2
5124      |     A[{0, +, 1}_2]
5125      |   endloop_2
5126      | endloop_0
5127
5128      the dependence relation cannot be captured by the distance
5129      abstraction.  */
5130   if (loop->next)
5131     return false;
5132
5133   loop_nest->safe_push (loop);
5134   if (loop->inner)
5135     return find_loop_nest_1 (loop->inner, loop_nest);
5136   return true;
5137 }
5138
5139 /* Return false when the LOOP is not well nested.  Otherwise return
5140    true and insert in LOOP_NEST the loops of the nest.  LOOP_NEST will
5141    contain the loops from the outermost to the innermost, as they will
5142    appear in the classic distance vector.  */
5143
5144 bool
5145 find_loop_nest (struct loop *loop, vec<loop_p> *loop_nest)
5146 {
5147   loop_nest->safe_push (loop);
5148   if (loop->inner)
5149     return find_loop_nest_1 (loop->inner, loop_nest);
5150   return true;
5151 }
5152
5153 /* Returns true when the data dependences have been computed, false otherwise.
5154    Given a loop nest LOOP, the following vectors are returned:
5155    DATAREFS is initialized to all the array elements contained in this loop,
5156    DEPENDENCE_RELATIONS contains the relations between the data references.
5157    Compute read-read and self relations if
5158    COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE.  */
5159
5160 bool
5161 compute_data_dependences_for_loop (struct loop *loop,
5162                                    bool compute_self_and_read_read_dependences,
5163                                    vec<loop_p> *loop_nest,
5164                                    vec<data_reference_p> *datarefs,
5165                                    vec<ddr_p> *dependence_relations)
5166 {
5167   bool res = true;
5168
5169   memset (&dependence_stats, 0, sizeof (dependence_stats));
5170
5171   /* If the loop nest is not well formed, or one of the data references
5172      is not computable, give up without spending time to compute other
5173      dependences.  */
5174   if (!loop
5175       || !find_loop_nest (loop, loop_nest)
5176       || find_data_references_in_loop (loop, datarefs) == chrec_dont_know
5177       || !compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
5178                                    compute_self_and_read_read_dependences))
5179     res = false;
5180
5181   if (dump_file && (dump_flags & TDF_STATS))
5182     {
5183       fprintf (dump_file, "Dependence tester statistics:\n");
5184
5185       fprintf (dump_file, "Number of dependence tests: %d\n",
5186                dependence_stats.num_dependence_tests);
5187       fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
5188                dependence_stats.num_dependence_dependent);
5189       fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
5190                dependence_stats.num_dependence_independent);
5191       fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
5192                dependence_stats.num_dependence_undetermined);
5193
5194       fprintf (dump_file, "Number of subscript tests: %d\n",
5195                dependence_stats.num_subscript_tests);
5196       fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
5197                dependence_stats.num_subscript_undetermined);
5198       fprintf (dump_file, "Number of same subscript function: %d\n",
5199                dependence_stats.num_same_subscript_function);
5200
5201       fprintf (dump_file, "Number of ziv tests: %d\n",
5202                dependence_stats.num_ziv);
5203       fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
5204                dependence_stats.num_ziv_dependent);
5205       fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
5206                dependence_stats.num_ziv_independent);
5207       fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
5208                dependence_stats.num_ziv_unimplemented);
5209
5210       fprintf (dump_file, "Number of siv tests: %d\n",
5211                dependence_stats.num_siv);
5212       fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
5213                dependence_stats.num_siv_dependent);
5214       fprintf (dump_file, "Number of siv tests returning independent: %d\n",
5215                dependence_stats.num_siv_independent);
5216       fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
5217                dependence_stats.num_siv_unimplemented);
5218
5219       fprintf (dump_file, "Number of miv tests: %d\n",
5220                dependence_stats.num_miv);
5221       fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
5222                dependence_stats.num_miv_dependent);
5223       fprintf (dump_file, "Number of miv tests returning independent: %d\n",
5224                dependence_stats.num_miv_independent);
5225       fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
5226                dependence_stats.num_miv_unimplemented);
5227     }
5228
5229   return res;
5230 }
5231
5232 /* Free the memory used by a data dependence relation DDR.  */
5233
5234 void
5235 free_dependence_relation (struct data_dependence_relation *ddr)
5236 {
5237   if (ddr == NULL)
5238     return;
5239
5240   if (DDR_SUBSCRIPTS (ddr).exists ())
5241     free_subscripts (DDR_SUBSCRIPTS (ddr));
5242   DDR_DIST_VECTS (ddr).release ();
5243   DDR_DIR_VECTS (ddr).release ();
5244
5245   free (ddr);
5246 }
5247
5248 /* Free the memory used by the data dependence relations from
5249    DEPENDENCE_RELATIONS.  */
5250
5251 void
5252 free_dependence_relations (vec<ddr_p> dependence_relations)
5253 {
5254   unsigned int i;
5255   struct data_dependence_relation *ddr;
5256
5257   FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
5258     if (ddr)
5259       free_dependence_relation (ddr);
5260
5261   dependence_relations.release ();
5262 }
5263
5264 /* Free the memory used by the data references from DATAREFS.  */
5265
5266 void
5267 free_data_refs (vec<data_reference_p> datarefs)
5268 {
5269   unsigned int i;
5270   struct data_reference *dr;
5271
5272   FOR_EACH_VEC_ELT (datarefs, i, dr)
5273     free_data_ref (dr);
5274   datarefs.release ();
5275 }