gcc/tree-data-ref.c

   1 /* Data references and dependences detectors.
   2    Copyright (C) 2003-2014 Free Software Foundation, Inc.
   3    Contributed by Sebastian Pop <pop@cri.ensmp.fr>
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 /* This pass walks a given loop structure searching for array
  22    references.  The information about the array accesses is recorded
  23    in DATA_REFERENCE structures.
  24
  25    The basic test for determining the dependences is:
  26    given two access functions chrec1 and chrec2 to a same array, and
  27    x and y two vectors from the iteration domain, the same element of
  28    the array is accessed twice at iterations x and y if and only if:
  29    |             chrec1 (x) == chrec2 (y).
  30
  31    The goals of this analysis are:
  32
  33    - to determine the independence: the relation between two
  34      independent accesses is qualified with the chrec_known (this
  35      information allows a loop parallelization),
  36
  37    - when two data references access the same data, to qualify the
  38      dependence relation with classic dependence representations:
  39
  40        - distance vectors
  41        - direction vectors
  42        - loop carried level dependence
  43        - polyhedron dependence
  44      or with the chains of recurrences based representation,
  45
  46    - to define a knowledge base for storing the data dependence
  47      information,
  48
  49    - to define an interface to access this data.
  50
  51
  52    Definitions:
  53
  54    - subscript: given two array accesses a subscript is the tuple
  55    composed of the access functions for a given dimension.  Example:
  56    Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
  57    (f1, g1), (f2, g2), (f3, g3).
  58
  59    - Diophantine equation: an equation whose coefficients and
  60    solutions are integer constants, for example the equation
  61    |   3*x + 2*y = 1
  62    has an integer solution x = 1 and y = -1.
  63
  64    References:
  65
  66    - "Advanced Compilation for High Performance Computing" by Randy
  67    Allen and Ken Kennedy.
  68    http://citeseer.ist.psu.edu/goff91practical.html
  69
  70    - "Loop Transformations for Restructuring Compilers - The Foundations"
  71    by Utpal Banerjee.
  72
  73
  74 */
  75
  76 #include "config.h"
  77 #include "system.h"
  78 #include "coretypes.h"
  79 #include "tree.h"
  80 #include "expr.h"
  81 #include "gimple-pretty-print.h"
  82 #include "predict.h"
  83 #include "vec.h"
  84 #include "hashtab.h"
  85 #include "hash-set.h"
  86 #include "machmode.h"
  87 #include "tm.h"
  88 #include "hard-reg-set.h"
  89 #include "input.h"
  90 #include "function.h"
  91 #include "dominance.h"
  92 #include "cfg.h"
  93 #include "basic-block.h"
  94 #include "tree-ssa-alias.h"
  95 #include "internal-fn.h"
  96 #include "gimple-expr.h"
  97 #include "is-a.h"
  98 #include "gimple.h"
  99 #include "gimple-iterator.h"
 100 #include "tree-ssa-loop-niter.h"
 101 #include "tree-ssa-loop.h"
 102 #include "tree-ssa.h"
 103 #include "cfgloop.h"
 104 #include "tree-data-ref.h"
 105 #include "tree-scalar-evolution.h"
 106 #include "dumpfile.h"
 107 #include "langhooks.h"
 108 #include "tree-affine.h"
 109 #include "params.h"
 110
 111 static struct datadep_stats
 112 {
 113   int num_dependence_tests;
 114   int num_dependence_dependent;
 115   int num_dependence_independent;
 116   int num_dependence_undetermined;
 117
 118   int num_subscript_tests;
 119   int num_subscript_undetermined;
 120   int num_same_subscript_function;
 121
 122   int num_ziv;
 123   int num_ziv_independent;
 124   int num_ziv_dependent;
 125   int num_ziv_unimplemented;
 126
 127   int num_siv;
 128   int num_siv_independent;
 129   int num_siv_dependent;
 130   int num_siv_unimplemented;
 131
 132   int num_miv;
 133   int num_miv_independent;
 134   int num_miv_dependent;
 135   int num_miv_unimplemented;
 136 } dependence_stats;
 137
 138 static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
 139                                            struct data_reference *,
 140                                            struct data_reference *,
 141                                            struct loop *);
 142 /* Returns true iff A divides B.  */
 143
 144 static inline bool
 145 tree_fold_divides_p (const_tree a, const_tree b)
 146 {
 147   gcc_assert (TREE_CODE (a) == INTEGER_CST);
 148   gcc_assert (TREE_CODE (b) == INTEGER_CST);
 149   return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
 150 }
 151
 152 /* Returns true iff A divides B.  */
 153
 154 static inline bool
 155 int_divides_p (int a, int b)
 156 {
 157   return ((b % a) == 0);
 158 }
 159
 160 \f
 161
 162 /* Dump into FILE all the data references from DATAREFS.  */
 163
 164 static void
 165 dump_data_references (FILE *file, vec<data_reference_p> datarefs)
 166 {
 167   unsigned int i;
 168   struct data_reference *dr;
 169
 170   FOR_EACH_VEC_ELT (datarefs, i, dr)
 171     dump_data_reference (file, dr);
 172 }
 173
 174 /* Unified dump into FILE all the data references from DATAREFS.  */
 175
 176 DEBUG_FUNCTION void
 177 debug (vec<data_reference_p> &ref)
 178 {
 179   dump_data_references (stderr, ref);
 180 }
 181
 182 DEBUG_FUNCTION void
 183 debug (vec<data_reference_p> *ptr)
 184 {
 185   if (ptr)
 186     debug (*ptr);
 187   else
 188     fprintf (stderr, "<nil>\n");
 189 }
 190
 191
 192 /* Dump into STDERR all the data references from DATAREFS.  */
 193
 194 DEBUG_FUNCTION void
 195 debug_data_references (vec<data_reference_p> datarefs)
 196 {
 197   dump_data_references (stderr, datarefs);
 198 }
 199
 200 /* Print to STDERR the data_reference DR.  */
 201
 202 DEBUG_FUNCTION void
 203 debug_data_reference (struct data_reference *dr)
 204 {
 205   dump_data_reference (stderr, dr);
 206 }
 207
 208 /* Dump function for a DATA_REFERENCE structure.  */
 209
 210 void
 211 dump_data_reference (FILE *outf,
 212                      struct data_reference *dr)
 213 {
 214   unsigned int i;
 215
 216   fprintf (outf, "#(Data Ref: \n");
 217   fprintf (outf, "#  bb: %d \n", gimple_bb (DR_STMT (dr))->index);
 218   fprintf (outf, "#  stmt: ");
 219   print_gimple_stmt (outf, DR_STMT (dr), 0, 0);
 220   fprintf (outf, "#  ref: ");
 221   print_generic_stmt (outf, DR_REF (dr), 0);
 222   fprintf (outf, "#  base_object: ");
 223   print_generic_stmt (outf, DR_BASE_OBJECT (dr), 0);
 224
 225   for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
 226     {
 227       fprintf (outf, "#  Access function %d: ", i);
 228       print_generic_stmt (outf, DR_ACCESS_FN (dr, i), 0);
 229     }
 230   fprintf (outf, "#)\n");
 231 }
 232
 233 /* Unified dump function for a DATA_REFERENCE structure.  */
 234
 235 DEBUG_FUNCTION void
 236 debug (data_reference &ref)
 237 {
 238   dump_data_reference (stderr, &ref);
 239 }
 240
 241 DEBUG_FUNCTION void
 242 debug (data_reference *ptr)
 243 {
 244   if (ptr)
 245     debug (*ptr);
 246   else
 247     fprintf (stderr, "<nil>\n");
 248 }
 249
 250
 251 /* Dumps the affine function described by FN to the file OUTF.  */
 252
 253 static void
 254 dump_affine_function (FILE *outf, affine_fn fn)
 255 {
 256   unsigned i;
 257   tree coef;
 258
 259   print_generic_expr (outf, fn[0], TDF_SLIM);
 260   for (i = 1; fn.iterate (i, &coef); i++)
 261     {
 262       fprintf (outf, " + ");
 263       print_generic_expr (outf, coef, TDF_SLIM);
 264       fprintf (outf, " * x_%u", i);
 265     }
 266 }
 267
 268 /* Dumps the conflict function CF to the file OUTF.  */
 269
 270 static void
 271 dump_conflict_function (FILE *outf, conflict_function *cf)
 272 {
 273   unsigned i;
 274
 275   if (cf->n == NO_DEPENDENCE)
 276     fprintf (outf, "no dependence");
 277   else if (cf->n == NOT_KNOWN)
 278     fprintf (outf, "not known");
 279   else
 280     {
 281       for (i = 0; i < cf->n; i++)
 282         {
 283           if (i != 0)
 284             fprintf (outf, " ");
 285           fprintf (outf, "[");
 286           dump_affine_function (outf, cf->fns[i]);
 287           fprintf (outf, "]");
 288         }
 289     }
 290 }
 291
 292 /* Dump function for a SUBSCRIPT structure.  */
 293
 294 static void
 295 dump_subscript (FILE *outf, struct subscript *subscript)
 296 {
 297   conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
 298
 299   fprintf (outf, "\n (subscript \n");
 300   fprintf (outf, "  iterations_that_access_an_element_twice_in_A: ");
 301   dump_conflict_function (outf, cf);
 302   if (CF_NONTRIVIAL_P (cf))
 303     {
 304       tree last_iteration = SUB_LAST_CONFLICT (subscript);
 305       fprintf (outf, "\n  last_conflict: ");
 306       print_generic_expr (outf, last_iteration, 0);
 307     }
 308
 309   cf = SUB_CONFLICTS_IN_B (subscript);
 310   fprintf (outf, "\n  iterations_that_access_an_element_twice_in_B: ");
 311   dump_conflict_function (outf, cf);
 312   if (CF_NONTRIVIAL_P (cf))
 313     {
 314       tree last_iteration = SUB_LAST_CONFLICT (subscript);
 315       fprintf (outf, "\n  last_conflict: ");
 316       print_generic_expr (outf, last_iteration, 0);
 317     }
 318
 319   fprintf (outf, "\n  (Subscript distance: ");
 320   print_generic_expr (outf, SUB_DISTANCE (subscript), 0);
 321   fprintf (outf, " ))\n");
 322 }
 323
 324 /* Print the classic direction vector DIRV to OUTF.  */
 325
 326 static void
 327 print_direction_vector (FILE *outf,
 328                         lambda_vector dirv,
 329                         int length)
 330 {
 331   int eq;
 332
 333   for (eq = 0; eq < length; eq++)
 334     {
 335       enum data_dependence_direction dir = ((enum data_dependence_direction)
 336                                             dirv[eq]);
 337
 338       switch (dir)
 339         {
 340         case dir_positive:
 341           fprintf (outf, "    +");
 342           break;
 343         case dir_negative:
 344           fprintf (outf, "    -");
 345           break;
 346         case dir_equal:
 347           fprintf (outf, "    =");
 348           break;
 349         case dir_positive_or_equal:
 350           fprintf (outf, "   +=");
 351           break;
 352         case dir_positive_or_negative:
 353           fprintf (outf, "   +-");
 354           break;
 355         case dir_negative_or_equal:
 356           fprintf (outf, "   -=");
 357           break;
 358         case dir_star:
 359           fprintf (outf, "    *");
 360           break;
 361         default:
 362           fprintf (outf, "indep");
 363           break;
 364         }
 365     }
 366   fprintf (outf, "\n");
 367 }
 368
 369 /* Print a vector of direction vectors.  */
 370
 371 static void
 372 print_dir_vectors (FILE *outf, vec<lambda_vector> dir_vects,
 373                    int length)
 374 {
 375   unsigned j;
 376   lambda_vector v;
 377
 378   FOR_EACH_VEC_ELT (dir_vects, j, v)
 379     print_direction_vector (outf, v, length);
 380 }
 381
 382 /* Print out a vector VEC of length N to OUTFILE.  */
 383
 384 static inline void
 385 print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
 386 {
 387   int i;
 388
 389   for (i = 0; i < n; i++)
 390     fprintf (outfile, "%3d ", vector[i]);
 391   fprintf (outfile, "\n");
 392 }
 393
 394 /* Print a vector of distance vectors.  */
 395
 396 static void
 397 print_dist_vectors (FILE *outf, vec<lambda_vector> dist_vects,
 398                     int length)
 399 {
 400   unsigned j;
 401   lambda_vector v;
 402
 403   FOR_EACH_VEC_ELT (dist_vects, j, v)
 404     print_lambda_vector (outf, v, length);
 405 }
 406
 407 /* Dump function for a DATA_DEPENDENCE_RELATION structure.  */
 408
 409 static void
 410 dump_data_dependence_relation (FILE *outf,
 411                                struct data_dependence_relation *ddr)
 412 {
 413   struct data_reference *dra, *drb;
 414
 415   fprintf (outf, "(Data Dep: \n");
 416
 417   if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
 418     {
 419       if (ddr)
 420         {
 421           dra = DDR_A (ddr);
 422           drb = DDR_B (ddr);
 423           if (dra)
 424             dump_data_reference (outf, dra);
 425           else
 426             fprintf (outf, "    (nil)\n");
 427           if (drb)
 428             dump_data_reference (outf, drb);
 429           else
 430             fprintf (outf, "    (nil)\n");
 431         }
 432       fprintf (outf, "    (don't know)\n)\n");
 433       return;
 434     }
 435
 436   dra = DDR_A (ddr);
 437   drb = DDR_B (ddr);
 438   dump_data_reference (outf, dra);
 439   dump_data_reference (outf, drb);
 440
 441   if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
 442     fprintf (outf, "    (no dependence)\n");
 443
 444   else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
 445     {
 446       unsigned int i;
 447       struct loop *loopi;
 448
 449       for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
 450         {
 451           fprintf (outf, "  access_fn_A: ");
 452           print_generic_stmt (outf, DR_ACCESS_FN (dra, i), 0);
 453           fprintf (outf, "  access_fn_B: ");
 454           print_generic_stmt (outf, DR_ACCESS_FN (drb, i), 0);
 455           dump_subscript (outf, DDR_SUBSCRIPT (ddr, i));
 456         }
 457
 458       fprintf (outf, "  inner loop index: %d\n", DDR_INNER_LOOP (ddr));
 459       fprintf (outf, "  loop nest: (");
 460       FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr), i, loopi)
 461         fprintf (outf, "%d ", loopi->num);
 462       fprintf (outf, ")\n");
 463
 464       for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
 465         {
 466           fprintf (outf, "  distance_vector: ");
 467           print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
 468                                DDR_NB_LOOPS (ddr));
 469         }
 470
 471       for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
 472         {
 473           fprintf (outf, "  direction_vector: ");
 474           print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
 475                                   DDR_NB_LOOPS (ddr));
 476         }
 477     }
 478
 479   fprintf (outf, ")\n");
 480 }
 481
 482 /* Debug version.  */
 483
 484 DEBUG_FUNCTION void
 485 debug_data_dependence_relation (struct data_dependence_relation *ddr)
 486 {
 487   dump_data_dependence_relation (stderr, ddr);
 488 }
 489
 490 /* Dump into FILE all the dependence relations from DDRS.  */
 491
 492 void
 493 dump_data_dependence_relations (FILE *file,
 494                                 vec<ddr_p> ddrs)
 495 {
 496   unsigned int i;
 497   struct data_dependence_relation *ddr;
 498
 499   FOR_EACH_VEC_ELT (ddrs, i, ddr)
 500     dump_data_dependence_relation (file, ddr);
 501 }
 502
 503 DEBUG_FUNCTION void
 504 debug (vec<ddr_p> &ref)
 505 {
 506   dump_data_dependence_relations (stderr, ref);
 507 }
 508
 509 DEBUG_FUNCTION void
 510 debug (vec<ddr_p> *ptr)
 511 {
 512   if (ptr)
 513     debug (*ptr);
 514   else
 515     fprintf (stderr, "<nil>\n");
 516 }
 517
 518
 519 /* Dump to STDERR all the dependence relations from DDRS.  */
 520
 521 DEBUG_FUNCTION void
 522 debug_data_dependence_relations (vec<ddr_p> ddrs)
 523 {
 524   dump_data_dependence_relations (stderr, ddrs);
 525 }
 526
 527 /* Dumps the distance and direction vectors in FILE.  DDRS contains
 528    the dependence relations, and VECT_SIZE is the size of the
 529    dependence vectors, or in other words the number of loops in the
 530    considered nest.  */
 531
 532 static void
 533 dump_dist_dir_vectors (FILE *file, vec<ddr_p> ddrs)
 534 {
 535   unsigned int i, j;
 536   struct data_dependence_relation *ddr;
 537   lambda_vector v;
 538
 539   FOR_EACH_VEC_ELT (ddrs, i, ddr)
 540     if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
 541       {
 542         FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), j, v)
 543           {
 544             fprintf (file, "DISTANCE_V (");
 545             print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
 546             fprintf (file, ")\n");
 547           }
 548
 549         FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), j, v)
 550           {
 551             fprintf (file, "DIRECTION_V (");
 552             print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
 553             fprintf (file, ")\n");
 554           }
 555       }
 556
 557   fprintf (file, "\n\n");
 558 }
 559
 560 /* Dumps the data dependence relations DDRS in FILE.  */
 561
 562 static void
 563 dump_ddrs (FILE *file, vec<ddr_p> ddrs)
 564 {
 565   unsigned int i;
 566   struct data_dependence_relation *ddr;
 567
 568   FOR_EACH_VEC_ELT (ddrs, i, ddr)
 569     dump_data_dependence_relation (file, ddr);
 570
 571   fprintf (file, "\n\n");
 572 }
 573
 574 DEBUG_FUNCTION void
 575 debug_ddrs (vec<ddr_p> ddrs)
 576 {
 577   dump_ddrs (stderr, ddrs);
 578 }
 579
 580 /* Helper function for split_constant_offset.  Expresses OP0 CODE OP1
 581    (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
 582    constant of type ssizetype, and returns true.  If we cannot do this
 583    with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
 584    is returned.  */
 585
 586 static bool
 587 split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
 588                          tree *var, tree *off)
 589 {
 590   tree var0, var1;
 591   tree off0, off1;
 592   enum tree_code ocode = code;
 593
 594   *var = NULL_TREE;
 595   *off = NULL_TREE;
 596
 597   switch (code)
 598     {
 599     case INTEGER_CST:
 600       *var = build_int_cst (type, 0);
 601       *off = fold_convert (ssizetype, op0);
 602       return true;
 603
 604     case POINTER_PLUS_EXPR:
 605       ocode = PLUS_EXPR;
 606       /* FALLTHROUGH */
 607     case PLUS_EXPR:
 608     case MINUS_EXPR:
 609       split_constant_offset (op0, &var0, &off0);
 610       split_constant_offset (op1, &var1, &off1);
 611       *var = fold_build2 (code, type, var0, var1);
 612       *off = size_binop (ocode, off0, off1);
 613       return true;
 614
 615     case MULT_EXPR:
 616       if (TREE_CODE (op1) != INTEGER_CST)
 617         return false;
 618
 619       split_constant_offset (op0, &var0, &off0);
 620       *var = fold_build2 (MULT_EXPR, type, var0, op1);
 621       *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
 622       return true;
 623
 624     case ADDR_EXPR:
 625       {
 626         tree base, poffset;
 627         HOST_WIDE_INT pbitsize, pbitpos;
 628         machine_mode pmode;
 629         int punsignedp, pvolatilep;
 630
 631         op0 = TREE_OPERAND (op0, 0);
 632         base = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset,
 633                                     &pmode, &punsignedp, &pvolatilep, false);
 634
 635         if (pbitpos % BITS_PER_UNIT != 0)
 636           return false;
 637         base = build_fold_addr_expr (base);
 638         off0 = ssize_int (pbitpos / BITS_PER_UNIT);
 639
 640         if (poffset)
 641           {
 642             split_constant_offset (poffset, &poffset, &off1);
 643             off0 = size_binop (PLUS_EXPR, off0, off1);
 644             if (POINTER_TYPE_P (TREE_TYPE (base)))
 645               base = fold_build_pointer_plus (base, poffset);
 646             else
 647               base = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base,
 648                                   fold_convert (TREE_TYPE (base), poffset));
 649           }
 650
 651         var0 = fold_convert (type, base);
 652
 653         /* If variable length types are involved, punt, otherwise casts
 654            might be converted into ARRAY_REFs in gimplify_conversion.
 655            To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
 656            possibly no longer appears in current GIMPLE, might resurface.
 657            This perhaps could run
 658            if (CONVERT_EXPR_P (var0))
 659              {
 660                gimplify_conversion (&var0);
 661                // Attempt to fill in any within var0 found ARRAY_REF's
 662                // element size from corresponding op embedded ARRAY_REF,
 663                // if unsuccessful, just punt.
 664              }  */
 665         while (POINTER_TYPE_P (type))
 666           type = TREE_TYPE (type);
 667         if (int_size_in_bytes (type) < 0)
 668           return false;
 669
 670         *var = var0;
 671         *off = off0;
 672         return true;
 673       }
 674
 675     case SSA_NAME:
 676       {
 677         gimple def_stmt = SSA_NAME_DEF_STMT (op0);
 678         enum tree_code subcode;
 679
 680         if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
 681           return false;
 682
 683         var0 = gimple_assign_rhs1 (def_stmt);
 684         subcode = gimple_assign_rhs_code (def_stmt);
 685         var1 = gimple_assign_rhs2 (def_stmt);
 686
 687         return split_constant_offset_1 (type, var0, subcode, var1, var, off);
 688       }
 689     CASE_CONVERT:
 690       {
 691         /* We must not introduce undefined overflow, and we must not change the value.
 692            Hence we're okay if the inner type doesn't overflow to start with
 693            (pointer or signed), the outer type also is an integer or pointer
 694            and the outer precision is at least as large as the inner.  */
 695         tree itype = TREE_TYPE (op0);
 696         if ((POINTER_TYPE_P (itype)
 697              || (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_UNDEFINED (itype)))
 698             && TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
 699             && (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
 700           {
 701             split_constant_offset (op0, &var0, off);
 702             *var = fold_convert (type, var0);
 703             return true;
 704           }
 705         return false;
 706       }
 707
 708     default:
 709       return false;
 710     }
 711 }
 712
 713 /* Expresses EXP as VAR + OFF, where off is a constant.  The type of OFF
 714    will be ssizetype.  */
 715
 716 void
 717 split_constant_offset (tree exp, tree *var, tree *off)
 718 {
 719   tree type = TREE_TYPE (exp), otype, op0, op1, e, o;
 720   enum tree_code code;
 721
 722   *var = exp;
 723   *off = ssize_int (0);
 724   STRIP_NOPS (exp);
 725
 726   if (tree_is_chrec (exp)
 727       || get_gimple_rhs_class (TREE_CODE (exp)) == GIMPLE_TERNARY_RHS)
 728     return;
 729
 730   otype = TREE_TYPE (exp);
 731   code = TREE_CODE (exp);
 732   extract_ops_from_tree (exp, &code, &op0, &op1);
 733   if (split_constant_offset_1 (otype, op0, code, op1, &e, &o))
 734     {
 735       *var = fold_convert (type, e);
 736       *off = o;
 737     }
 738 }
 739
 740 /* Returns the address ADDR of an object in a canonical shape (without nop
 741    casts, and with type of pointer to the object).  */
 742
 743 static tree
 744 canonicalize_base_object_address (tree addr)
 745 {
 746   tree orig = addr;
 747
 748   STRIP_NOPS (addr);
 749
 750   /* The base address may be obtained by casting from integer, in that case
 751      keep the cast.  */
 752   if (!POINTER_TYPE_P (TREE_TYPE (addr)))
 753     return orig;
 754
 755   if (TREE_CODE (addr) != ADDR_EXPR)
 756     return addr;
 757
 758   return build_fold_addr_expr (TREE_OPERAND (addr, 0));
 759 }
 760
 761 /* Analyzes the behavior of the memory reference DR in the innermost loop or
 762    basic block that contains it.  Returns true if analysis succeed or false
 763    otherwise.  */
 764
 765 bool
 766 dr_analyze_innermost (struct data_reference *dr, struct loop *nest)
 767 {
 768   gimple stmt = DR_STMT (dr);
 769   struct loop *loop = loop_containing_stmt (stmt);
 770   tree ref = DR_REF (dr);
 771   HOST_WIDE_INT pbitsize, pbitpos;
 772   tree base, poffset;
 773   machine_mode pmode;
 774   int punsignedp, pvolatilep;
 775   affine_iv base_iv, offset_iv;
 776   tree init, dinit, step;
 777   bool in_loop = (loop && loop->num);
 778
 779   if (dump_file && (dump_flags & TDF_DETAILS))
 780     fprintf (dump_file, "analyze_innermost: ");
 781
 782   base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset,
 783                               &pmode, &punsignedp, &pvolatilep, false);
 784   gcc_assert (base != NULL_TREE);
 785
 786   if (pbitpos % BITS_PER_UNIT != 0)
 787     {
 788       if (dump_file && (dump_flags & TDF_DETAILS))
 789         fprintf (dump_file, "failed: bit offset alignment.\n");
 790       return false;
 791     }
 792
 793   if (TREE_CODE (base) == MEM_REF)
 794     {
 795       if (!integer_zerop (TREE_OPERAND (base, 1)))
 796         {
 797           offset_int moff = mem_ref_offset (base);
 798           tree mofft = wide_int_to_tree (sizetype, moff);
 799           if (!poffset)
 800             poffset = mofft;
 801           else
 802             poffset = size_binop (PLUS_EXPR, poffset, mofft);
 803         }
 804       base = TREE_OPERAND (base, 0);
 805     }
 806   else
 807     base = build_fold_addr_expr (base);
 808
 809   if (in_loop)
 810     {
 811       if (!simple_iv (loop, loop_containing_stmt (stmt), base, &base_iv,
 812                       nest ? true : false))
 813         {
 814           if (nest)
 815             {
 816               if (dump_file && (dump_flags & TDF_DETAILS))
 817                 fprintf (dump_file, "failed: evolution of base is not"
 818                                     " affine.\n");
 819               return false;
 820             }
 821           else
 822             {
 823               base_iv.base = base;
 824               base_iv.step = ssize_int (0);
 825               base_iv.no_overflow = true;
 826             }
 827         }
 828     }
 829   else
 830     {
 831       base_iv.base = base;
 832       base_iv.step = ssize_int (0);
 833       base_iv.no_overflow = true;
 834     }
 835
 836   if (!poffset)
 837     {
 838       offset_iv.base = ssize_int (0);
 839       offset_iv.step = ssize_int (0);
 840     }
 841   else
 842     {
 843       if (!in_loop)
 844         {
 845           offset_iv.base = poffset;
 846           offset_iv.step = ssize_int (0);
 847         }
 848       else if (!simple_iv (loop, loop_containing_stmt (stmt),
 849                            poffset, &offset_iv,
 850                            nest ? true : false))
 851         {
 852           if (nest)
 853             {
 854               if (dump_file && (dump_flags & TDF_DETAILS))
 855                 fprintf (dump_file, "failed: evolution of offset is not"
 856                                     " affine.\n");
 857               return false;
 858             }
 859           else
 860             {
 861               offset_iv.base = poffset;
 862               offset_iv.step = ssize_int (0);
 863             }
 864         }
 865     }
 866
 867   init = ssize_int (pbitpos / BITS_PER_UNIT);
 868   split_constant_offset (base_iv.base, &base_iv.base, &dinit);
 869   init =  size_binop (PLUS_EXPR, init, dinit);
 870   split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
 871   init =  size_binop (PLUS_EXPR, init, dinit);
 872
 873   step = size_binop (PLUS_EXPR,
 874                      fold_convert (ssizetype, base_iv.step),
 875                      fold_convert (ssizetype, offset_iv.step));
 876
 877   DR_BASE_ADDRESS (dr) = canonicalize_base_object_address (base_iv.base);
 878
 879   DR_OFFSET (dr) = fold_convert (ssizetype, offset_iv.base);
 880   DR_INIT (dr) = init;
 881   DR_STEP (dr) = step;
 882
 883   DR_ALIGNED_TO (dr) = size_int (highest_pow2_factor (offset_iv.base));
 884
 885   if (dump_file && (dump_flags & TDF_DETAILS))
 886     fprintf (dump_file, "success.\n");
 887
 888   return true;
 889 }
 890
 891 /* Determines the base object and the list of indices of memory reference
 892    DR, analyzed in LOOP and instantiated in loop nest NEST.  */
 893
 894 static void
 895 dr_analyze_indices (struct data_reference *dr, loop_p nest, loop_p loop)
 896 {
 897   vec<tree> access_fns = vNULL;
 898   tree ref, op;
 899   tree base, off, access_fn;
 900   basic_block before_loop;
 901
 902   /* If analyzing a basic-block there are no indices to analyze
 903      and thus no access functions.  */
 904   if (!nest)
 905     {
 906       DR_BASE_OBJECT (dr) = DR_REF (dr);
 907       DR_ACCESS_FNS (dr).create (0);
 908       return;
 909     }
 910
 911   ref = DR_REF (dr);
 912   before_loop = block_before_loop (nest);
 913
 914   /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
 915      into a two element array with a constant index.  The base is
 916      then just the immediate underlying object.  */
 917   if (TREE_CODE (ref) == REALPART_EXPR)
 918     {
 919       ref = TREE_OPERAND (ref, 0);
 920       access_fns.safe_push (integer_zero_node);
 921     }
 922   else if (TREE_CODE (ref) == IMAGPART_EXPR)
 923     {
 924       ref = TREE_OPERAND (ref, 0);
 925       access_fns.safe_push (integer_one_node);
 926     }
 927
 928   /* Analyze access functions of dimensions we know to be independent.  */
 929   while (handled_component_p (ref))
 930     {
 931       if (TREE_CODE (ref) == ARRAY_REF)
 932         {
 933           op = TREE_OPERAND (ref, 1);
 934           access_fn = analyze_scalar_evolution (loop, op);
 935           access_fn = instantiate_scev (before_loop, loop, access_fn);
 936           access_fns.safe_push (access_fn);
 937         }
 938       else if (TREE_CODE (ref) == COMPONENT_REF
 939                && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) == RECORD_TYPE)
 940         {
 941           /* For COMPONENT_REFs of records (but not unions!) use the
 942              FIELD_DECL offset as constant access function so we can
 943              disambiguate a[i].f1 and a[i].f2.  */
 944           tree off = component_ref_field_offset (ref);
 945           off = size_binop (PLUS_EXPR,
 946                             size_binop (MULT_EXPR,
 947                                         fold_convert (bitsizetype, off),
 948                                         bitsize_int (BITS_PER_UNIT)),
 949                             DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref, 1)));
 950           access_fns.safe_push (off);
 951         }
 952       else
 953         /* If we have an unhandled component we could not translate
 954            to an access function stop analyzing.  We have determined
 955            our base object in this case.  */
 956         break;
 957
 958       ref = TREE_OPERAND (ref, 0);
 959     }
 960
 961   /* If the address operand of a MEM_REF base has an evolution in the
 962      analyzed nest, add it as an additional independent access-function.  */
 963   if (TREE_CODE (ref) == MEM_REF)
 964     {
 965       op = TREE_OPERAND (ref, 0);
 966       access_fn = analyze_scalar_evolution (loop, op);
 967       access_fn = instantiate_scev (before_loop, loop, access_fn);
 968       if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
 969         {
 970           tree orig_type;
 971           tree memoff = TREE_OPERAND (ref, 1);
 972           base = initial_condition (access_fn);
 973           orig_type = TREE_TYPE (base);
 974           STRIP_USELESS_TYPE_CONVERSION (base);
 975           split_constant_offset (base, &base, &off);
 976           STRIP_USELESS_TYPE_CONVERSION (base);
 977           /* Fold the MEM_REF offset into the evolutions initial
 978              value to make more bases comparable.  */
 979           if (!integer_zerop (memoff))
 980             {
 981               off = size_binop (PLUS_EXPR, off,
 982                                 fold_convert (ssizetype, memoff));
 983               memoff = build_int_cst (TREE_TYPE (memoff), 0);
 984             }
 985           access_fn = chrec_replace_initial_condition
 986               (access_fn, fold_convert (orig_type, off));
 987           /* ???  This is still not a suitable base object for
 988              dr_may_alias_p - the base object needs to be an
 989              access that covers the object as whole.  With
 990              an evolution in the pointer this cannot be
 991              guaranteed.
 992              As a band-aid, mark the access so we can special-case
 993              it in dr_may_alias_p.  */
 994           ref = fold_build2_loc (EXPR_LOCATION (ref),
 995                                  MEM_REF, TREE_TYPE (ref),
 996                                  base, memoff);
 997           access_fns.safe_push (access_fn);
 998         }
 999     }
1000   else if (DECL_P (ref))
1001     {
1002       /* Canonicalize DR_BASE_OBJECT to MEM_REF form.  */
1003       ref = build2 (MEM_REF, TREE_TYPE (ref),
1004                     build_fold_addr_expr (ref),
1005                     build_int_cst (reference_alias_ptr_type (ref), 0));
1006     }
1007
1008   DR_BASE_OBJECT (dr) = ref;
1009   DR_ACCESS_FNS (dr) = access_fns;
1010 }
1011
1012 /* Extracts the alias analysis information from the memory reference DR.  */
1013
1014 static void
1015 dr_analyze_alias (struct data_reference *dr)
1016 {
1017   tree ref = DR_REF (dr);
1018   tree base = get_base_address (ref), addr;
1019
1020   if (INDIRECT_REF_P (base)
1021       || TREE_CODE (base) == MEM_REF)
1022     {
1023       addr = TREE_OPERAND (base, 0);
1024       if (TREE_CODE (addr) == SSA_NAME)
1025         DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
1026     }
1027 }
1028
1029 /* Frees data reference DR.  */
1030
1031 void
1032 free_data_ref (data_reference_p dr)
1033 {
1034   DR_ACCESS_FNS (dr).release ();
1035   free (dr);
1036 }
1037
1038 /* Analyzes memory reference MEMREF accessed in STMT.  The reference
1039    is read if IS_READ is true, write otherwise.  Returns the
1040    data_reference description of MEMREF.  NEST is the outermost loop
1041    in which the reference should be instantiated, LOOP is the loop in
1042    which the data reference should be analyzed.  */
1043
1044 struct data_reference *
1045 create_data_ref (loop_p nest, loop_p loop, tree memref, gimple stmt,
1046                  bool is_read)
1047 {
1048   struct data_reference *dr;
1049
1050   if (dump_file && (dump_flags & TDF_DETAILS))
1051     {
1052       fprintf (dump_file, "Creating dr for ");
1053       print_generic_expr (dump_file, memref, TDF_SLIM);
1054       fprintf (dump_file, "\n");
1055     }
1056
1057   dr = XCNEW (struct data_reference);
1058   DR_STMT (dr) = stmt;
1059   DR_REF (dr) = memref;
1060   DR_IS_READ (dr) = is_read;
1061
1062   dr_analyze_innermost (dr, nest);
1063   dr_analyze_indices (dr, nest, loop);
1064   dr_analyze_alias (dr);
1065
1066   if (dump_file && (dump_flags & TDF_DETAILS))
1067     {
1068       unsigned i;
1069       fprintf (dump_file, "\tbase_address: ");
1070       print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
1071       fprintf (dump_file, "\n\toffset from base address: ");
1072       print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
1073       fprintf (dump_file, "\n\tconstant offset from base address: ");
1074       print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
1075       fprintf (dump_file, "\n\tstep: ");
1076       print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
1077       fprintf (dump_file, "\n\taligned to: ");
1078       print_generic_expr (dump_file, DR_ALIGNED_TO (dr), TDF_SLIM);
1079       fprintf (dump_file, "\n\tbase_object: ");
1080       print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
1081       fprintf (dump_file, "\n");
1082       for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
1083         {
1084           fprintf (dump_file, "\tAccess function %d: ", i);
1085           print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
1086         }
1087     }
1088
1089   return dr;
1090 }
1091
1092 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
1093    expressions.  */
1094 static bool
1095 dr_equal_offsets_p1 (tree offset1, tree offset2)
1096 {
1097   bool res;
1098
1099   STRIP_NOPS (offset1);
1100   STRIP_NOPS (offset2);
1101
1102   if (offset1 == offset2)
1103     return true;
1104
1105   if (TREE_CODE (offset1) != TREE_CODE (offset2)
1106       || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
1107     return false;
1108
1109   res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
1110                              TREE_OPERAND (offset2, 0));
1111
1112   if (!res || !BINARY_CLASS_P (offset1))
1113     return res;
1114
1115   res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
1116                              TREE_OPERAND (offset2, 1));
1117
1118   return res;
1119 }
1120
1121 /* Check if DRA and DRB have equal offsets.  */
1122 bool
1123 dr_equal_offsets_p (struct data_reference *dra,
1124                     struct data_reference *drb)
1125 {
1126   tree offset1, offset2;
1127
1128   offset1 = DR_OFFSET (dra);
1129   offset2 = DR_OFFSET (drb);
1130
1131   return dr_equal_offsets_p1 (offset1, offset2);
1132 }
1133
1134 /* Returns true if FNA == FNB.  */
1135
1136 static bool
1137 affine_function_equal_p (affine_fn fna, affine_fn fnb)
1138 {
1139   unsigned i, n = fna.length ();
1140
1141   if (n != fnb.length ())
1142     return false;
1143
1144   for (i = 0; i < n; i++)
1145     if (!operand_equal_p (fna[i], fnb[i], 0))
1146       return false;
1147
1148   return true;
1149 }
1150
1151 /* If all the functions in CF are the same, returns one of them,
1152    otherwise returns NULL.  */
1153
1154 static affine_fn
1155 common_affine_function (conflict_function *cf)
1156 {
1157   unsigned i;
1158   affine_fn comm;
1159
1160   if (!CF_NONTRIVIAL_P (cf))
1161     return affine_fn ();
1162
1163   comm = cf->fns[0];
1164
1165   for (i = 1; i < cf->n; i++)
1166     if (!affine_function_equal_p (comm, cf->fns[i]))
1167       return affine_fn ();
1168
1169   return comm;
1170 }
1171
1172 /* Returns the base of the affine function FN.  */
1173
1174 static tree
1175 affine_function_base (affine_fn fn)
1176 {
1177   return fn[0];
1178 }
1179
1180 /* Returns true if FN is a constant.  */
1181
1182 static bool
1183 affine_function_constant_p (affine_fn fn)
1184 {
1185   unsigned i;
1186   tree coef;
1187
1188   for (i = 1; fn.iterate (i, &coef); i++)
1189     if (!integer_zerop (coef))
1190       return false;
1191
1192   return true;
1193 }
1194
1195 /* Returns true if FN is the zero constant function.  */
1196
1197 static bool
1198 affine_function_zero_p (affine_fn fn)
1199 {
1200   return (integer_zerop (affine_function_base (fn))
1201           && affine_function_constant_p (fn));
1202 }
1203
1204 /* Returns a signed integer type with the largest precision from TA
1205    and TB.  */
1206
1207 static tree
1208 signed_type_for_types (tree ta, tree tb)
1209 {
1210   if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
1211     return signed_type_for (ta);
1212   else
1213     return signed_type_for (tb);
1214 }
1215
1216 /* Applies operation OP on affine functions FNA and FNB, and returns the
1217    result.  */
1218
1219 static affine_fn
1220 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
1221 {
1222   unsigned i, n, m;
1223   affine_fn ret;
1224   tree coef;
1225
1226   if (fnb.length () > fna.length ())
1227     {
1228       n = fna.length ();
1229       m = fnb.length ();
1230     }
1231   else
1232     {
1233       n = fnb.length ();
1234       m = fna.length ();
1235     }
1236
1237   ret.create (m);
1238   for (i = 0; i < n; i++)
1239     {
1240       tree type = signed_type_for_types (TREE_TYPE (fna[i]),
1241                                          TREE_TYPE (fnb[i]));
1242       ret.quick_push (fold_build2 (op, type, fna[i], fnb[i]));
1243     }
1244
1245   for (; fna.iterate (i, &coef); i++)
1246     ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1247                                  coef, integer_zero_node));
1248   for (; fnb.iterate (i, &coef); i++)
1249     ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1250                                  integer_zero_node, coef));
1251
1252   return ret;
1253 }
1254
1255 /* Returns the sum of affine functions FNA and FNB.  */
1256
1257 static affine_fn
1258 affine_fn_plus (affine_fn fna, affine_fn fnb)
1259 {
1260   return affine_fn_op (PLUS_EXPR, fna, fnb);
1261 }
1262
1263 /* Returns the difference of affine functions FNA and FNB.  */
1264
1265 static affine_fn
1266 affine_fn_minus (affine_fn fna, affine_fn fnb)
1267 {
1268   return affine_fn_op (MINUS_EXPR, fna, fnb);
1269 }
1270
1271 /* Frees affine function FN.  */
1272
1273 static void
1274 affine_fn_free (affine_fn fn)
1275 {
1276   fn.release ();
1277 }
1278
1279 /* Determine for each subscript in the data dependence relation DDR
1280    the distance.  */
1281
1282 static void
1283 compute_subscript_distance (struct data_dependence_relation *ddr)
1284 {
1285   conflict_function *cf_a, *cf_b;
1286   affine_fn fn_a, fn_b, diff;
1287
1288   if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
1289     {
1290       unsigned int i;
1291
1292       for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
1293         {
1294           struct subscript *subscript;
1295
1296           subscript = DDR_SUBSCRIPT (ddr, i);
1297           cf_a = SUB_CONFLICTS_IN_A (subscript);
1298           cf_b = SUB_CONFLICTS_IN_B (subscript);
1299
1300           fn_a = common_affine_function (cf_a);
1301           fn_b = common_affine_function (cf_b);
1302           if (!fn_a.exists () || !fn_b.exists ())
1303             {
1304               SUB_DISTANCE (subscript) = chrec_dont_know;
1305               return;
1306             }
1307           diff = affine_fn_minus (fn_a, fn_b);
1308
1309           if (affine_function_constant_p (diff))
1310             SUB_DISTANCE (subscript) = affine_function_base (diff);
1311           else
1312             SUB_DISTANCE (subscript) = chrec_dont_know;
1313
1314           affine_fn_free (diff);
1315         }
1316     }
1317 }
1318
1319 /* Returns the conflict function for "unknown".  */
1320
1321 static conflict_function *
1322 conflict_fn_not_known (void)
1323 {
1324   conflict_function *fn = XCNEW (conflict_function);
1325   fn->n = NOT_KNOWN;
1326
1327   return fn;
1328 }
1329
1330 /* Returns the conflict function for "independent".  */
1331
1332 static conflict_function *
1333 conflict_fn_no_dependence (void)
1334 {
1335   conflict_function *fn = XCNEW (conflict_function);
1336   fn->n = NO_DEPENDENCE;
1337
1338   return fn;
1339 }
1340
1341 /* Returns true if the address of OBJ is invariant in LOOP.  */
1342
1343 static bool
1344 object_address_invariant_in_loop_p (const struct loop *loop, const_tree obj)
1345 {
1346   while (handled_component_p (obj))
1347     {
1348       if (TREE_CODE (obj) == ARRAY_REF)
1349         {
1350           /* Index of the ARRAY_REF was zeroed in analyze_indices, thus we only
1351              need to check the stride and the lower bound of the reference.  */
1352           if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1353                                                       loop->num)
1354               || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 3),
1355                                                          loop->num))
1356             return false;
1357         }
1358       else if (TREE_CODE (obj) == COMPONENT_REF)
1359         {
1360           if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1361                                                       loop->num))
1362             return false;
1363         }
1364       obj = TREE_OPERAND (obj, 0);
1365     }
1366
1367   if (!INDIRECT_REF_P (obj)
1368       && TREE_CODE (obj) != MEM_REF)
1369     return true;
1370
1371   return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
1372                                                   loop->num);
1373 }
1374
1375 /* Returns false if we can prove that data references A and B do not alias,
1376    true otherwise.  If LOOP_NEST is false no cross-iteration aliases are
1377    considered.  */
1378
1379 bool
1380 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
1381                 bool loop_nest)
1382 {
1383   tree addr_a = DR_BASE_OBJECT (a);
1384   tree addr_b = DR_BASE_OBJECT (b);
1385
1386   /* If we are not processing a loop nest but scalar code we
1387      do not need to care about possible cross-iteration dependences
1388      and thus can process the full original reference.  Do so,
1389      similar to how loop invariant motion applies extra offset-based
1390      disambiguation.  */
1391   if (!loop_nest)
1392     {
1393       aff_tree off1, off2;
1394       widest_int size1, size2;
1395       get_inner_reference_aff (DR_REF (a), &off1, &size1);
1396       get_inner_reference_aff (DR_REF (b), &off2, &size2);
1397       aff_combination_scale (&off1, -1);
1398       aff_combination_add (&off2, &off1);
1399       if (aff_comb_cannot_overlap_p (&off2, size1, size2))
1400         return false;
1401     }
1402
1403   /* If we had an evolution in a pointer-based MEM_REF BASE_OBJECT we
1404      do not know the size of the base-object.  So we cannot do any
1405      offset/overlap based analysis but have to rely on points-to
1406      information only.  */
1407   if (TREE_CODE (addr_a) == MEM_REF
1408       && TREE_CODE (TREE_OPERAND (addr_a, 0)) == SSA_NAME)
1409     {
1410       /* For true dependences we can apply TBAA.  */
1411       if (flag_strict_aliasing
1412           && DR_IS_WRITE (a) && DR_IS_READ (b)
1413           && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
1414                                      get_alias_set (DR_REF (b))))
1415         return false;
1416       if (TREE_CODE (addr_b) == MEM_REF)
1417         return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
1418                                        TREE_OPERAND (addr_b, 0));
1419       else
1420         return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
1421                                        build_fold_addr_expr (addr_b));
1422     }
1423   else if (TREE_CODE (addr_b) == MEM_REF
1424            && TREE_CODE (TREE_OPERAND (addr_b, 0)) == SSA_NAME)
1425     {
1426       /* For true dependences we can apply TBAA.  */
1427       if (flag_strict_aliasing
1428           && DR_IS_WRITE (a) && DR_IS_READ (b)
1429           && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
1430                                      get_alias_set (DR_REF (b))))
1431         return false;
1432       if (TREE_CODE (addr_a) == MEM_REF)
1433         return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
1434                                        TREE_OPERAND (addr_b, 0));
1435       else
1436         return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
1437                                        TREE_OPERAND (addr_b, 0));
1438     }
1439
1440   /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
1441      that is being subsetted in the loop nest.  */
1442   if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
1443     return refs_output_dependent_p (addr_a, addr_b);
1444   else if (DR_IS_READ (a) && DR_IS_WRITE (b))
1445     return refs_anti_dependent_p (addr_a, addr_b);
1446   return refs_may_alias_p (addr_a, addr_b);
1447 }
1448
1449 /* Initialize a data dependence relation between data accesses A and
1450    B.  NB_LOOPS is the number of loops surrounding the references: the
1451    size of the classic distance/direction vectors.  */
1452
1453 struct data_dependence_relation *
1454 initialize_data_dependence_relation (struct data_reference *a,
1455                                      struct data_reference *b,
1456                                      vec<loop_p> loop_nest)
1457 {
1458   struct data_dependence_relation *res;
1459   unsigned int i;
1460
1461   res = XNEW (struct data_dependence_relation);
1462   DDR_A (res) = a;
1463   DDR_B (res) = b;
1464   DDR_LOOP_NEST (res).create (0);
1465   DDR_REVERSED_P (res) = false;
1466   DDR_SUBSCRIPTS (res).create (0);
1467   DDR_DIR_VECTS (res).create (0);
1468   DDR_DIST_VECTS (res).create (0);
1469
1470   if (a == NULL || b == NULL)
1471     {
1472       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1473       return res;
1474     }
1475
1476   /* If the data references do not alias, then they are independent.  */
1477   if (!dr_may_alias_p (a, b, loop_nest.exists ()))
1478     {
1479       DDR_ARE_DEPENDENT (res) = chrec_known;
1480       return res;
1481     }
1482
1483   /* The case where the references are exactly the same.  */
1484   if (operand_equal_p (DR_REF (a), DR_REF (b), 0))
1485     {
1486      if (loop_nest.exists ()
1487         && !object_address_invariant_in_loop_p (loop_nest[0],
1488                                                 DR_BASE_OBJECT (a)))
1489       {
1490         DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1491         return res;
1492       }
1493       DDR_AFFINE_P (res) = true;
1494       DDR_ARE_DEPENDENT (res) = NULL_TREE;
1495       DDR_SUBSCRIPTS (res).create (DR_NUM_DIMENSIONS (a));
1496       DDR_LOOP_NEST (res) = loop_nest;
1497       DDR_INNER_LOOP (res) = 0;
1498       DDR_SELF_REFERENCE (res) = true;
1499       for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
1500        {
1501          struct subscript *subscript;
1502
1503          subscript = XNEW (struct subscript);
1504          SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
1505          SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
1506          SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
1507          SUB_DISTANCE (subscript) = chrec_dont_know;
1508          DDR_SUBSCRIPTS (res).safe_push (subscript);
1509        }
1510       return res;
1511     }
1512
1513   /* If the references do not access the same object, we do not know
1514      whether they alias or not.  */
1515   if (!operand_equal_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b), 0))
1516     {
1517       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1518       return res;
1519     }
1520
1521   /* If the base of the object is not invariant in the loop nest, we cannot
1522      analyze it.  TODO -- in fact, it would suffice to record that there may
1523      be arbitrary dependences in the loops where the base object varies.  */
1524   if (loop_nest.exists ()
1525       && !object_address_invariant_in_loop_p (loop_nest[0],
1526                                               DR_BASE_OBJECT (a)))
1527     {
1528       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1529       return res;
1530     }
1531
1532   /* If the number of dimensions of the access to not agree we can have
1533      a pointer access to a component of the array element type and an
1534      array access while the base-objects are still the same.  Punt.  */
1535   if (DR_NUM_DIMENSIONS (a) != DR_NUM_DIMENSIONS (b))
1536     {
1537       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1538       return res;
1539     }
1540
1541   DDR_AFFINE_P (res) = true;
1542   DDR_ARE_DEPENDENT (res) = NULL_TREE;
1543   DDR_SUBSCRIPTS (res).create (DR_NUM_DIMENSIONS (a));
1544   DDR_LOOP_NEST (res) = loop_nest;
1545   DDR_INNER_LOOP (res) = 0;
1546   DDR_SELF_REFERENCE (res) = false;
1547
1548   for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
1549     {
1550       struct subscript *subscript;
1551
1552       subscript = XNEW (struct subscript);
1553       SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
1554       SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
1555       SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
1556       SUB_DISTANCE (subscript) = chrec_dont_know;
1557       DDR_SUBSCRIPTS (res).safe_push (subscript);
1558     }
1559
1560   return res;
1561 }
1562
1563 /* Frees memory used by the conflict function F.  */
1564
1565 static void
1566 free_conflict_function (conflict_function *f)
1567 {
1568   unsigned i;
1569
1570   if (CF_NONTRIVIAL_P (f))
1571     {
1572       for (i = 0; i < f->n; i++)
1573         affine_fn_free (f->fns[i]);
1574     }
1575   free (f);
1576 }
1577
1578 /* Frees memory used by SUBSCRIPTS.  */
1579
1580 static void
1581 free_subscripts (vec<subscript_p> subscripts)
1582 {
1583   unsigned i;
1584   subscript_p s;
1585
1586   FOR_EACH_VEC_ELT (subscripts, i, s)
1587     {
1588       free_conflict_function (s->conflicting_iterations_in_a);
1589       free_conflict_function (s->conflicting_iterations_in_b);
1590       free (s);
1591     }
1592   subscripts.release ();
1593 }
1594
1595 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
1596    description.  */
1597
1598 static inline void
1599 finalize_ddr_dependent (struct data_dependence_relation *ddr,
1600                         tree chrec)
1601 {
1602   DDR_ARE_DEPENDENT (ddr) = chrec;
1603   free_subscripts (DDR_SUBSCRIPTS (ddr));
1604   DDR_SUBSCRIPTS (ddr).create (0);
1605 }
1606
1607 /* The dependence relation DDR cannot be represented by a distance
1608    vector.  */
1609
1610 static inline void
1611 non_affine_dependence_relation (struct data_dependence_relation *ddr)
1612 {
1613   if (dump_file && (dump_flags & TDF_DETAILS))
1614     fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
1615
1616   DDR_AFFINE_P (ddr) = false;
1617 }
1618
1619 \f
1620
1621 /* This section contains the classic Banerjee tests.  */
1622
1623 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
1624    variables, i.e., if the ZIV (Zero Index Variable) test is true.  */
1625
1626 static inline bool
1627 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1628 {
1629   return (evolution_function_is_constant_p (chrec_a)
1630           && evolution_function_is_constant_p (chrec_b));
1631 }
1632
1633 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
1634    variable, i.e., if the SIV (Single Index Variable) test is true.  */
1635
1636 static bool
1637 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1638 {
1639   if ((evolution_function_is_constant_p (chrec_a)
1640        && evolution_function_is_univariate_p (chrec_b))
1641       || (evolution_function_is_constant_p (chrec_b)
1642           && evolution_function_is_univariate_p (chrec_a)))
1643     return true;
1644
1645   if (evolution_function_is_univariate_p (chrec_a)
1646       && evolution_function_is_univariate_p (chrec_b))
1647     {
1648       switch (TREE_CODE (chrec_a))
1649         {
1650         case POLYNOMIAL_CHREC:
1651           switch (TREE_CODE (chrec_b))
1652             {
1653             case POLYNOMIAL_CHREC:
1654               if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
1655                 return false;
1656
1657             default:
1658               return true;
1659             }
1660
1661         default:
1662           return true;
1663         }
1664     }
1665
1666   return false;
1667 }
1668
1669 /* Creates a conflict function with N dimensions.  The affine functions
1670    in each dimension follow.  */
1671
1672 static conflict_function *
1673 conflict_fn (unsigned n, ...)
1674 {
1675   unsigned i;
1676   conflict_function *ret = XCNEW (conflict_function);
1677   va_list ap;
1678
1679   gcc_assert (0 < n && n <= MAX_DIM);
1680   va_start (ap, n);
1681
1682   ret->n = n;
1683   for (i = 0; i < n; i++)
1684     ret->fns[i] = va_arg (ap, affine_fn);
1685   va_end (ap);
1686
1687   return ret;
1688 }
1689
1690 /* Returns constant affine function with value CST.  */
1691
1692 static affine_fn
1693 affine_fn_cst (tree cst)
1694 {
1695   affine_fn fn;
1696   fn.create (1);
1697   fn.quick_push (cst);
1698   return fn;
1699 }
1700
1701 /* Returns affine function with single variable, CST + COEF * x_DIM.  */
1702
1703 static affine_fn
1704 affine_fn_univar (tree cst, unsigned dim, tree coef)
1705 {
1706   affine_fn fn;
1707   fn.create (dim + 1);
1708   unsigned i;
1709
1710   gcc_assert (dim > 0);
1711   fn.quick_push (cst);
1712   for (i = 1; i < dim; i++)
1713     fn.quick_push (integer_zero_node);
1714   fn.quick_push (coef);
1715   return fn;
1716 }
1717
1718 /* Analyze a ZIV (Zero Index Variable) subscript.  *OVERLAPS_A and
1719    *OVERLAPS_B are initialized to the functions that describe the
1720    relation between the elements accessed twice by CHREC_A and
1721    CHREC_B.  For k >= 0, the following property is verified:
1722
1723    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
1724
1725 static void
1726 analyze_ziv_subscript (tree chrec_a,
1727                        tree chrec_b,
1728                        conflict_function **overlaps_a,
1729                        conflict_function **overlaps_b,
1730                        tree *last_conflicts)
1731 {
1732   tree type, difference;
1733   dependence_stats.num_ziv++;
1734
1735   if (dump_file && (dump_flags & TDF_DETAILS))
1736     fprintf (dump_file, "(analyze_ziv_subscript \n");
1737
1738   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1739   chrec_a = chrec_convert (type, chrec_a, NULL);
1740   chrec_b = chrec_convert (type, chrec_b, NULL);
1741   difference = chrec_fold_minus (type, chrec_a, chrec_b);
1742
1743   switch (TREE_CODE (difference))
1744     {
1745     case INTEGER_CST:
1746       if (integer_zerop (difference))
1747         {
1748           /* The difference is equal to zero: the accessed index
1749              overlaps for each iteration in the loop.  */
1750           *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1751           *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
1752           *last_conflicts = chrec_dont_know;
1753           dependence_stats.num_ziv_dependent++;
1754         }
1755       else
1756         {
1757           /* The accesses do not overlap.  */
1758           *overlaps_a = conflict_fn_no_dependence ();
1759           *overlaps_b = conflict_fn_no_dependence ();
1760           *last_conflicts = integer_zero_node;
1761           dependence_stats.num_ziv_independent++;
1762         }
1763       break;
1764
1765     default:
1766       /* We're not sure whether the indexes overlap.  For the moment,
1767          conservatively answer "don't know".  */
1768       if (dump_file && (dump_flags & TDF_DETAILS))
1769         fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
1770
1771       *overlaps_a = conflict_fn_not_known ();
1772       *overlaps_b = conflict_fn_not_known ();
1773       *last_conflicts = chrec_dont_know;
1774       dependence_stats.num_ziv_unimplemented++;
1775       break;
1776     }
1777
1778   if (dump_file && (dump_flags & TDF_DETAILS))
1779     fprintf (dump_file, ")\n");
1780 }
1781
1782 /* Similar to max_stmt_executions_int, but returns the bound as a tree,
1783    and only if it fits to the int type.  If this is not the case, or the
1784    bound  on the number of iterations of LOOP could not be derived, returns
1785    chrec_dont_know.  */
1786
1787 static tree
1788 max_stmt_executions_tree (struct loop *loop)
1789 {
1790   widest_int nit;
1791
1792   if (!max_stmt_executions (loop, &nit))
1793     return chrec_dont_know;
1794
1795   if (!wi::fits_to_tree_p (nit, unsigned_type_node))
1796     return chrec_dont_know;
1797
1798   return wide_int_to_tree (unsigned_type_node, nit);
1799 }
1800
1801 /* Determine whether the CHREC is always positive/negative.  If the expression
1802    cannot be statically analyzed, return false, otherwise set the answer into
1803    VALUE.  */
1804
1805 static bool
1806 chrec_is_positive (tree chrec, bool *value)
1807 {
1808   bool value0, value1, value2;
1809   tree end_value, nb_iter;
1810
1811   switch (TREE_CODE (chrec))
1812     {
1813     case POLYNOMIAL_CHREC:
1814       if (!chrec_is_positive (CHREC_LEFT (chrec), &value0)
1815           || !chrec_is_positive (CHREC_RIGHT (chrec), &value1))
1816         return false;
1817
1818       /* FIXME -- overflows.  */
1819       if (value0 == value1)
1820         {
1821           *value = value0;
1822           return true;
1823         }
1824
1825       /* Otherwise the chrec is under the form: "{-197, +, 2}_1",
1826          and the proof consists in showing that the sign never
1827          changes during the execution of the loop, from 0 to
1828          loop->nb_iterations.  */
1829       if (!evolution_function_is_affine_p (chrec))
1830         return false;
1831
1832       nb_iter = number_of_latch_executions (get_chrec_loop (chrec));
1833       if (chrec_contains_undetermined (nb_iter))
1834         return false;
1835
1836 #if 0
1837       /* TODO -- If the test is after the exit, we may decrease the number of
1838          iterations by one.  */
1839       if (after_exit)
1840         nb_iter = chrec_fold_minus (type, nb_iter, build_int_cst (type, 1));
1841 #endif
1842
1843       end_value = chrec_apply (CHREC_VARIABLE (chrec), chrec, nb_iter);
1844
1845       if (!chrec_is_positive (end_value, &value2))
1846         return false;
1847
1848       *value = value0;
1849       return value0 == value1;
1850
1851     case INTEGER_CST:
1852       switch (tree_int_cst_sgn (chrec))
1853         {
1854         case -1:
1855           *value = false;
1856           break;
1857         case 1:
1858           *value = true;
1859           break;
1860         default:
1861           return false;
1862         }
1863       return true;
1864
1865     default:
1866       return false;
1867     }
1868 }
1869
1870
1871 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
1872    constant, and CHREC_B is an affine function.  *OVERLAPS_A and
1873    *OVERLAPS_B are initialized to the functions that describe the
1874    relation between the elements accessed twice by CHREC_A and
1875    CHREC_B.  For k >= 0, the following property is verified:
1876
1877    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
1878
1879 static void
1880 analyze_siv_subscript_cst_affine (tree chrec_a,
1881                                   tree chrec_b,
1882                                   conflict_function **overlaps_a,
1883                                   conflict_function **overlaps_b,
1884                                   tree *last_conflicts)
1885 {
1886   bool value0, value1, value2;
1887   tree type, difference, tmp;
1888
1889   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1890   chrec_a = chrec_convert (type, chrec_a, NULL);
1891   chrec_b = chrec_convert (type, chrec_b, NULL);
1892   difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
1893
1894   /* Special case overlap in the first iteration.  */
1895   if (integer_zerop (difference))
1896     {
1897       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1898       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
1899       *last_conflicts = integer_one_node;
1900       return;
1901     }
1902
1903   if (!chrec_is_positive (initial_condition (difference), &value0))
1904     {
1905       if (dump_file && (dump_flags & TDF_DETAILS))
1906         fprintf (dump_file, "siv test failed: chrec is not positive.\n");
1907
1908       dependence_stats.num_siv_unimplemented++;
1909       *overlaps_a = conflict_fn_not_known ();
1910       *overlaps_b = conflict_fn_not_known ();
1911       *last_conflicts = chrec_dont_know;
1912       return;
1913     }
1914   else
1915     {
1916       if (value0 == false)
1917         {
1918           if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
1919             {
1920               if (dump_file && (dump_flags & TDF_DETAILS))
1921                 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1922
1923               *overlaps_a = conflict_fn_not_known ();
1924               *overlaps_b = conflict_fn_not_known ();
1925               *last_conflicts = chrec_dont_know;
1926               dependence_stats.num_siv_unimplemented++;
1927               return;
1928             }
1929           else
1930             {
1931               if (value1 == true)
1932                 {
1933                   /* Example:
1934                      chrec_a = 12
1935                      chrec_b = {10, +, 1}
1936                   */
1937
1938                   if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1939                     {
1940                       HOST_WIDE_INT numiter;
1941                       struct loop *loop = get_chrec_loop (chrec_b);
1942
1943                       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1944                       tmp = fold_build2 (EXACT_DIV_EXPR, type,
1945                                          fold_build1 (ABS_EXPR, type, difference),
1946                                          CHREC_RIGHT (chrec_b));
1947                       *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1948                       *last_conflicts = integer_one_node;
1949
1950
1951                       /* Perform weak-zero siv test to see if overlap is
1952                          outside the loop bounds.  */
1953                       numiter = max_stmt_executions_int (loop);
1954
1955                       if (numiter >= 0
1956                           && compare_tree_int (tmp, numiter) > 0)
1957                         {
1958                           free_conflict_function (*overlaps_a);
1959                           free_conflict_function (*overlaps_b);
1960                           *overlaps_a = conflict_fn_no_dependence ();
1961                           *overlaps_b = conflict_fn_no_dependence ();
1962                           *last_conflicts = integer_zero_node;
1963                           dependence_stats.num_siv_independent++;
1964                           return;
1965                         }
1966                       dependence_stats.num_siv_dependent++;
1967                       return;
1968                     }
1969
1970                   /* When the step does not divide the difference, there are
1971                      no overlaps.  */
1972                   else
1973                     {
1974                       *overlaps_a = conflict_fn_no_dependence ();
1975                       *overlaps_b = conflict_fn_no_dependence ();
1976                       *last_conflicts = integer_zero_node;
1977                       dependence_stats.num_siv_independent++;
1978                       return;
1979                     }
1980                 }
1981
1982               else
1983                 {
1984                   /* Example:
1985                      chrec_a = 12
1986                      chrec_b = {10, +, -1}
1987
1988                      In this case, chrec_a will not overlap with chrec_b.  */
1989                   *overlaps_a = conflict_fn_no_dependence ();
1990                   *overlaps_b = conflict_fn_no_dependence ();
1991                   *last_conflicts = integer_zero_node;
1992                   dependence_stats.num_siv_independent++;
1993                   return;
1994                 }
1995             }
1996         }
1997       else
1998         {
1999           if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
2000             {
2001               if (dump_file && (dump_flags & TDF_DETAILS))
2002                 fprintf (dump_file, "siv test failed: chrec not positive.\n");
2003
2004               *overlaps_a = conflict_fn_not_known ();
2005               *overlaps_b = conflict_fn_not_known ();
2006               *last_conflicts = chrec_dont_know;
2007               dependence_stats.num_siv_unimplemented++;
2008               return;
2009             }
2010           else
2011             {
2012               if (value2 == false)
2013                 {
2014                   /* Example:
2015                      chrec_a = 3
2016                      chrec_b = {10, +, -1}
2017                   */
2018                   if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
2019                     {
2020                       HOST_WIDE_INT numiter;
2021                       struct loop *loop = get_chrec_loop (chrec_b);
2022
2023                       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2024                       tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
2025                                          CHREC_RIGHT (chrec_b));
2026                       *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
2027                       *last_conflicts = integer_one_node;
2028
2029                       /* Perform weak-zero siv test to see if overlap is
2030                          outside the loop bounds.  */
2031                       numiter = max_stmt_executions_int (loop);
2032
2033                       if (numiter >= 0
2034                           && compare_tree_int (tmp, numiter) > 0)
2035                         {
2036                           free_conflict_function (*overlaps_a);
2037                           free_conflict_function (*overlaps_b);
2038                           *overlaps_a = conflict_fn_no_dependence ();
2039                           *overlaps_b = conflict_fn_no_dependence ();
2040                           *last_conflicts = integer_zero_node;
2041                           dependence_stats.num_siv_independent++;
2042                           return;
2043                         }
2044                       dependence_stats.num_siv_dependent++;
2045                       return;
2046                     }
2047
2048                   /* When the step does not divide the difference, there
2049                      are no overlaps.  */
2050                   else
2051                     {
2052                       *overlaps_a = conflict_fn_no_dependence ();
2053                       *overlaps_b = conflict_fn_no_dependence ();
2054                       *last_conflicts = integer_zero_node;
2055                       dependence_stats.num_siv_independent++;
2056                       return;
2057                     }
2058                 }
2059               else
2060                 {
2061                   /* Example:
2062                      chrec_a = 3
2063                      chrec_b = {4, +, 1}
2064
2065                      In this case, chrec_a will not overlap with chrec_b.  */
2066                   *overlaps_a = conflict_fn_no_dependence ();
2067                   *overlaps_b = conflict_fn_no_dependence ();
2068                   *last_conflicts = integer_zero_node;
2069                   dependence_stats.num_siv_independent++;
2070                   return;
2071                 }
2072             }
2073         }
2074     }
2075 }
2076
2077 /* Helper recursive function for initializing the matrix A.  Returns
2078    the initial value of CHREC.  */
2079
2080 static tree
2081 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
2082 {
2083   gcc_assert (chrec);
2084
2085   switch (TREE_CODE (chrec))
2086     {
2087     case POLYNOMIAL_CHREC:
2088       gcc_assert (TREE_CODE (CHREC_RIGHT (chrec)) == INTEGER_CST);
2089
2090       A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
2091       return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
2092
2093     case PLUS_EXPR:
2094     case MULT_EXPR:
2095     case MINUS_EXPR:
2096       {
2097         tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
2098         tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
2099
2100         return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
2101       }
2102
2103     CASE_CONVERT:
2104       {
2105         tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
2106         return chrec_convert (chrec_type (chrec), op, NULL);
2107       }
2108
2109     case BIT_NOT_EXPR:
2110       {
2111         /* Handle ~X as -1 - X.  */
2112         tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
2113         return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
2114                               build_int_cst (TREE_TYPE (chrec), -1), op);
2115       }
2116
2117     case INTEGER_CST:
2118       return chrec;
2119
2120     default:
2121       gcc_unreachable ();
2122       return NULL_TREE;
2123     }
2124 }
2125
2126 #define FLOOR_DIV(x,y) ((x) / (y))
2127
2128 /* Solves the special case of the Diophantine equation:
2129    | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
2130
2131    Computes the descriptions OVERLAPS_A and OVERLAPS_B.  NITER is the
2132    number of iterations that loops X and Y run.  The overlaps will be
2133    constructed as evolutions in dimension DIM.  */
2134
2135 static void
2136 compute_overlap_steps_for_affine_univar (int niter, int step_a, int step_b,
2137                                          affine_fn *overlaps_a,
2138                                          affine_fn *overlaps_b,
2139                                          tree *last_conflicts, int dim)
2140 {
2141   if (((step_a > 0 && step_b > 0)
2142        || (step_a < 0 && step_b < 0)))
2143     {
2144       int step_overlaps_a, step_overlaps_b;
2145       int gcd_steps_a_b, last_conflict, tau2;
2146
2147       gcd_steps_a_b = gcd (step_a, step_b);
2148       step_overlaps_a = step_b / gcd_steps_a_b;
2149       step_overlaps_b = step_a / gcd_steps_a_b;
2150
2151       if (niter > 0)
2152         {
2153           tau2 = FLOOR_DIV (niter, step_overlaps_a);
2154           tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
2155           last_conflict = tau2;
2156           *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
2157         }
2158       else
2159         *last_conflicts = chrec_dont_know;
2160
2161       *overlaps_a = affine_fn_univar (integer_zero_node, dim,
2162                                       build_int_cst (NULL_TREE,
2163                                                      step_overlaps_a));
2164       *overlaps_b = affine_fn_univar (integer_zero_node, dim,
2165                                       build_int_cst (NULL_TREE,
2166                                                      step_overlaps_b));
2167     }
2168
2169   else
2170     {
2171       *overlaps_a = affine_fn_cst (integer_zero_node);
2172       *overlaps_b = affine_fn_cst (integer_zero_node);
2173       *last_conflicts = integer_zero_node;
2174     }
2175 }
2176
2177 /* Solves the special case of a Diophantine equation where CHREC_A is
2178    an affine bivariate function, and CHREC_B is an affine univariate
2179    function.  For example,
2180
2181    | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
2182
2183    has the following overlapping functions:
2184
2185    | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
2186    | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
2187    | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
2188
2189    FORNOW: This is a specialized implementation for a case occurring in
2190    a common benchmark.  Implement the general algorithm.  */
2191
2192 static void
2193 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
2194                                       conflict_function **overlaps_a,
2195                                       conflict_function **overlaps_b,
2196                                       tree *last_conflicts)
2197 {
2198   bool xz_p, yz_p, xyz_p;
2199   int step_x, step_y, step_z;
2200   HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
2201   affine_fn overlaps_a_xz, overlaps_b_xz;
2202   affine_fn overlaps_a_yz, overlaps_b_yz;
2203   affine_fn overlaps_a_xyz, overlaps_b_xyz;
2204   affine_fn ova1, ova2, ovb;
2205   tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
2206
2207   step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
2208   step_y = int_cst_value (CHREC_RIGHT (chrec_a));
2209   step_z = int_cst_value (CHREC_RIGHT (chrec_b));
2210
2211   niter_x = max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)));
2212   niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a));
2213   niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b));
2214
2215   if (niter_x < 0 || niter_y < 0 || niter_z < 0)
2216     {
2217       if (dump_file && (dump_flags & TDF_DETAILS))
2218         fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
2219
2220       *overlaps_a = conflict_fn_not_known ();
2221       *overlaps_b = conflict_fn_not_known ();
2222       *last_conflicts = chrec_dont_know;
2223       return;
2224     }
2225
2226   niter = MIN (niter_x, niter_z);
2227   compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
2228                                            &overlaps_a_xz,
2229                                            &overlaps_b_xz,
2230                                            &last_conflicts_xz, 1);
2231   niter = MIN (niter_y, niter_z);
2232   compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
2233                                            &overlaps_a_yz,
2234                                            &overlaps_b_yz,
2235                                            &last_conflicts_yz, 2);
2236   niter = MIN (niter_x, niter_z);
2237   niter = MIN (niter_y, niter);
2238   compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
2239                                            &overlaps_a_xyz,
2240                                            &overlaps_b_xyz,
2241                                            &last_conflicts_xyz, 3);
2242
2243   xz_p = !integer_zerop (last_conflicts_xz);
2244   yz_p = !integer_zerop (last_conflicts_yz);
2245   xyz_p = !integer_zerop (last_conflicts_xyz);
2246
2247   if (xz_p || yz_p || xyz_p)
2248     {
2249       ova1 = affine_fn_cst (integer_zero_node);
2250       ova2 = affine_fn_cst (integer_zero_node);
2251       ovb = affine_fn_cst (integer_zero_node);
2252       if (xz_p)
2253         {
2254           affine_fn t0 = ova1;
2255           affine_fn t2 = ovb;
2256
2257           ova1 = affine_fn_plus (ova1, overlaps_a_xz);
2258           ovb = affine_fn_plus (ovb, overlaps_b_xz);
2259           affine_fn_free (t0);
2260           affine_fn_free (t2);
2261           *last_conflicts = last_conflicts_xz;
2262         }
2263       if (yz_p)
2264         {
2265           affine_fn t0 = ova2;
2266           affine_fn t2 = ovb;
2267
2268           ova2 = affine_fn_plus (ova2, overlaps_a_yz);
2269           ovb = affine_fn_plus (ovb, overlaps_b_yz);
2270           affine_fn_free (t0);
2271           affine_fn_free (t2);
2272           *last_conflicts = last_conflicts_yz;
2273         }
2274       if (xyz_p)
2275         {
2276           affine_fn t0 = ova1;
2277           affine_fn t2 = ova2;
2278           affine_fn t4 = ovb;
2279
2280           ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
2281           ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
2282           ovb = affine_fn_plus (ovb, overlaps_b_xyz);
2283           affine_fn_free (t0);
2284           affine_fn_free (t2);
2285           affine_fn_free (t4);
2286           *last_conflicts = last_conflicts_xyz;
2287         }
2288       *overlaps_a = conflict_fn (2, ova1, ova2);
2289       *overlaps_b = conflict_fn (1, ovb);
2290     }
2291   else
2292     {
2293       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2294       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2295       *last_conflicts = integer_zero_node;
2296     }
2297
2298   affine_fn_free (overlaps_a_xz);
2299   affine_fn_free (overlaps_b_xz);
2300   affine_fn_free (overlaps_a_yz);
2301   affine_fn_free (overlaps_b_yz);
2302   affine_fn_free (overlaps_a_xyz);
2303   affine_fn_free (overlaps_b_xyz);
2304 }
2305
2306 /* Copy the elements of vector VEC1 with length SIZE to VEC2.  */
2307
2308 static void
2309 lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
2310                     int size)
2311 {
2312   memcpy (vec2, vec1, size * sizeof (*vec1));
2313 }
2314
2315 /* Copy the elements of M x N matrix MAT1 to MAT2.  */
2316
2317 static void
2318 lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
2319                     int m, int n)
2320 {
2321   int i;
2322
2323   for (i = 0; i < m; i++)
2324     lambda_vector_copy (mat1[i], mat2[i], n);
2325 }
2326
2327 /* Store the N x N identity matrix in MAT.  */
2328
2329 static void
2330 lambda_matrix_id (lambda_matrix mat, int size)
2331 {
2332   int i, j;
2333
2334   for (i = 0; i < size; i++)
2335     for (j = 0; j < size; j++)
2336       mat[i][j] = (i == j) ? 1 : 0;
2337 }
2338
2339 /* Return the first nonzero element of vector VEC1 between START and N.
2340    We must have START <= N.   Returns N if VEC1 is the zero vector.  */
2341
2342 static int
2343 lambda_vector_first_nz (lambda_vector vec1, int n, int start)
2344 {
2345   int j = start;
2346   while (j < n && vec1[j] == 0)
2347     j++;
2348   return j;
2349 }
2350
2351 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
2352    R2 = R2 + CONST1 * R1.  */
2353
2354 static void
2355 lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2, int const1)
2356 {
2357   int i;
2358
2359   if (const1 == 0)
2360     return;
2361
2362   for (i = 0; i < n; i++)
2363     mat[r2][i] += const1 * mat[r1][i];
2364 }
2365
2366 /* Swap rows R1 and R2 in matrix MAT.  */
2367
2368 static void
2369 lambda_matrix_row_exchange (lambda_matrix mat, int r1, int r2)
2370 {
2371   lambda_vector row;
2372
2373   row = mat[r1];
2374   mat[r1] = mat[r2];
2375   mat[r2] = row;
2376 }
2377
2378 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
2379    and store the result in VEC2.  */
2380
2381 static void
2382 lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
2383                           int size, int const1)
2384 {
2385   int i;
2386
2387   if (const1 == 0)
2388     lambda_vector_clear (vec2, size);
2389   else
2390     for (i = 0; i < size; i++)
2391       vec2[i] = const1 * vec1[i];
2392 }
2393
2394 /* Negate vector VEC1 with length SIZE and store it in VEC2.  */
2395
2396 static void
2397 lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
2398                       int size)
2399 {
2400   lambda_vector_mult_const (vec1, vec2, size, -1);
2401 }
2402
2403 /* Negate row R1 of matrix MAT which has N columns.  */
2404
2405 static void
2406 lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
2407 {
2408   lambda_vector_negate (mat[r1], mat[r1], n);
2409 }
2410
2411 /* Return true if two vectors are equal.  */
2412
2413 static bool
2414 lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
2415 {
2416   int i;
2417   for (i = 0; i < size; i++)
2418     if (vec1[i] != vec2[i])
2419       return false;
2420   return true;
2421 }
2422
2423 /* Given an M x N integer matrix A, this function determines an M x
2424    M unimodular matrix U, and an M x N echelon matrix S such that
2425    "U.A = S".  This decomposition is also known as "right Hermite".
2426
2427    Ref: Algorithm 2.1 page 33 in "Loop Transformations for
2428    Restructuring Compilers" Utpal Banerjee.  */
2429
2430 static void
2431 lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
2432                              lambda_matrix S, lambda_matrix U)
2433 {
2434   int i, j, i0 = 0;
2435
2436   lambda_matrix_copy (A, S, m, n);
2437   lambda_matrix_id (U, m);
2438
2439   for (j = 0; j < n; j++)
2440     {
2441       if (lambda_vector_first_nz (S[j], m, i0) < m)
2442         {
2443           ++i0;
2444           for (i = m - 1; i >= i0; i--)
2445             {
2446               while (S[i][j] != 0)
2447                 {
2448                   int sigma, factor, a, b;
2449
2450                   a = S[i-1][j];
2451                   b = S[i][j];
2452                   sigma = (a * b < 0) ? -1: 1;
2453                   a = abs (a);
2454                   b = abs (b);
2455                   factor = sigma * (a / b);
2456
2457                   lambda_matrix_row_add (S, n, i, i-1, -factor);
2458                   lambda_matrix_row_exchange (S, i, i-1);
2459
2460                   lambda_matrix_row_add (U, m, i, i-1, -factor);
2461                   lambda_matrix_row_exchange (U, i, i-1);
2462                 }
2463             }
2464         }
2465     }
2466 }
2467
2468 /* Determines the overlapping elements due to accesses CHREC_A and
2469    CHREC_B, that are affine functions.  This function cannot handle
2470    symbolic evolution functions, ie. when initial conditions are
2471    parameters, because it uses lambda matrices of integers.  */
2472
2473 static void
2474 analyze_subscript_affine_affine (tree chrec_a,
2475                                  tree chrec_b,
2476                                  conflict_function **overlaps_a,
2477                                  conflict_function **overlaps_b,
2478                                  tree *last_conflicts)
2479 {
2480   unsigned nb_vars_a, nb_vars_b, dim;
2481   HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta;
2482   lambda_matrix A, U, S;
2483   struct obstack scratch_obstack;
2484
2485   if (eq_evolutions_p (chrec_a, chrec_b))
2486     {
2487       /* The accessed index overlaps for each iteration in the
2488          loop.  */
2489       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2490       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2491       *last_conflicts = chrec_dont_know;
2492       return;
2493     }
2494   if (dump_file && (dump_flags & TDF_DETAILS))
2495     fprintf (dump_file, "(analyze_subscript_affine_affine \n");
2496
2497   /* For determining the initial intersection, we have to solve a
2498      Diophantine equation.  This is the most time consuming part.
2499
2500      For answering to the question: "Is there a dependence?" we have
2501      to prove that there exists a solution to the Diophantine
2502      equation, and that the solution is in the iteration domain,
2503      i.e. the solution is positive or zero, and that the solution
2504      happens before the upper bound loop.nb_iterations.  Otherwise
2505      there is no dependence.  This function outputs a description of
2506      the iterations that hold the intersections.  */
2507
2508   nb_vars_a = nb_vars_in_chrec (chrec_a);
2509   nb_vars_b = nb_vars_in_chrec (chrec_b);
2510
2511   gcc_obstack_init (&scratch_obstack);
2512
2513   dim = nb_vars_a + nb_vars_b;
2514   U = lambda_matrix_new (dim, dim, &scratch_obstack);
2515   A = lambda_matrix_new (dim, 1, &scratch_obstack);
2516   S = lambda_matrix_new (dim, 1, &scratch_obstack);
2517
2518   init_a = int_cst_value (initialize_matrix_A (A, chrec_a, 0, 1));
2519   init_b = int_cst_value (initialize_matrix_A (A, chrec_b, nb_vars_a, -1));
2520   gamma = init_b - init_a;
2521
2522   /* Don't do all the hard work of solving the Diophantine equation
2523      when we already know the solution: for example,
2524      | {3, +, 1}_1
2525      | {3, +, 4}_2
2526      | gamma = 3 - 3 = 0.
2527      Then the first overlap occurs during the first iterations:
2528      | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
2529   */
2530   if (gamma == 0)
2531     {
2532       if (nb_vars_a == 1 && nb_vars_b == 1)
2533         {
2534           HOST_WIDE_INT step_a, step_b;
2535           HOST_WIDE_INT niter, niter_a, niter_b;
2536           affine_fn ova, ovb;
2537
2538           niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a));
2539           niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b));
2540           niter = MIN (niter_a, niter_b);
2541           step_a = int_cst_value (CHREC_RIGHT (chrec_a));
2542           step_b = int_cst_value (CHREC_RIGHT (chrec_b));
2543
2544           compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
2545                                                    &ova, &ovb,
2546                                                    last_conflicts, 1);
2547           *overlaps_a = conflict_fn (1, ova);
2548           *overlaps_b = conflict_fn (1, ovb);
2549         }
2550
2551       else if (nb_vars_a == 2 && nb_vars_b == 1)
2552         compute_overlap_steps_for_affine_1_2
2553           (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
2554
2555       else if (nb_vars_a == 1 && nb_vars_b == 2)
2556         compute_overlap_steps_for_affine_1_2
2557           (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
2558
2559       else
2560         {
2561           if (dump_file && (dump_flags & TDF_DETAILS))
2562             fprintf (dump_file, "affine-affine test failed: too many variables.\n");
2563           *overlaps_a = conflict_fn_not_known ();
2564           *overlaps_b = conflict_fn_not_known ();
2565           *last_conflicts = chrec_dont_know;
2566         }
2567       goto end_analyze_subs_aa;
2568     }
2569
2570   /* U.A = S */
2571   lambda_matrix_right_hermite (A, dim, 1, S, U);
2572
2573   if (S[0][0] < 0)
2574     {
2575       S[0][0] *= -1;
2576       lambda_matrix_row_negate (U, dim, 0);
2577     }
2578   gcd_alpha_beta = S[0][0];
2579
2580   /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
2581      but that is a quite strange case.  Instead of ICEing, answer
2582      don't know.  */
2583   if (gcd_alpha_beta == 0)
2584     {
2585       *overlaps_a = conflict_fn_not_known ();
2586       *overlaps_b = conflict_fn_not_known ();
2587       *last_conflicts = chrec_dont_know;
2588       goto end_analyze_subs_aa;
2589     }
2590
2591   /* The classic "gcd-test".  */
2592   if (!int_divides_p (gcd_alpha_beta, gamma))
2593     {
2594       /* The "gcd-test" has determined that there is no integer
2595          solution, i.e. there is no dependence.  */
2596       *overlaps_a = conflict_fn_no_dependence ();
2597       *overlaps_b = conflict_fn_no_dependence ();
2598       *last_conflicts = integer_zero_node;
2599     }
2600
2601   /* Both access functions are univariate.  This includes SIV and MIV cases.  */
2602   else if (nb_vars_a == 1 && nb_vars_b == 1)
2603     {
2604       /* Both functions should have the same evolution sign.  */
2605       if (((A[0][0] > 0 && -A[1][0] > 0)
2606            || (A[0][0] < 0 && -A[1][0] < 0)))
2607         {
2608           /* The solutions are given by:
2609              |
2610              | [GAMMA/GCD_ALPHA_BETA  t].[u11 u12]  = [x0]
2611              |                           [u21 u22]    [y0]
2612
2613              For a given integer t.  Using the following variables,
2614
2615              | i0 = u11 * gamma / gcd_alpha_beta
2616              | j0 = u12 * gamma / gcd_alpha_beta
2617              | i1 = u21
2618              | j1 = u22
2619
2620              the solutions are:
2621
2622              | x0 = i0 + i1 * t,
2623              | y0 = j0 + j1 * t.  */
2624           HOST_WIDE_INT i0, j0, i1, j1;
2625
2626           i0 = U[0][0] * gamma / gcd_alpha_beta;
2627           j0 = U[0][1] * gamma / gcd_alpha_beta;
2628           i1 = U[1][0];
2629           j1 = U[1][1];
2630
2631           if ((i1 == 0 && i0 < 0)
2632               || (j1 == 0 && j0 < 0))
2633             {
2634               /* There is no solution.
2635                  FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
2636                  falls in here, but for the moment we don't look at the
2637                  upper bound of the iteration domain.  */
2638               *overlaps_a = conflict_fn_no_dependence ();
2639               *overlaps_b = conflict_fn_no_dependence ();
2640               *last_conflicts = integer_zero_node;
2641               goto end_analyze_subs_aa;
2642             }
2643
2644           if (i1 > 0 && j1 > 0)
2645             {
2646               HOST_WIDE_INT niter_a
2647                 = max_stmt_executions_int (get_chrec_loop (chrec_a));
2648               HOST_WIDE_INT niter_b
2649                 = max_stmt_executions_int (get_chrec_loop (chrec_b));
2650               HOST_WIDE_INT niter = MIN (niter_a, niter_b);
2651
2652               /* (X0, Y0) is a solution of the Diophantine equation:
2653                  "chrec_a (X0) = chrec_b (Y0)".  */
2654               HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
2655                                         CEIL (-j0, j1));
2656               HOST_WIDE_INT x0 = i1 * tau1 + i0;
2657               HOST_WIDE_INT y0 = j1 * tau1 + j0;
2658
2659               /* (X1, Y1) is the smallest positive solution of the eq
2660                  "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
2661                  first conflict occurs.  */
2662               HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
2663               HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
2664               HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
2665
2666               if (niter > 0)
2667                 {
2668                   HOST_WIDE_INT tau2 = MIN (FLOOR_DIV (niter - i0, i1),
2669                                             FLOOR_DIV (niter - j0, j1));
2670                   HOST_WIDE_INT last_conflict = tau2 - (x1 - i0)/i1;
2671
2672                   /* If the overlap occurs outside of the bounds of the
2673                      loop, there is no dependence.  */
2674                   if (x1 >= niter || y1 >= niter)
2675                     {
2676                       *overlaps_a = conflict_fn_no_dependence ();
2677                       *overlaps_b = conflict_fn_no_dependence ();
2678                       *last_conflicts = integer_zero_node;
2679                       goto end_analyze_subs_aa;
2680                     }
2681                   else
2682                     *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
2683                 }
2684               else
2685                 *last_conflicts = chrec_dont_know;
2686
2687               *overlaps_a
2688                 = conflict_fn (1,
2689                                affine_fn_univar (build_int_cst (NULL_TREE, x1),
2690                                                  1,
2691                                                  build_int_cst (NULL_TREE, i1)));
2692               *overlaps_b
2693                 = conflict_fn (1,
2694                                affine_fn_univar (build_int_cst (NULL_TREE, y1),
2695                                                  1,
2696                                                  build_int_cst (NULL_TREE, j1)));
2697             }
2698           else
2699             {
2700               /* FIXME: For the moment, the upper bound of the
2701                  iteration domain for i and j is not checked.  */
2702               if (dump_file && (dump_flags & TDF_DETAILS))
2703                 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2704               *overlaps_a = conflict_fn_not_known ();
2705               *overlaps_b = conflict_fn_not_known ();
2706               *last_conflicts = chrec_dont_know;
2707             }
2708         }
2709       else
2710         {
2711           if (dump_file && (dump_flags & TDF_DETAILS))
2712             fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2713           *overlaps_a = conflict_fn_not_known ();
2714           *overlaps_b = conflict_fn_not_known ();
2715           *last_conflicts = chrec_dont_know;
2716         }
2717     }
2718   else
2719     {
2720       if (dump_file && (dump_flags & TDF_DETAILS))
2721         fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2722       *overlaps_a = conflict_fn_not_known ();
2723       *overlaps_b = conflict_fn_not_known ();
2724       *last_conflicts = chrec_dont_know;
2725     }
2726
2727 end_analyze_subs_aa:
2728   obstack_free (&scratch_obstack, NULL);
2729   if (dump_file && (dump_flags & TDF_DETAILS))
2730     {
2731       fprintf (dump_file, "  (overlaps_a = ");
2732       dump_conflict_function (dump_file, *overlaps_a);
2733       fprintf (dump_file, ")\n  (overlaps_b = ");
2734       dump_conflict_function (dump_file, *overlaps_b);
2735       fprintf (dump_file, "))\n");
2736     }
2737 }
2738
2739 /* Returns true when analyze_subscript_affine_affine can be used for
2740    determining the dependence relation between chrec_a and chrec_b,
2741    that contain symbols.  This function modifies chrec_a and chrec_b
2742    such that the analysis result is the same, and such that they don't
2743    contain symbols, and then can safely be passed to the analyzer.
2744
2745    Example: The analysis of the following tuples of evolutions produce
2746    the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
2747    vs. {0, +, 1}_1
2748
2749    {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
2750    {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
2751 */
2752
2753 static bool
2754 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
2755 {
2756   tree diff, type, left_a, left_b, right_b;
2757
2758   if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
2759       || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
2760     /* FIXME: For the moment not handled.  Might be refined later.  */
2761     return false;
2762
2763   type = chrec_type (*chrec_a);
2764   left_a = CHREC_LEFT (*chrec_a);
2765   left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
2766   diff = chrec_fold_minus (type, left_a, left_b);
2767
2768   if (!evolution_function_is_constant_p (diff))
2769     return false;
2770
2771   if (dump_file && (dump_flags & TDF_DETAILS))
2772     fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
2773
2774   *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
2775                                      diff, CHREC_RIGHT (*chrec_a));
2776   right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
2777   *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
2778                                      build_int_cst (type, 0),
2779                                      right_b);
2780   return true;
2781 }
2782
2783 /* Analyze a SIV (Single Index Variable) subscript.  *OVERLAPS_A and
2784    *OVERLAPS_B are initialized to the functions that describe the
2785    relation between the elements accessed twice by CHREC_A and
2786    CHREC_B.  For k >= 0, the following property is verified:
2787
2788    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
2789
2790 static void
2791 analyze_siv_subscript (tree chrec_a,
2792                        tree chrec_b,
2793                        conflict_function **overlaps_a,
2794                        conflict_function **overlaps_b,
2795                        tree *last_conflicts,
2796                        int loop_nest_num)
2797 {
2798   dependence_stats.num_siv++;
2799
2800   if (dump_file && (dump_flags & TDF_DETAILS))
2801     fprintf (dump_file, "(analyze_siv_subscript \n");
2802
2803   if (evolution_function_is_constant_p (chrec_a)
2804       && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2805     analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
2806                                       overlaps_a, overlaps_b, last_conflicts);
2807
2808   else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2809            && evolution_function_is_constant_p (chrec_b))
2810     analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
2811                                       overlaps_b, overlaps_a, last_conflicts);
2812
2813   else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2814            && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2815     {
2816       if (!chrec_contains_symbols (chrec_a)
2817           && !chrec_contains_symbols (chrec_b))
2818         {
2819           analyze_subscript_affine_affine (chrec_a, chrec_b,
2820                                            overlaps_a, overlaps_b,
2821                                            last_conflicts);
2822
2823           if (CF_NOT_KNOWN_P (*overlaps_a)
2824               || CF_NOT_KNOWN_P (*overlaps_b))
2825             dependence_stats.num_siv_unimplemented++;
2826           else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2827                    || CF_NO_DEPENDENCE_P (*overlaps_b))
2828             dependence_stats.num_siv_independent++;
2829           else
2830             dependence_stats.num_siv_dependent++;
2831         }
2832       else if (can_use_analyze_subscript_affine_affine (&chrec_a,
2833                                                         &chrec_b))
2834         {
2835           analyze_subscript_affine_affine (chrec_a, chrec_b,
2836                                            overlaps_a, overlaps_b,
2837                                            last_conflicts);
2838
2839           if (CF_NOT_KNOWN_P (*overlaps_a)
2840               || CF_NOT_KNOWN_P (*overlaps_b))
2841             dependence_stats.num_siv_unimplemented++;
2842           else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2843                    || CF_NO_DEPENDENCE_P (*overlaps_b))
2844             dependence_stats.num_siv_independent++;
2845           else
2846             dependence_stats.num_siv_dependent++;
2847         }
2848       else
2849         goto siv_subscript_dontknow;
2850     }
2851
2852   else
2853     {
2854     siv_subscript_dontknow:;
2855       if (dump_file && (dump_flags & TDF_DETAILS))
2856         fprintf (dump_file, "  siv test failed: unimplemented");
2857       *overlaps_a = conflict_fn_not_known ();
2858       *overlaps_b = conflict_fn_not_known ();
2859       *last_conflicts = chrec_dont_know;
2860       dependence_stats.num_siv_unimplemented++;
2861     }
2862
2863   if (dump_file && (dump_flags & TDF_DETAILS))
2864     fprintf (dump_file, ")\n");
2865 }
2866
2867 /* Returns false if we can prove that the greatest common divisor of the steps
2868    of CHREC does not divide CST, false otherwise.  */
2869
2870 static bool
2871 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
2872 {
2873   HOST_WIDE_INT cd = 0, val;
2874   tree step;
2875
2876   if (!tree_fits_shwi_p (cst))
2877     return true;
2878   val = tree_to_shwi (cst);
2879
2880   while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
2881     {
2882       step = CHREC_RIGHT (chrec);
2883       if (!tree_fits_shwi_p (step))
2884         return true;
2885       cd = gcd (cd, tree_to_shwi (step));
2886       chrec = CHREC_LEFT (chrec);
2887     }
2888
2889   return val % cd == 0;
2890 }
2891
2892 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
2893    LOOP_NEST.  *OVERLAPS_A and *OVERLAPS_B are initialized to the
2894    functions that describe the relation between the elements accessed
2895    twice by CHREC_A and CHREC_B.  For k >= 0, the following property
2896    is verified:
2897
2898    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
2899
2900 static void
2901 analyze_miv_subscript (tree chrec_a,
2902                        tree chrec_b,
2903                        conflict_function **overlaps_a,
2904                        conflict_function **overlaps_b,
2905                        tree *last_conflicts,
2906                        struct loop *loop_nest)
2907 {
2908   tree type, difference;
2909
2910   dependence_stats.num_miv++;
2911   if (dump_file && (dump_flags & TDF_DETAILS))
2912     fprintf (dump_file, "(analyze_miv_subscript \n");
2913
2914   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
2915   chrec_a = chrec_convert (type, chrec_a, NULL);
2916   chrec_b = chrec_convert (type, chrec_b, NULL);
2917   difference = chrec_fold_minus (type, chrec_a, chrec_b);
2918
2919   if (eq_evolutions_p (chrec_a, chrec_b))
2920     {
2921       /* Access functions are the same: all the elements are accessed
2922          in the same order.  */
2923       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2924       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2925       *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
2926       dependence_stats.num_miv_dependent++;
2927     }
2928
2929   else if (evolution_function_is_constant_p (difference)
2930            /* For the moment, the following is verified:
2931               evolution_function_is_affine_multivariate_p (chrec_a,
2932               loop_nest->num) */
2933            && !gcd_of_steps_may_divide_p (chrec_a, difference))
2934     {
2935       /* testsuite/.../ssa-chrec-33.c
2936          {{21, +, 2}_1, +, -2}_2  vs.  {{20, +, 2}_1, +, -2}_2
2937
2938          The difference is 1, and all the evolution steps are multiples
2939          of 2, consequently there are no overlapping elements.  */
2940       *overlaps_a = conflict_fn_no_dependence ();
2941       *overlaps_b = conflict_fn_no_dependence ();
2942       *last_conflicts = integer_zero_node;
2943       dependence_stats.num_miv_independent++;
2944     }
2945
2946   else if (evolution_function_is_affine_multivariate_p (chrec_a, loop_nest->num)
2947            && !chrec_contains_symbols (chrec_a)
2948            && evolution_function_is_affine_multivariate_p (chrec_b, loop_nest->num)
2949            && !chrec_contains_symbols (chrec_b))
2950     {
2951       /* testsuite/.../ssa-chrec-35.c
2952          {0, +, 1}_2  vs.  {0, +, 1}_3
2953          the overlapping elements are respectively located at iterations:
2954          {0, +, 1}_x and {0, +, 1}_x,
2955          in other words, we have the equality:
2956          {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
2957
2958          Other examples:
2959          {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
2960          {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
2961
2962          {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
2963          {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
2964       */
2965       analyze_subscript_affine_affine (chrec_a, chrec_b,
2966                                        overlaps_a, overlaps_b, last_conflicts);
2967
2968       if (CF_NOT_KNOWN_P (*overlaps_a)
2969           || CF_NOT_KNOWN_P (*overlaps_b))
2970         dependence_stats.num_miv_unimplemented++;
2971       else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2972                || CF_NO_DEPENDENCE_P (*overlaps_b))
2973         dependence_stats.num_miv_independent++;
2974       else
2975         dependence_stats.num_miv_dependent++;
2976     }
2977
2978   else
2979     {
2980       /* When the analysis is too difficult, answer "don't know".  */
2981       if (dump_file && (dump_flags & TDF_DETAILS))
2982         fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
2983
2984       *overlaps_a = conflict_fn_not_known ();
2985       *overlaps_b = conflict_fn_not_known ();
2986       *last_conflicts = chrec_dont_know;
2987       dependence_stats.num_miv_unimplemented++;
2988     }
2989
2990   if (dump_file && (dump_flags & TDF_DETAILS))
2991     fprintf (dump_file, ")\n");
2992 }
2993
2994 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
2995    with respect to LOOP_NEST.  OVERLAP_ITERATIONS_A and
2996    OVERLAP_ITERATIONS_B are initialized with two functions that
2997    describe the iterations that contain conflicting elements.
2998
2999    Remark: For an integer k >= 0, the following equality is true:
3000
3001    CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
3002 */
3003
3004 static void
3005 analyze_overlapping_iterations (tree chrec_a,
3006                                 tree chrec_b,
3007                                 conflict_function **overlap_iterations_a,
3008                                 conflict_function **overlap_iterations_b,
3009                                 tree *last_conflicts, struct loop *loop_nest)
3010 {
3011   unsigned int lnn = loop_nest->num;
3012
3013   dependence_stats.num_subscript_tests++;
3014
3015   if (dump_file && (dump_flags & TDF_DETAILS))
3016     {
3017       fprintf (dump_file, "(analyze_overlapping_iterations \n");
3018       fprintf (dump_file, "  (chrec_a = ");
3019       print_generic_expr (dump_file, chrec_a, 0);
3020       fprintf (dump_file, ")\n  (chrec_b = ");
3021       print_generic_expr (dump_file, chrec_b, 0);
3022       fprintf (dump_file, ")\n");
3023     }
3024
3025   if (chrec_a == NULL_TREE
3026       || chrec_b == NULL_TREE
3027       || chrec_contains_undetermined (chrec_a)
3028       || chrec_contains_undetermined (chrec_b))
3029     {
3030       dependence_stats.num_subscript_undetermined++;
3031
3032       *overlap_iterations_a = conflict_fn_not_known ();
3033       *overlap_iterations_b = conflict_fn_not_known ();
3034     }
3035
3036   /* If they are the same chrec, and are affine, they overlap
3037      on every iteration.  */
3038   else if (eq_evolutions_p (chrec_a, chrec_b)
3039            && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
3040                || operand_equal_p (chrec_a, chrec_b, 0)))
3041     {
3042       dependence_stats.num_same_subscript_function++;
3043       *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3044       *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3045       *last_conflicts = chrec_dont_know;
3046     }
3047
3048   /* If they aren't the same, and aren't affine, we can't do anything
3049      yet.  */
3050   else if ((chrec_contains_symbols (chrec_a)
3051             || chrec_contains_symbols (chrec_b))
3052            && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
3053                || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
3054     {
3055       dependence_stats.num_subscript_undetermined++;
3056       *overlap_iterations_a = conflict_fn_not_known ();
3057       *overlap_iterations_b = conflict_fn_not_known ();
3058     }
3059
3060   else if (ziv_subscript_p (chrec_a, chrec_b))
3061     analyze_ziv_subscript (chrec_a, chrec_b,
3062                            overlap_iterations_a, overlap_iterations_b,
3063                            last_conflicts);
3064
3065   else if (siv_subscript_p (chrec_a, chrec_b))
3066     analyze_siv_subscript (chrec_a, chrec_b,
3067                            overlap_iterations_a, overlap_iterations_b,
3068                            last_conflicts, lnn);
3069
3070   else
3071     analyze_miv_subscript (chrec_a, chrec_b,
3072                            overlap_iterations_a, overlap_iterations_b,
3073                            last_conflicts, loop_nest);
3074
3075   if (dump_file && (dump_flags & TDF_DETAILS))
3076     {
3077       fprintf (dump_file, "  (overlap_iterations_a = ");
3078       dump_conflict_function (dump_file, *overlap_iterations_a);
3079       fprintf (dump_file, ")\n  (overlap_iterations_b = ");
3080       dump_conflict_function (dump_file, *overlap_iterations_b);
3081       fprintf (dump_file, "))\n");
3082     }
3083 }
3084
3085 /* Helper function for uniquely inserting distance vectors.  */
3086
3087 static void
3088 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
3089 {
3090   unsigned i;
3091   lambda_vector v;
3092
3093   FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, v)
3094     if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
3095       return;
3096
3097   DDR_DIST_VECTS (ddr).safe_push (dist_v);
3098 }
3099
3100 /* Helper function for uniquely inserting direction vectors.  */
3101
3102 static void
3103 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
3104 {
3105   unsigned i;
3106   lambda_vector v;
3107
3108   FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), i, v)
3109     if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
3110       return;
3111
3112   DDR_DIR_VECTS (ddr).safe_push (dir_v);
3113 }
3114
3115 /* Add a distance of 1 on all the loops outer than INDEX.  If we
3116    haven't yet determined a distance for this outer loop, push a new
3117    distance vector composed of the previous distance, and a distance
3118    of 1 for this outer loop.  Example:
3119
3120    | loop_1
3121    |   loop_2
3122    |     A[10]
3123    |   endloop_2
3124    | endloop_1
3125
3126    Saved vectors are of the form (dist_in_1, dist_in_2).  First, we
3127    save (0, 1), then we have to save (1, 0).  */
3128
3129 static void
3130 add_outer_distances (struct data_dependence_relation *ddr,
3131                      lambda_vector dist_v, int index)
3132 {
3133   /* For each outer loop where init_v is not set, the accesses are
3134      in dependence of distance 1 in the loop.  */
3135   while (--index >= 0)
3136     {
3137       lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3138       lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
3139       save_v[index] = 1;
3140       save_dist_v (ddr, save_v);
3141     }
3142 }
3143
3144 /* Return false when fail to represent the data dependence as a
3145    distance vector.  INIT_B is set to true when a component has been
3146    added to the distance vector DIST_V.  INDEX_CARRY is then set to
3147    the index in DIST_V that carries the dependence.  */
3148
3149 static bool
3150 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
3151                              struct data_reference *ddr_a,
3152                              struct data_reference *ddr_b,
3153                              lambda_vector dist_v, bool *init_b,
3154                              int *index_carry)
3155 {
3156   unsigned i;
3157   lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3158
3159   for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3160     {
3161       tree access_fn_a, access_fn_b;
3162       struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
3163
3164       if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
3165         {
3166           non_affine_dependence_relation (ddr);
3167           return false;
3168         }
3169
3170       access_fn_a = DR_ACCESS_FN (ddr_a, i);
3171       access_fn_b = DR_ACCESS_FN (ddr_b, i);
3172
3173       if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
3174           && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
3175         {
3176           int dist, index;
3177           int var_a = CHREC_VARIABLE (access_fn_a);
3178           int var_b = CHREC_VARIABLE (access_fn_b);
3179
3180           if (var_a != var_b
3181               || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
3182             {
3183               non_affine_dependence_relation (ddr);
3184               return false;
3185             }
3186
3187           dist = int_cst_value (SUB_DISTANCE (subscript));
3188           index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
3189           *index_carry = MIN (index, *index_carry);
3190
3191           /* This is the subscript coupling test.  If we have already
3192              recorded a distance for this loop (a distance coming from
3193              another subscript), it should be the same.  For example,
3194              in the following code, there is no dependence:
3195
3196              | loop i = 0, N, 1
3197              |   T[i+1][i] = ...
3198              |   ... = T[i][i]
3199              | endloop
3200           */
3201           if (init_v[index] != 0 && dist_v[index] != dist)
3202             {
3203               finalize_ddr_dependent (ddr, chrec_known);
3204               return false;
3205             }
3206
3207           dist_v[index] = dist;
3208           init_v[index] = 1;
3209           *init_b = true;
3210         }
3211       else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
3212         {
3213           /* This can be for example an affine vs. constant dependence
3214              (T[i] vs. T[3]) that is not an affine dependence and is
3215              not representable as a distance vector.  */
3216           non_affine_dependence_relation (ddr);
3217           return false;
3218         }
3219     }
3220
3221   return true;
3222 }
3223
3224 /* Return true when the DDR contains only constant access functions.  */
3225
3226 static bool
3227 constant_access_functions (const struct data_dependence_relation *ddr)
3228 {
3229   unsigned i;
3230
3231   for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3232     if (!evolution_function_is_constant_p (DR_ACCESS_FN (DDR_A (ddr), i))
3233         || !evolution_function_is_constant_p (DR_ACCESS_FN (DDR_B (ddr), i)))
3234       return false;
3235
3236   return true;
3237 }
3238
3239 /* Helper function for the case where DDR_A and DDR_B are the same
3240    multivariate access function with a constant step.  For an example
3241    see pr34635-1.c.  */
3242
3243 static void
3244 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
3245 {
3246   int x_1, x_2;
3247   tree c_1 = CHREC_LEFT (c_2);
3248   tree c_0 = CHREC_LEFT (c_1);
3249   lambda_vector dist_v;
3250   int v1, v2, cd;
3251
3252   /* Polynomials with more than 2 variables are not handled yet.  When
3253      the evolution steps are parameters, it is not possible to
3254      represent the dependence using classical distance vectors.  */
3255   if (TREE_CODE (c_0) != INTEGER_CST
3256       || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
3257       || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
3258     {
3259       DDR_AFFINE_P (ddr) = false;
3260       return;
3261     }
3262
3263   x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
3264   x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
3265
3266   /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2).  */
3267   dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3268   v1 = int_cst_value (CHREC_RIGHT (c_1));
3269   v2 = int_cst_value (CHREC_RIGHT (c_2));
3270   cd = gcd (v1, v2);
3271   v1 /= cd;
3272   v2 /= cd;
3273
3274   if (v2 < 0)
3275     {
3276       v2 = -v2;
3277       v1 = -v1;
3278     }
3279
3280   dist_v[x_1] = v2;
3281   dist_v[x_2] = -v1;
3282   save_dist_v (ddr, dist_v);
3283
3284   add_outer_distances (ddr, dist_v, x_1);
3285 }
3286
3287 /* Helper function for the case where DDR_A and DDR_B are the same
3288    access functions.  */
3289
3290 static void
3291 add_other_self_distances (struct data_dependence_relation *ddr)
3292 {
3293   lambda_vector dist_v;
3294   unsigned i;
3295   int index_carry = DDR_NB_LOOPS (ddr);
3296
3297   for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3298     {
3299       tree access_fun = DR_ACCESS_FN (DDR_A (ddr), i);
3300
3301       if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
3302         {
3303           if (!evolution_function_is_univariate_p (access_fun))
3304             {
3305               if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
3306                 {
3307                   DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
3308                   return;
3309                 }
3310
3311               access_fun = DR_ACCESS_FN (DDR_A (ddr), 0);
3312
3313               if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
3314                 add_multivariate_self_dist (ddr, access_fun);
3315               else
3316                 /* The evolution step is not constant: it varies in
3317                    the outer loop, so this cannot be represented by a
3318                    distance vector.  For example in pr34635.c the
3319                    evolution is {0, +, {0, +, 4}_1}_2.  */
3320                 DDR_AFFINE_P (ddr) = false;
3321
3322               return;
3323             }
3324
3325           index_carry = MIN (index_carry,
3326                              index_in_loop_nest (CHREC_VARIABLE (access_fun),
3327                                                  DDR_LOOP_NEST (ddr)));
3328         }
3329     }
3330
3331   dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3332   add_outer_distances (ddr, dist_v, index_carry);
3333 }
3334
3335 static void
3336 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
3337 {
3338   lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3339
3340   dist_v[DDR_INNER_LOOP (ddr)] = 1;
3341   save_dist_v (ddr, dist_v);
3342 }
3343
3344 /* Adds a unit distance vector to DDR when there is a 0 overlap.  This
3345    is the case for example when access functions are the same and
3346    equal to a constant, as in:
3347
3348    | loop_1
3349    |   A[3] = ...
3350    |   ... = A[3]
3351    | endloop_1
3352
3353    in which case the distance vectors are (0) and (1).  */
3354
3355 static void
3356 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
3357 {
3358   unsigned i, j;
3359
3360   for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3361     {
3362       subscript_p sub = DDR_SUBSCRIPT (ddr, i);
3363       conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
3364       conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
3365
3366       for (j = 0; j < ca->n; j++)
3367         if (affine_function_zero_p (ca->fns[j]))
3368           {
3369             insert_innermost_unit_dist_vector (ddr);
3370             return;
3371           }
3372
3373       for (j = 0; j < cb->n; j++)
3374         if (affine_function_zero_p (cb->fns[j]))
3375           {
3376             insert_innermost_unit_dist_vector (ddr);
3377             return;
3378           }
3379     }
3380 }
3381
3382 /* Compute the classic per loop distance vector.  DDR is the data
3383    dependence relation to build a vector from.  Return false when fail
3384    to represent the data dependence as a distance vector.  */
3385
3386 static bool
3387 build_classic_dist_vector (struct data_dependence_relation *ddr,
3388                            struct loop *loop_nest)
3389 {
3390   bool init_b = false;
3391   int index_carry = DDR_NB_LOOPS (ddr);
3392   lambda_vector dist_v;
3393
3394   if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
3395     return false;
3396
3397   if (same_access_functions (ddr))
3398     {
3399       /* Save the 0 vector.  */
3400       dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3401       save_dist_v (ddr, dist_v);
3402
3403       if (constant_access_functions (ddr))
3404         add_distance_for_zero_overlaps (ddr);
3405
3406       if (DDR_NB_LOOPS (ddr) > 1)
3407         add_other_self_distances (ddr);
3408
3409       return true;
3410     }
3411
3412   dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3413   if (!build_classic_dist_vector_1 (ddr, DDR_A (ddr), DDR_B (ddr),
3414                                     dist_v, &init_b, &index_carry))
3415     return false;
3416
3417   /* Save the distance vector if we initialized one.  */
3418   if (init_b)
3419     {
3420       /* Verify a basic constraint: classic distance vectors should
3421          always be lexicographically positive.
3422
3423          Data references are collected in the order of execution of
3424          the program, thus for the following loop
3425
3426          | for (i = 1; i < 100; i++)
3427          |   for (j = 1; j < 100; j++)
3428          |     {
3429          |       t = T[j+1][i-1];  // A
3430          |       T[j][i] = t + 2;  // B
3431          |     }
3432
3433          references are collected following the direction of the wind:
3434          A then B.  The data dependence tests are performed also
3435          following this order, such that we're looking at the distance
3436          separating the elements accessed by A from the elements later
3437          accessed by B.  But in this example, the distance returned by
3438          test_dep (A, B) is lexicographically negative (-1, 1), that
3439          means that the access A occurs later than B with respect to
3440          the outer loop, ie. we're actually looking upwind.  In this
3441          case we solve test_dep (B, A) looking downwind to the
3442          lexicographically positive solution, that returns the
3443          distance vector (1, -1).  */
3444       if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
3445         {
3446           lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3447           if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3448                                               loop_nest))
3449             return false;
3450           compute_subscript_distance (ddr);
3451           if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3452                                             save_v, &init_b, &index_carry))
3453             return false;
3454           save_dist_v (ddr, save_v);
3455           DDR_REVERSED_P (ddr) = true;
3456
3457           /* In this case there is a dependence forward for all the
3458              outer loops:
3459
3460              | for (k = 1; k < 100; k++)
3461              |  for (i = 1; i < 100; i++)
3462              |   for (j = 1; j < 100; j++)
3463              |     {
3464              |       t = T[j+1][i-1];  // A
3465              |       T[j][i] = t + 2;  // B
3466              |     }
3467
3468              the vectors are:
3469              (0,  1, -1)
3470              (1,  1, -1)
3471              (1, -1,  1)
3472           */
3473           if (DDR_NB_LOOPS (ddr) > 1)
3474             {
3475               add_outer_distances (ddr, save_v, index_carry);
3476               add_outer_distances (ddr, dist_v, index_carry);
3477             }
3478         }
3479       else
3480         {
3481           lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3482           lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
3483
3484           if (DDR_NB_LOOPS (ddr) > 1)
3485             {
3486               lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3487
3488               if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr),
3489                                                   DDR_A (ddr), loop_nest))
3490                 return false;
3491               compute_subscript_distance (ddr);
3492               if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3493                                                 opposite_v, &init_b,
3494                                                 &index_carry))
3495                 return false;
3496
3497               save_dist_v (ddr, save_v);
3498               add_outer_distances (ddr, dist_v, index_carry);
3499               add_outer_distances (ddr, opposite_v, index_carry);
3500             }
3501           else
3502             save_dist_v (ddr, save_v);
3503         }
3504     }
3505   else
3506     {
3507       /* There is a distance of 1 on all the outer loops: Example:
3508          there is a dependence of distance 1 on loop_1 for the array A.
3509
3510          | loop_1
3511          |   A[5] = ...
3512          | endloop
3513       */
3514       add_outer_distances (ddr, dist_v,
3515                            lambda_vector_first_nz (dist_v,
3516                                                    DDR_NB_LOOPS (ddr), 0));
3517     }
3518
3519   if (dump_file && (dump_flags & TDF_DETAILS))
3520     {
3521       unsigned i;
3522
3523       fprintf (dump_file, "(build_classic_dist_vector\n");
3524       for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3525         {
3526           fprintf (dump_file, "  dist_vector = (");
3527           print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
3528                                DDR_NB_LOOPS (ddr));
3529           fprintf (dump_file, "  )\n");
3530         }
3531       fprintf (dump_file, ")\n");
3532     }
3533
3534   return true;
3535 }
3536
3537 /* Return the direction for a given distance.
3538    FIXME: Computing dir this way is suboptimal, since dir can catch
3539    cases that dist is unable to represent.  */
3540
3541 static inline enum data_dependence_direction
3542 dir_from_dist (int dist)
3543 {
3544   if (dist > 0)
3545     return dir_positive;
3546   else if (dist < 0)
3547     return dir_negative;
3548   else
3549     return dir_equal;
3550 }
3551
3552 /* Compute the classic per loop direction vector.  DDR is the data
3553    dependence relation to build a vector from.  */
3554
3555 static void
3556 build_classic_dir_vector (struct data_dependence_relation *ddr)
3557 {
3558   unsigned i, j;
3559   lambda_vector dist_v;
3560
3561   FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
3562     {
3563       lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3564
3565       for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3566         dir_v[j] = dir_from_dist (dist_v[j]);
3567
3568       save_dir_v (ddr, dir_v);
3569     }
3570 }
3571
3572 /* Helper function.  Returns true when there is a dependence between
3573    data references DRA and DRB.  */
3574
3575 static bool
3576 subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
3577                                struct data_reference *dra,
3578                                struct data_reference *drb,
3579                                struct loop *loop_nest)
3580 {
3581   unsigned int i;
3582   tree last_conflicts;
3583   struct subscript *subscript;
3584   tree res = NULL_TREE;
3585
3586   for (i = 0; DDR_SUBSCRIPTS (ddr).iterate (i, &subscript); i++)
3587     {
3588       conflict_function *overlaps_a, *overlaps_b;
3589
3590       analyze_overlapping_iterations (DR_ACCESS_FN (dra, i),
3591                                       DR_ACCESS_FN (drb, i),
3592                                       &overlaps_a, &overlaps_b,
3593                                       &last_conflicts, loop_nest);
3594
3595       if (SUB_CONFLICTS_IN_A (subscript))
3596         free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
3597       if (SUB_CONFLICTS_IN_B (subscript))
3598         free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
3599
3600       SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
3601       SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
3602       SUB_LAST_CONFLICT (subscript) = last_conflicts;
3603
3604       /* If there is any undetermined conflict function we have to
3605          give a conservative answer in case we cannot prove that
3606          no dependence exists when analyzing another subscript.  */
3607       if (CF_NOT_KNOWN_P (overlaps_a)
3608           || CF_NOT_KNOWN_P (overlaps_b))
3609         {
3610           res = chrec_dont_know;
3611           continue;
3612         }
3613
3614       /* When there is a subscript with no dependence we can stop.  */
3615       else if (CF_NO_DEPENDENCE_P (overlaps_a)
3616                || CF_NO_DEPENDENCE_P (overlaps_b))
3617         {
3618           res = chrec_known;
3619           break;
3620         }
3621     }
3622
3623   if (res == NULL_TREE)
3624     return true;
3625
3626   if (res == chrec_known)
3627     dependence_stats.num_dependence_independent++;
3628   else
3629     dependence_stats.num_dependence_undetermined++;
3630   finalize_ddr_dependent (ddr, res);
3631   return false;
3632 }
3633
3634 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR.  */
3635
3636 static void
3637 subscript_dependence_tester (struct data_dependence_relation *ddr,
3638                              struct loop *loop_nest)
3639 {
3640   if (subscript_dependence_tester_1 (ddr, DDR_A (ddr), DDR_B (ddr), loop_nest))
3641     dependence_stats.num_dependence_dependent++;
3642
3643   compute_subscript_distance (ddr);
3644   if (build_classic_dist_vector (ddr, loop_nest))
3645     build_classic_dir_vector (ddr);
3646 }
3647
3648 /* Returns true when all the access functions of A are affine or
3649    constant with respect to LOOP_NEST.  */
3650
3651 static bool
3652 access_functions_are_affine_or_constant_p (const struct data_reference *a,
3653                                            const struct loop *loop_nest)
3654 {
3655   unsigned int i;
3656   vec<tree> fns = DR_ACCESS_FNS (a);
3657   tree t;
3658
3659   FOR_EACH_VEC_ELT (fns, i, t)
3660     if (!evolution_function_is_invariant_p (t, loop_nest->num)
3661         && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
3662       return false;
3663
3664   return true;
3665 }
3666
3667 /* Initializes an equation for an OMEGA problem using the information
3668    contained in the ACCESS_FUN.  Returns true when the operation
3669    succeeded.
3670
3671    PB is the omega constraint system.
3672    EQ is the number of the equation to be initialized.
3673    OFFSET is used for shifting the variables names in the constraints:
3674    a constrain is composed of 2 * the number of variables surrounding
3675    dependence accesses.  OFFSET is set either to 0 for the first n variables,
3676    then it is set to n.
3677    ACCESS_FUN is expected to be an affine chrec.  */
3678
3679 static bool
3680 init_omega_eq_with_af (omega_pb pb, unsigned eq,
3681                        unsigned int offset, tree access_fun,
3682                        struct data_dependence_relation *ddr)
3683 {
3684   switch (TREE_CODE (access_fun))
3685     {
3686     case POLYNOMIAL_CHREC:
3687       {
3688         tree left = CHREC_LEFT (access_fun);
3689         tree right = CHREC_RIGHT (access_fun);
3690         int var = CHREC_VARIABLE (access_fun);
3691         unsigned var_idx;
3692
3693         if (TREE_CODE (right) != INTEGER_CST)
3694           return false;
3695
3696         var_idx = index_in_loop_nest (var, DDR_LOOP_NEST (ddr));
3697         pb->eqs[eq].coef[offset + var_idx + 1] = int_cst_value (right);
3698
3699         /* Compute the innermost loop index.  */
3700         DDR_INNER_LOOP (ddr) = MAX (DDR_INNER_LOOP (ddr), var_idx);
3701
3702         if (offset == 0)
3703           pb->eqs[eq].coef[var_idx + DDR_NB_LOOPS (ddr) + 1]
3704             += int_cst_value (right);
3705
3706         switch (TREE_CODE (left))
3707           {
3708           case POLYNOMIAL_CHREC:
3709             return init_omega_eq_with_af (pb, eq, offset, left, ddr);
3710
3711           case INTEGER_CST:
3712             pb->eqs[eq].coef[0] += int_cst_value (left);
3713             return true;
3714
3715           default:
3716             return false;
3717           }
3718       }
3719
3720     case INTEGER_CST:
3721       pb->eqs[eq].coef[0] += int_cst_value (access_fun);
3722       return true;
3723
3724     default:
3725       return false;
3726     }
3727 }
3728
3729 /* As explained in the comments preceding init_omega_for_ddr, we have
3730    to set up a system for each loop level, setting outer loops
3731    variation to zero, and current loop variation to positive or zero.
3732    Save each lexico positive distance vector.  */
3733
3734 static void
3735 omega_extract_distance_vectors (omega_pb pb,
3736                                 struct data_dependence_relation *ddr)
3737 {
3738   int eq, geq;
3739   unsigned i, j;
3740   struct loop *loopi, *loopj;
3741   enum omega_result res;
3742
3743   /* Set a new problem for each loop in the nest.  The basis is the
3744      problem that we have initialized until now.  On top of this we
3745      add new constraints.  */
3746   for (i = 0; i <= DDR_INNER_LOOP (ddr)
3747               && DDR_LOOP_NEST (ddr).iterate (i, &loopi); i++)
3748     {
3749       int dist = 0;
3750       omega_pb copy = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr),
3751                                            DDR_NB_LOOPS (ddr));
3752
3753       omega_copy_problem (copy, pb);
3754
3755       /* For all the outer loops "loop_j", add "dj = 0".  */
3756       for (j = 0; j < i && DDR_LOOP_NEST (ddr).iterate (j, &loopj); j++)
3757         {
3758           eq = omega_add_zero_eq (copy, omega_black);
3759           copy->eqs[eq].coef[j + 1] = 1;
3760         }
3761
3762       /* For "loop_i", add "0 <= di".  */
3763       geq = omega_add_zero_geq (copy, omega_black);
3764       copy->geqs[geq].coef[i + 1] = 1;
3765
3766       /* Reduce the constraint system, and test that the current
3767          problem is feasible.  */
3768       res = omega_simplify_problem (copy);
3769       if (res == omega_false
3770           || res == omega_unknown
3771           || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3772         goto next_problem;
3773
3774       for (eq = 0; eq < copy->num_subs; eq++)
3775         if (copy->subs[eq].key == (int) i + 1)
3776           {
3777             dist = copy->subs[eq].coef[0];
3778             goto found_dist;
3779           }
3780
3781       if (dist == 0)
3782         {
3783           /* Reinitialize problem...  */
3784           omega_copy_problem (copy, pb);
3785           for (j = 0; j < i && DDR_LOOP_NEST (ddr).iterate (j, &loopj); j++)
3786             {
3787               eq = omega_add_zero_eq (copy, omega_black);
3788               copy->eqs[eq].coef[j + 1] = 1;
3789             }
3790
3791           /* ..., but this time "di = 1".  */
3792           eq = omega_add_zero_eq (copy, omega_black);
3793           copy->eqs[eq].coef[i + 1] = 1;
3794           copy->eqs[eq].coef[0] = -1;
3795
3796           res = omega_simplify_problem (copy);
3797           if (res == omega_false
3798               || res == omega_unknown
3799               || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3800             goto next_problem;
3801
3802           for (eq = 0; eq < copy->num_subs; eq++)
3803             if (copy->subs[eq].key == (int) i + 1)
3804               {
3805                 dist = copy->subs[eq].coef[0];
3806                 goto found_dist;
3807               }
3808         }
3809
3810     found_dist:;
3811       /* Save the lexicographically positive distance vector.  */
3812       if (dist >= 0)
3813         {
3814           lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3815           lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3816
3817           dist_v[i] = dist;
3818
3819           for (eq = 0; eq < copy->num_subs; eq++)
3820             if (copy->subs[eq].key > 0)
3821               {
3822                 dist = copy->subs[eq].coef[0];
3823                 dist_v[copy->subs[eq].key - 1] = dist;
3824               }
3825
3826           for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3827             dir_v[j] = dir_from_dist (dist_v[j]);
3828
3829           save_dist_v (ddr, dist_v);
3830           save_dir_v (ddr, dir_v);
3831         }
3832
3833     next_problem:;
3834       omega_free_problem (copy);
3835     }
3836 }
3837
3838 /* This is called for each subscript of a tuple of data references:
3839    insert an equality for representing the conflicts.  */
3840
3841 static bool
3842 omega_setup_subscript (tree access_fun_a, tree access_fun_b,
3843                        struct data_dependence_relation *ddr,
3844                        omega_pb pb, bool *maybe_dependent)
3845 {
3846   int eq;
3847   tree type = signed_type_for_types (TREE_TYPE (access_fun_a),
3848                                      TREE_TYPE (access_fun_b));
3849   tree fun_a = chrec_convert (type, access_fun_a, NULL);
3850   tree fun_b = chrec_convert (type, access_fun_b, NULL);
3851   tree difference = chrec_fold_minus (type, fun_a, fun_b);
3852   tree minus_one;
3853
3854   /* When the fun_a - fun_b is not constant, the dependence is not
3855      captured by the classic distance vector representation.  */
3856   if (TREE_CODE (difference) != INTEGER_CST)
3857     return false;
3858
3859   /* ZIV test.  */
3860   if (ziv_subscript_p (fun_a, fun_b) && !integer_zerop (difference))
3861     {
3862       /* There is no dependence.  */
3863       *maybe_dependent = false;
3864       return true;
3865     }
3866
3867   minus_one = build_int_cst (type, -1);
3868   fun_b = chrec_fold_multiply (type, fun_b, minus_one);
3869
3870   eq = omega_add_zero_eq (pb, omega_black);
3871   if (!init_omega_eq_with_af (pb, eq, DDR_NB_LOOPS (ddr), fun_a, ddr)
3872       || !init_omega_eq_with_af (pb, eq, 0, fun_b, ddr))
3873     /* There is probably a dependence, but the system of
3874        constraints cannot be built: answer "don't know".  */
3875     return false;
3876
3877   /* GCD test.  */
3878   if (DDR_NB_LOOPS (ddr) != 0 && pb->eqs[eq].coef[0]
3879       && !int_divides_p (lambda_vector_gcd
3880                          ((lambda_vector) &(pb->eqs[eq].coef[1]),
3881                           2 * DDR_NB_LOOPS (ddr)),
3882                          pb->eqs[eq].coef[0]))
3883     {
3884       /* There is no dependence.  */
3885       *maybe_dependent = false;
3886       return true;
3887     }
3888
3889   return true;
3890 }
3891
3892 /* Helper function, same as init_omega_for_ddr but specialized for
3893    data references A and B.  */
3894
3895 static bool
3896 init_omega_for_ddr_1 (struct data_reference *dra, struct data_reference *drb,
3897                       struct data_dependence_relation *ddr,
3898                       omega_pb pb, bool *maybe_dependent)
3899 {
3900   unsigned i;
3901   int ineq;
3902   struct loop *loopi;
3903   unsigned nb_loops = DDR_NB_LOOPS (ddr);
3904
3905   /* Insert an equality per subscript.  */
3906   for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3907     {
3908       if (!omega_setup_subscript (DR_ACCESS_FN (dra, i), DR_ACCESS_FN (drb, i),
3909                                   ddr, pb, maybe_dependent))
3910         return false;
3911       else if (*maybe_dependent == false)
3912         {
3913           /* There is no dependence.  */
3914           DDR_ARE_DEPENDENT (ddr) = chrec_known;
3915           return true;
3916         }
3917     }
3918
3919   /* Insert inequalities: constraints corresponding to the iteration
3920      domain, i.e. the loops surrounding the references "loop_x" and
3921      the distance variables "dx".  The layout of the OMEGA
3922      representation is as follows:
3923      - coef[0] is the constant
3924      - coef[1..nb_loops] are the protected variables that will not be
3925      removed by the solver: the "dx"
3926      - coef[nb_loops + 1, 2*nb_loops] are the loop variables: "loop_x".
3927   */
3928   for (i = 0; i <= DDR_INNER_LOOP (ddr)
3929               && DDR_LOOP_NEST (ddr).iterate (i, &loopi); i++)
3930     {
3931       HOST_WIDE_INT nbi = max_stmt_executions_int (loopi);
3932
3933       /* 0 <= loop_x */
3934       ineq = omega_add_zero_geq (pb, omega_black);
3935       pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3936
3937       /* 0 <= loop_x + dx */
3938       ineq = omega_add_zero_geq (pb, omega_black);
3939       pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3940       pb->geqs[ineq].coef[i + 1] = 1;
3941
3942       if (nbi != -1)
3943         {
3944           /* loop_x <= nb_iters */
3945           ineq = omega_add_zero_geq (pb, omega_black);
3946           pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3947           pb->geqs[ineq].coef[0] = nbi;
3948
3949           /* loop_x + dx <= nb_iters */
3950           ineq = omega_add_zero_geq (pb, omega_black);
3951           pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3952           pb->geqs[ineq].coef[i + 1] = -1;
3953           pb->geqs[ineq].coef[0] = nbi;
3954
3955           /* A step "dx" bigger than nb_iters is not feasible, so
3956              add "0 <= nb_iters + dx",  */
3957           ineq = omega_add_zero_geq (pb, omega_black);
3958           pb->geqs[ineq].coef[i + 1] = 1;
3959           pb->geqs[ineq].coef[0] = nbi;
3960           /* and "dx <= nb_iters".  */
3961           ineq = omega_add_zero_geq (pb, omega_black);
3962           pb->geqs[ineq].coef[i + 1] = -1;
3963           pb->geqs[ineq].coef[0] = nbi;
3964         }
3965     }
3966
3967   omega_extract_distance_vectors (pb, ddr);
3968
3969   return true;
3970 }
3971
3972 /* Sets up the Omega dependence problem for the data dependence
3973    relation DDR.  Returns false when the constraint system cannot be
3974    built, ie. when the test answers "don't know".  Returns true
3975    otherwise, and when independence has been proved (using one of the
3976    trivial dependence test), set MAYBE_DEPENDENT to false, otherwise
3977    set MAYBE_DEPENDENT to true.
3978
3979    Example: for setting up the dependence system corresponding to the
3980    conflicting accesses
3981
3982    | loop_i
3983    |   loop_j
3984    |     A[i, i+1] = ...
3985    |     ... A[2*j, 2*(i + j)]
3986    |   endloop_j
3987    | endloop_i
3988
3989    the following constraints come from the iteration domain:
3990
3991    0 <= i <= Ni
3992    0 <= i + di <= Ni
3993    0 <= j <= Nj
3994    0 <= j + dj <= Nj
3995
3996    where di, dj are the distance variables.  The constraints
3997    representing the conflicting elements are:
3998
3999    i = 2 * (j + dj)
4000    i + 1 = 2 * (i + di + j + dj)
4001
4002    For asking that the resulting distance vector (di, dj) be
4003    lexicographically positive, we insert the constraint "di >= 0".  If
4004    "di = 0" in the solution, we fix that component to zero, and we
4005    look at the inner loops: we set a new problem where all the outer
4006    loop distances are zero, and fix this inner component to be
4007    positive.  When one of the components is positive, we save that
4008    distance, and set a new problem where the distance on this loop is
4009    zero, searching for other distances in the inner loops.  Here is
4010    the classic example that illustrates that we have to set for each
4011    inner loop a new problem:
4012
4013    | loop_1
4014    |   loop_2
4015    |     A[10]
4016    |   endloop_2
4017    | endloop_1
4018
4019    we have to save two distances (1, 0) and (0, 1).
4020
4021    Given two array references, refA and refB, we have to set the
4022    dependence problem twice, refA vs. refB and refB vs. refA, and we
4023    cannot do a single test, as refB might occur before refA in the
4024    inner loops, and the contrary when considering outer loops: ex.
4025
4026    | loop_0
4027    |   loop_1
4028    |     loop_2
4029    |       T[{1,+,1}_2][{1,+,1}_1]  // refA
4030    |       T[{2,+,1}_2][{0,+,1}_1]  // refB
4031    |     endloop_2
4032    |   endloop_1
4033    | endloop_0
4034
4035    refB touches the elements in T before refA, and thus for the same
4036    loop_0 refB precedes refA: ie. the distance vector (0, 1, -1)
4037    but for successive loop_0 iterations, we have (1, -1, 1)
4038
4039    The Omega solver expects the distance variables ("di" in the
4040    previous example) to come first in the constraint system (as
4041    variables to be protected, or "safe" variables), the constraint
4042    system is built using the following layout:
4043
4044    "cst | distance vars | index vars".
4045 */
4046
4047 static bool
4048 init_omega_for_ddr (struct data_dependence_relation *ddr,
4049                     bool *maybe_dependent)
4050 {
4051   omega_pb pb;
4052   bool res = false;
4053
4054   *maybe_dependent = true;
4055
4056   if (same_access_functions (ddr))
4057     {
4058       unsigned j;
4059       lambda_vector dir_v;
4060
4061       /* Save the 0 vector.  */
4062       save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
4063       dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4064       for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
4065         dir_v[j] = dir_equal;
4066       save_dir_v (ddr, dir_v);
4067
4068       /* Save the dependences carried by outer loops.  */
4069       pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
4070       res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
4071                                   maybe_dependent);
4072       omega_free_problem (pb);
4073       return res;
4074     }
4075
4076   /* Omega expects the protected variables (those that have to be kept
4077      after elimination) to appear first in the constraint system.
4078      These variables are the distance variables.  In the following
4079      initialization we declare NB_LOOPS safe variables, and the total
4080      number of variables for the constraint system is 2*NB_LOOPS.  */
4081   pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
4082   res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
4083                               maybe_dependent);
4084   omega_free_problem (pb);
4085
4086   /* Stop computation if not decidable, or no dependence.  */
4087   if (res == false || *maybe_dependent == false)
4088     return res;
4089
4090   pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
4091   res = init_omega_for_ddr_1 (DDR_B (ddr), DDR_A (ddr), ddr, pb,
4092                               maybe_dependent);
4093   omega_free_problem (pb);
4094
4095   return res;
4096 }
4097
4098 /* Return true when DDR contains the same information as that stored
4099    in DIR_VECTS and in DIST_VECTS, return false otherwise.   */
4100
4101 static bool
4102 ddr_consistent_p (FILE *file,
4103                   struct data_dependence_relation *ddr,
4104                   vec<lambda_vector> dist_vects,
4105                   vec<lambda_vector> dir_vects)
4106 {
4107   unsigned int i, j;
4108
4109   /* If dump_file is set, output there.  */
4110   if (dump_file && (dump_flags & TDF_DETAILS))
4111     file = dump_file;
4112
4113   if (dist_vects.length () != DDR_NUM_DIST_VECTS (ddr))
4114     {
4115       lambda_vector b_dist_v;
4116       fprintf (file, "\n(Number of distance vectors differ: Banerjee has %d, Omega has %d.\n",
4117                dist_vects.length (),
4118                DDR_NUM_DIST_VECTS (ddr));
4119
4120       fprintf (file, "Banerjee dist vectors:\n");
4121       FOR_EACH_VEC_ELT (dist_vects, i, b_dist_v)
4122         print_lambda_vector (file, b_dist_v, DDR_NB_LOOPS (ddr));
4123
4124       fprintf (file, "Omega dist vectors:\n");
4125       for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
4126         print_lambda_vector (file, DDR_DIST_VECT (ddr, i), DDR_NB_LOOPS (ddr));
4127
4128       fprintf (file, "data dependence relation:\n");
4129       dump_data_dependence_relation (file, ddr);
4130
4131       fprintf (file, ")\n");
4132       return false;
4133     }
4134
4135   if (dir_vects.length () != DDR_NUM_DIR_VECTS (ddr))
4136     {
4137       fprintf (file, "\n(Number of direction vectors differ: Banerjee has %d, Omega has %d.)\n",
4138                dir_vects.length (),
4139                DDR_NUM_DIR_VECTS (ddr));
4140       return false;
4141     }
4142
4143   for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
4144     {
4145       lambda_vector a_dist_v;
4146       lambda_vector b_dist_v = DDR_DIST_VECT (ddr, i);
4147
4148       /* Distance vectors are not ordered in the same way in the DDR
4149          and in the DIST_VECTS: search for a matching vector.  */
4150       FOR_EACH_VEC_ELT (dist_vects, j, a_dist_v)
4151         if (lambda_vector_equal (a_dist_v, b_dist_v, DDR_NB_LOOPS (ddr)))
4152           break;
4153
4154       if (j == dist_vects.length ())
4155         {
4156           fprintf (file, "\n(Dist vectors from the first dependence analyzer:\n");
4157           print_dist_vectors (file, dist_vects, DDR_NB_LOOPS (ddr));
4158           fprintf (file, "not found in Omega dist vectors:\n");
4159           print_dist_vectors (file, DDR_DIST_VECTS (ddr), DDR_NB_LOOPS (ddr));
4160           fprintf (file, "data dependence relation:\n");
4161           dump_data_dependence_relation (file, ddr);
4162           fprintf (file, ")\n");
4163         }
4164     }
4165
4166   for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
4167     {
4168       lambda_vector a_dir_v;
4169       lambda_vector b_dir_v = DDR_DIR_VECT (ddr, i);
4170
4171       /* Direction vectors are not ordered in the same way in the DDR
4172          and in the DIR_VECTS: search for a matching vector.  */
4173       FOR_EACH_VEC_ELT (dir_vects, j, a_dir_v)
4174         if (lambda_vector_equal (a_dir_v, b_dir_v, DDR_NB_LOOPS (ddr)))
4175           break;
4176
4177       if (j == dist_vects.length ())
4178         {
4179           fprintf (file, "\n(Dir vectors from the first dependence analyzer:\n");
4180           print_dir_vectors (file, dir_vects, DDR_NB_LOOPS (ddr));
4181           fprintf (file, "not found in Omega dir vectors:\n");
4182           print_dir_vectors (file, DDR_DIR_VECTS (ddr), DDR_NB_LOOPS (ddr));
4183           fprintf (file, "data dependence relation:\n");
4184           dump_data_dependence_relation (file, ddr);
4185           fprintf (file, ")\n");
4186         }
4187     }
4188
4189   return true;
4190 }
4191
4192 /* This computes the affine dependence relation between A and B with
4193    respect to LOOP_NEST.  CHREC_KNOWN is used for representing the
4194    independence between two accesses, while CHREC_DONT_KNOW is used
4195    for representing the unknown relation.
4196
4197    Note that it is possible to stop the computation of the dependence
4198    relation the first time we detect a CHREC_KNOWN element for a given
4199    subscript.  */
4200
4201 void
4202 compute_affine_dependence (struct data_dependence_relation *ddr,
4203                            struct loop *loop_nest)
4204 {
4205   struct data_reference *dra = DDR_A (ddr);
4206   struct data_reference *drb = DDR_B (ddr);
4207
4208   if (dump_file && (dump_flags & TDF_DETAILS))
4209     {
4210       fprintf (dump_file, "(compute_affine_dependence\n");
4211       fprintf (dump_file, "  stmt_a: ");
4212       print_gimple_stmt (dump_file, DR_STMT (dra), 0, TDF_SLIM);
4213       fprintf (dump_file, "  stmt_b: ");
4214       print_gimple_stmt (dump_file, DR_STMT (drb), 0, TDF_SLIM);
4215     }
4216
4217   /* Analyze only when the dependence relation is not yet known.  */
4218   if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4219     {
4220       dependence_stats.num_dependence_tests++;
4221
4222       if (access_functions_are_affine_or_constant_p (dra, loop_nest)
4223           && access_functions_are_affine_or_constant_p (drb, loop_nest))
4224         {
4225           subscript_dependence_tester (ddr, loop_nest);
4226
4227           if (flag_check_data_deps)
4228             {
4229               /* Dump the dependences from the first algorithm.  */
4230               if (dump_file && (dump_flags & TDF_DETAILS))
4231                 {
4232                   fprintf (dump_file, "\n\nBanerjee Analyzer\n");
4233                   dump_data_dependence_relation (dump_file, ddr);
4234                 }
4235
4236               if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4237                 {
4238                   bool maybe_dependent;
4239                   vec<lambda_vector> dir_vects, dist_vects;
4240
4241                   /* Save the result of the first DD analyzer.  */
4242                   dist_vects = DDR_DIST_VECTS (ddr);
4243                   dir_vects = DDR_DIR_VECTS (ddr);
4244
4245                   /* Reset the information.  */
4246                   DDR_DIST_VECTS (ddr).create (0);
4247                   DDR_DIR_VECTS (ddr).create (0);
4248
4249                   /* Compute the same information using Omega.  */
4250                   if (!init_omega_for_ddr (ddr, &maybe_dependent))
4251                     goto csys_dont_know;
4252
4253                   if (dump_file && (dump_flags & TDF_DETAILS))
4254                     {
4255                       fprintf (dump_file, "Omega Analyzer\n");
4256                       dump_data_dependence_relation (dump_file, ddr);
4257                     }
4258
4259                   /* Check that we get the same information.  */
4260                   if (maybe_dependent)
4261                     gcc_assert (ddr_consistent_p (stderr, ddr, dist_vects,
4262                                                   dir_vects));
4263                 }
4264             }
4265         }
4266
4267       /* As a last case, if the dependence cannot be determined, or if
4268          the dependence is considered too difficult to determine, answer
4269          "don't know".  */
4270       else
4271         {
4272         csys_dont_know:;
4273           dependence_stats.num_dependence_undetermined++;
4274
4275           if (dump_file && (dump_flags & TDF_DETAILS))
4276             {
4277               fprintf (dump_file, "Data ref a:\n");
4278               dump_data_reference (dump_file, dra);
4279               fprintf (dump_file, "Data ref b:\n");
4280               dump_data_reference (dump_file, drb);
4281               fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
4282             }
4283           finalize_ddr_dependent (ddr, chrec_dont_know);
4284         }
4285     }
4286
4287   if (dump_file && (dump_flags & TDF_DETAILS))
4288     {
4289       if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4290         fprintf (dump_file, ") -> no dependence\n");
4291       else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
4292         fprintf (dump_file, ") -> dependence analysis failed\n");
4293       else
4294         fprintf (dump_file, ")\n");
4295     }
4296 }
4297
4298 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
4299    the data references in DATAREFS, in the LOOP_NEST.  When
4300    COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
4301    relations.  Return true when successful, i.e. data references number
4302    is small enough to be handled.  */
4303
4304 bool
4305 compute_all_dependences (vec<data_reference_p> datarefs,
4306                          vec<ddr_p> *dependence_relations,
4307                          vec<loop_p> loop_nest,
4308                          bool compute_self_and_rr)
4309 {
4310   struct data_dependence_relation *ddr;
4311   struct data_reference *a, *b;
4312   unsigned int i, j;
4313
4314   if ((int) datarefs.length ()
4315       > PARAM_VALUE (PARAM_LOOP_MAX_DATAREFS_FOR_DATADEPS))
4316     {
4317       struct data_dependence_relation *ddr;
4318
4319       /* Insert a single relation into dependence_relations:
4320          chrec_dont_know.  */
4321       ddr = initialize_data_dependence_relation (NULL, NULL, loop_nest);
4322       dependence_relations->safe_push (ddr);
4323       return false;
4324     }
4325
4326   FOR_EACH_VEC_ELT (datarefs, i, a)
4327     for (j = i + 1; datarefs.iterate (j, &b); j++)
4328       if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
4329         {
4330           ddr = initialize_data_dependence_relation (a, b, loop_nest);
4331           dependence_relations->safe_push (ddr);
4332           if (loop_nest.exists ())
4333             compute_affine_dependence (ddr, loop_nest[0]);
4334         }
4335
4336   if (compute_self_and_rr)
4337     FOR_EACH_VEC_ELT (datarefs, i, a)
4338       {
4339         ddr = initialize_data_dependence_relation (a, a, loop_nest);
4340         dependence_relations->safe_push (ddr);
4341         if (loop_nest.exists ())
4342           compute_affine_dependence (ddr, loop_nest[0]);
4343       }
4344
4345   return true;
4346 }
4347
4348 /* Describes a location of a memory reference.  */
4349
4350 typedef struct data_ref_loc_d
4351 {
4352   /* The memory reference.  */
4353   tree ref;
4354
4355   /* True if the memory reference is read.  */
4356   bool is_read;
4357 } data_ref_loc;
4358
4359
4360 /* Stores the locations of memory references in STMT to REFERENCES.  Returns
4361    true if STMT clobbers memory, false otherwise.  */
4362
4363 static bool
4364 get_references_in_stmt (gimple stmt, vec<data_ref_loc, va_heap> *references)
4365 {
4366   bool clobbers_memory = false;
4367   data_ref_loc ref;
4368   tree op0, op1;
4369   enum gimple_code stmt_code = gimple_code (stmt);
4370
4371   /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
4372      As we cannot model data-references to not spelled out
4373      accesses give up if they may occur.  */
4374   if (stmt_code == GIMPLE_CALL
4375       && !(gimple_call_flags (stmt) & ECF_CONST))
4376     {
4377       /* Allow IFN_GOMP_SIMD_LANE in their own loops.  */
4378       if (gimple_call_internal_p (stmt))
4379         switch (gimple_call_internal_fn (stmt))
4380           {
4381           case IFN_GOMP_SIMD_LANE:
4382             {
4383               struct loop *loop = gimple_bb (stmt)->loop_father;
4384               tree uid = gimple_call_arg (stmt, 0);
4385               gcc_assert (TREE_CODE (uid) == SSA_NAME);
4386               if (loop == NULL
4387                   || loop->simduid != SSA_NAME_VAR (uid))
4388                 clobbers_memory = true;
4389               break;
4390             }
4391           case IFN_MASK_LOAD:
4392           case IFN_MASK_STORE:
4393             break;
4394           default:
4395             clobbers_memory = true;
4396             break;
4397           }
4398       else
4399         clobbers_memory = true;
4400     }
4401   else if (stmt_code == GIMPLE_ASM
4402            && (gimple_asm_volatile_p (as_a <gasm *> (stmt))
4403                || gimple_vuse (stmt)))
4404     clobbers_memory = true;
4405
4406   if (!gimple_vuse (stmt))
4407     return clobbers_memory;
4408
4409   if (stmt_code == GIMPLE_ASSIGN)
4410     {
4411       tree base;
4412       op0 = gimple_assign_lhs (stmt);
4413       op1 = gimple_assign_rhs1 (stmt);
4414
4415       if (DECL_P (op1)
4416           || (REFERENCE_CLASS_P (op1)
4417               && (base = get_base_address (op1))
4418               && TREE_CODE (base) != SSA_NAME))
4419         {
4420           ref.ref = op1;
4421           ref.is_read = true;
4422           references->safe_push (ref);
4423         }
4424     }
4425   else if (stmt_code == GIMPLE_CALL)
4426     {
4427       unsigned i, n;
4428
4429       ref.is_read = false;
4430       if (gimple_call_internal_p (stmt))
4431         switch (gimple_call_internal_fn (stmt))
4432           {
4433           case IFN_MASK_LOAD:
4434             if (gimple_call_lhs (stmt) == NULL_TREE)
4435               break;
4436             ref.is_read = true;
4437           case IFN_MASK_STORE:
4438             ref.ref = fold_build2 (MEM_REF,
4439                                    ref.is_read
4440                                    ? TREE_TYPE (gimple_call_lhs (stmt))
4441                                    : TREE_TYPE (gimple_call_arg (stmt, 3)),
4442                                    gimple_call_arg (stmt, 0),
4443                                    gimple_call_arg (stmt, 1));
4444             references->safe_push (ref);
4445             return false;
4446           default:
4447             break;
4448           }
4449
4450       op0 = gimple_call_lhs (stmt);
4451       n = gimple_call_num_args (stmt);
4452       for (i = 0; i < n; i++)
4453         {
4454           op1 = gimple_call_arg (stmt, i);
4455
4456           if (DECL_P (op1)
4457               || (REFERENCE_CLASS_P (op1) && get_base_address (op1)))
4458             {
4459               ref.ref = op1;
4460               ref.is_read = true;
4461               references->safe_push (ref);
4462             }
4463         }
4464     }
4465   else
4466     return clobbers_memory;
4467
4468   if (op0
4469       && (DECL_P (op0)
4470           || (REFERENCE_CLASS_P (op0) && get_base_address (op0))))
4471     {
4472       ref.ref = op0;
4473       ref.is_read = false;
4474       references->safe_push (ref);
4475     }
4476   return clobbers_memory;
4477 }
4478
4479 /* Stores the data references in STMT to DATAREFS.  If there is an unanalyzable
4480    reference, returns false, otherwise returns true.  NEST is the outermost
4481    loop of the loop nest in which the references should be analyzed.  */
4482
4483 bool
4484 find_data_references_in_stmt (struct loop *nest, gimple stmt,
4485                               vec<data_reference_p> *datarefs)
4486 {
4487   unsigned i;
4488   auto_vec<data_ref_loc, 2> references;
4489   data_ref_loc *ref;
4490   bool ret = true;
4491   data_reference_p dr;
4492
4493   if (get_references_in_stmt (stmt, &references))
4494     return false;
4495
4496   FOR_EACH_VEC_ELT (references, i, ref)
4497     {
4498       dr = create_data_ref (nest, loop_containing_stmt (stmt),
4499                             ref->ref, stmt, ref->is_read);
4500       gcc_assert (dr != NULL);
4501       datarefs->safe_push (dr);
4502     }
4503   references.release ();
4504   return ret;
4505 }
4506
4507 /* Stores the data references in STMT to DATAREFS.  If there is an
4508    unanalyzable reference, returns false, otherwise returns true.
4509    NEST is the outermost loop of the loop nest in which the references
4510    should be instantiated, LOOP is the loop in which the references
4511    should be analyzed.  */
4512
4513 bool
4514 graphite_find_data_references_in_stmt (loop_p nest, loop_p loop, gimple stmt,
4515                                        vec<data_reference_p> *datarefs)
4516 {
4517   unsigned i;
4518   auto_vec<data_ref_loc, 2> references;
4519   data_ref_loc *ref;
4520   bool ret = true;
4521   data_reference_p dr;
4522
4523   if (get_references_in_stmt (stmt, &references))
4524     return false;
4525
4526   FOR_EACH_VEC_ELT (references, i, ref)
4527     {
4528       dr = create_data_ref (nest, loop, ref->ref, stmt, ref->is_read);
4529       gcc_assert (dr != NULL);
4530       datarefs->safe_push (dr);
4531     }
4532
4533   references.release ();
4534   return ret;
4535 }
4536
4537 /* Search the data references in LOOP, and record the information into
4538    DATAREFS.  Returns chrec_dont_know when failing to analyze a
4539    difficult case, returns NULL_TREE otherwise.  */
4540
4541 tree
4542 find_data_references_in_bb (struct loop *loop, basic_block bb,
4543                             vec<data_reference_p> *datarefs)
4544 {
4545   gimple_stmt_iterator bsi;
4546
4547   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4548     {
4549       gimple stmt = gsi_stmt (bsi);
4550
4551       if (!find_data_references_in_stmt (loop, stmt, datarefs))
4552         {
4553           struct data_reference *res;
4554           res = XCNEW (struct data_reference);
4555           datarefs->safe_push (res);
4556
4557           return chrec_dont_know;
4558         }
4559     }
4560
4561   return NULL_TREE;
4562 }
4563
4564 /* Search the data references in LOOP, and record the information into
4565    DATAREFS.  Returns chrec_dont_know when failing to analyze a
4566    difficult case, returns NULL_TREE otherwise.
4567
4568    TODO: This function should be made smarter so that it can handle address
4569    arithmetic as if they were array accesses, etc.  */
4570
4571 tree
4572 find_data_references_in_loop (struct loop *loop,
4573                               vec<data_reference_p> *datarefs)
4574 {
4575   basic_block bb, *bbs;
4576   unsigned int i;
4577
4578   bbs = get_loop_body_in_dom_order (loop);
4579
4580   for (i = 0; i < loop->num_nodes; i++)
4581     {
4582       bb = bbs[i];
4583
4584       if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
4585         {
4586           free (bbs);
4587           return chrec_dont_know;
4588         }
4589     }
4590   free (bbs);
4591
4592   return NULL_TREE;
4593 }
4594
4595 /* Recursive helper function.  */
4596
4597 static bool
4598 find_loop_nest_1 (struct loop *loop, vec<loop_p> *loop_nest)
4599 {
4600   /* Inner loops of the nest should not contain siblings.  Example:
4601      when there are two consecutive loops,
4602
4603      | loop_0
4604      |   loop_1
4605      |     A[{0, +, 1}_1]
4606      |   endloop_1
4607      |   loop_2
4608      |     A[{0, +, 1}_2]
4609      |   endloop_2
4610      | endloop_0
4611
4612      the dependence relation cannot be captured by the distance
4613      abstraction.  */
4614   if (loop->next)
4615     return false;
4616
4617   loop_nest->safe_push (loop);
4618   if (loop->inner)
4619     return find_loop_nest_1 (loop->inner, loop_nest);
4620   return true;
4621 }
4622
4623 /* Return false when the LOOP is not well nested.  Otherwise return
4624    true and insert in LOOP_NEST the loops of the nest.  LOOP_NEST will
4625    contain the loops from the outermost to the innermost, as they will
4626    appear in the classic distance vector.  */
4627
4628 bool
4629 find_loop_nest (struct loop *loop, vec<loop_p> *loop_nest)
4630 {
4631   loop_nest->safe_push (loop);
4632   if (loop->inner)
4633     return find_loop_nest_1 (loop->inner, loop_nest);
4634   return true;
4635 }
4636
4637 /* Returns true when the data dependences have been computed, false otherwise.
4638    Given a loop nest LOOP, the following vectors are returned:
4639    DATAREFS is initialized to all the array elements contained in this loop,
4640    DEPENDENCE_RELATIONS contains the relations between the data references.
4641    Compute read-read and self relations if
4642    COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE.  */
4643
4644 bool
4645 compute_data_dependences_for_loop (struct loop *loop,
4646                                    bool compute_self_and_read_read_dependences,
4647                                    vec<loop_p> *loop_nest,
4648                                    vec<data_reference_p> *datarefs,
4649                                    vec<ddr_p> *dependence_relations)
4650 {
4651   bool res = true;
4652
4653   memset (&dependence_stats, 0, sizeof (dependence_stats));
4654
4655   /* If the loop nest is not well formed, or one of the data references
4656      is not computable, give up without spending time to compute other
4657      dependences.  */
4658   if (!loop
4659       || !find_loop_nest (loop, loop_nest)
4660       || find_data_references_in_loop (loop, datarefs) == chrec_dont_know
4661       || !compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
4662                                    compute_self_and_read_read_dependences))
4663     res = false;
4664
4665   if (dump_file && (dump_flags & TDF_STATS))
4666     {
4667       fprintf (dump_file, "Dependence tester statistics:\n");
4668
4669       fprintf (dump_file, "Number of dependence tests: %d\n",
4670                dependence_stats.num_dependence_tests);
4671       fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
4672                dependence_stats.num_dependence_dependent);
4673       fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
4674                dependence_stats.num_dependence_independent);
4675       fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
4676                dependence_stats.num_dependence_undetermined);
4677
4678       fprintf (dump_file, "Number of subscript tests: %d\n",
4679                dependence_stats.num_subscript_tests);
4680       fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
4681                dependence_stats.num_subscript_undetermined);
4682       fprintf (dump_file, "Number of same subscript function: %d\n",
4683                dependence_stats.num_same_subscript_function);
4684
4685       fprintf (dump_file, "Number of ziv tests: %d\n",
4686                dependence_stats.num_ziv);
4687       fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
4688                dependence_stats.num_ziv_dependent);
4689       fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
4690                dependence_stats.num_ziv_independent);
4691       fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
4692                dependence_stats.num_ziv_unimplemented);
4693
4694       fprintf (dump_file, "Number of siv tests: %d\n",
4695                dependence_stats.num_siv);
4696       fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
4697                dependence_stats.num_siv_dependent);
4698       fprintf (dump_file, "Number of siv tests returning independent: %d\n",
4699                dependence_stats.num_siv_independent);
4700       fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
4701                dependence_stats.num_siv_unimplemented);
4702
4703       fprintf (dump_file, "Number of miv tests: %d\n",
4704                dependence_stats.num_miv);
4705       fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
4706                dependence_stats.num_miv_dependent);
4707       fprintf (dump_file, "Number of miv tests returning independent: %d\n",
4708                dependence_stats.num_miv_independent);
4709       fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
4710                dependence_stats.num_miv_unimplemented);
4711     }
4712
4713   return res;
4714 }
4715
4716 /* Returns true when the data dependences for the basic block BB have been
4717    computed, false otherwise.
4718    DATAREFS is initialized to all the array elements contained in this basic
4719    block, DEPENDENCE_RELATIONS contains the relations between the data
4720    references. Compute read-read and self relations if
4721    COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE.  */
4722 bool
4723 compute_data_dependences_for_bb (basic_block bb,
4724                                  bool compute_self_and_read_read_dependences,
4725                                  vec<data_reference_p> *datarefs,
4726                                  vec<ddr_p> *dependence_relations)
4727 {
4728   if (find_data_references_in_bb (NULL, bb, datarefs) == chrec_dont_know)
4729     return false;
4730
4731   return compute_all_dependences (*datarefs, dependence_relations, vNULL,
4732                                   compute_self_and_read_read_dependences);
4733 }
4734
4735 /* Entry point (for testing only).  Analyze all the data references
4736    and the dependence relations in LOOP.
4737
4738    The data references are computed first.
4739
4740    A relation on these nodes is represented by a complete graph.  Some
4741    of the relations could be of no interest, thus the relations can be
4742    computed on demand.
4743
4744    In the following function we compute all the relations.  This is
4745    just a first implementation that is here for:
4746    - for showing how to ask for the dependence relations,
4747    - for the debugging the whole dependence graph,
4748    - for the dejagnu testcases and maintenance.
4749
4750    It is possible to ask only for a part of the graph, avoiding to
4751    compute the whole dependence graph.  The computed dependences are
4752    stored in a knowledge base (KB) such that later queries don't
4753    recompute the same information.  The implementation of this KB is
4754    transparent to the optimizer, and thus the KB can be changed with a
4755    more efficient implementation, or the KB could be disabled.  */
4756 static void
4757 analyze_all_data_dependences (struct loop *loop)
4758 {
4759   unsigned int i;
4760   int nb_data_refs = 10;
4761   vec<data_reference_p> datarefs;
4762   datarefs.create (nb_data_refs);
4763   vec<ddr_p> dependence_relations;
4764   dependence_relations.create (nb_data_refs * nb_data_refs);
4765   vec<loop_p> loop_nest;
4766   loop_nest.create (3);
4767
4768   /* Compute DDs on the whole function.  */
4769   compute_data_dependences_for_loop (loop, false, &loop_nest, &datarefs,
4770                                      &dependence_relations);
4771
4772   if (dump_file)
4773     {
4774       dump_data_dependence_relations (dump_file, dependence_relations);
4775       fprintf (dump_file, "\n\n");
4776
4777       if (dump_flags & TDF_DETAILS)
4778         dump_dist_dir_vectors (dump_file, dependence_relations);
4779
4780       if (dump_flags & TDF_STATS)
4781         {
4782           unsigned nb_top_relations = 0;
4783           unsigned nb_bot_relations = 0;
4784           unsigned nb_chrec_relations = 0;
4785           struct data_dependence_relation *ddr;
4786
4787           FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
4788             {
4789               if (chrec_contains_undetermined (DDR_ARE_DEPENDENT (ddr)))
4790                 nb_top_relations++;
4791
4792               else if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4793                 nb_bot_relations++;
4794
4795               else
4796                 nb_chrec_relations++;
4797             }
4798
4799           gather_stats_on_scev_database ();
4800         }
4801     }
4802
4803   loop_nest.release ();
4804   free_dependence_relations (dependence_relations);
4805   free_data_refs (datarefs);
4806 }
4807
4808 /* Computes all the data dependences and check that the results of
4809    several analyzers are the same.  */
4810
4811 void
4812 tree_check_data_deps (void)
4813 {
4814   struct loop *loop_nest;
4815
4816   FOR_EACH_LOOP (loop_nest, 0)
4817     analyze_all_data_dependences (loop_nest);
4818 }
4819
4820 /* Free the memory used by a data dependence relation DDR.  */
4821
4822 void
4823 free_dependence_relation (struct data_dependence_relation *ddr)
4824 {
4825   if (ddr == NULL)
4826     return;
4827
4828   if (DDR_SUBSCRIPTS (ddr).exists ())
4829     free_subscripts (DDR_SUBSCRIPTS (ddr));
4830   DDR_DIST_VECTS (ddr).release ();
4831   DDR_DIR_VECTS (ddr).release ();
4832
4833   free (ddr);
4834 }
4835
4836 /* Free the memory used by the data dependence relations from
4837    DEPENDENCE_RELATIONS.  */
4838
4839 void
4840 free_dependence_relations (vec<ddr_p> dependence_relations)
4841 {
4842   unsigned int i;
4843   struct data_dependence_relation *ddr;
4844
4845   FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
4846     if (ddr)
4847       free_dependence_relation (ddr);
4848
4849   dependence_relations.release ();
4850 }
4851
4852 /* Free the memory used by the data references from DATAREFS.  */
4853
4854 void
4855 free_data_refs (vec<data_reference_p> datarefs)
4856 {
4857   unsigned int i;
4858   struct data_reference *dr;
4859
4860   FOR_EACH_VEC_ELT (datarefs, i, dr)
4861     free_data_ref (dr);
4862   datarefs.release ();
4863 }