gcc/tree-data-ref.c

   1 /* Data references and dependences detectors.
   2    Copyright (C) 2003-2019 Free Software Foundation, Inc.
   3    Contributed by Sebastian Pop <pop@cri.ensmp.fr>
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 /* This pass walks a given loop structure searching for array
  22    references.  The information about the array accesses is recorded
  23    in DATA_REFERENCE structures.
  24
  25    The basic test for determining the dependences is:
   26    given two access functions chrec1 and chrec2 to the same array, and
  27    x and y two vectors from the iteration domain, the same element of
  28    the array is accessed twice at iterations x and y if and only if:
  29    |             chrec1 (x) == chrec2 (y).
  30
  31    The goals of this analysis are:
  32
  33    - to determine the independence: the relation between two
  34      independent accesses is qualified with the chrec_known (this
  35      information allows a loop parallelization),
  36
  37    - when two data references access the same data, to qualify the
  38      dependence relation with classic dependence representations:
  39
  40        - distance vectors
  41        - direction vectors
  42        - loop carried level dependence
  43        - polyhedron dependence
  44      or with the chains of recurrences based representation,
  45
  46    - to define a knowledge base for storing the data dependence
  47      information,
  48
  49    - to define an interface to access this data.
  50
  51
  52    Definitions:
  53
  54    - subscript: given two array accesses a subscript is the tuple
  55    composed of the access functions for a given dimension.  Example:
  56    Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
  57    (f1, g1), (f2, g2), (f3, g3).
  58
  59    - Diophantine equation: an equation whose coefficients and
  60    solutions are integer constants, for example the equation
  61    |   3*x + 2*y = 1
  62    has an integer solution x = 1 and y = -1.
  63
  64    References:
  65
  66    - "Advanced Compilation for High Performance Computing" by Randy
  67    Allen and Ken Kennedy.
  68    http://citeseer.ist.psu.edu/goff91practical.html
  69
  70    - "Loop Transformations for Restructuring Compilers - The Foundations"
  71    by Utpal Banerjee.
  72
  73
  74 */
  75
  76 #include "config.h"
  77 #include "system.h"
  78 #include "coretypes.h"
  79 #include "backend.h"
  80 #include "rtl.h"
  81 #include "tree.h"
  82 #include "gimple.h"
  83 #include "gimple-pretty-print.h"
  84 #include "alias.h"
  85 #include "fold-const.h"
  86 #include "expr.h"
  87 #include "gimple-iterator.h"
  88 #include "tree-ssa-loop-niter.h"
  89 #include "tree-ssa-loop.h"
  90 #include "tree-ssa.h"
  91 #include "cfgloop.h"
  92 #include "tree-data-ref.h"
  93 #include "tree-scalar-evolution.h"
  94 #include "dumpfile.h"
  95 #include "tree-affine.h"
  96 #include "params.h"
  97 #include "builtins.h"
  98 #include "tree-eh.h"
  99 #include "ssa.h"
  100 /* Counters kept in the single file-local object dependence_stats.
         The object has static storage duration and no initializer, so
         every member starts at zero.  NOTE(review): ZIV, SIV and MIV
         presumably stand for the zero-, single- and multiple-index-variable
         subscript tests; the code that updates these counters is not
         visible here, so confirm against it.  */
  101 static struct datadep_stats
  102 {
  103   int num_dependence_tests;
  104   int num_dependence_dependent;
  105   int num_dependence_independent;
  106   int num_dependence_undetermined;
  107
  108   int num_subscript_tests;
  109   int num_subscript_undetermined;
  110   int num_same_subscript_function;
  111
  112   int num_ziv;
  113   int num_ziv_independent;
  114   int num_ziv_dependent;
  115   int num_ziv_unimplemented;
  116
  117   int num_siv;
  118   int num_siv_independent;
  119   int num_siv_dependent;
  120   int num_siv_unimplemented;
  121
  122   int num_miv;
  123   int num_miv_independent;
  124   int num_miv_dependent;
  125   int num_miv_unimplemented;
  126 } dependence_stats;
  127 /* Forward declaration of a file-local (static) helper; no definition
         precedes this point, so it has to appear later in this file.  */
  128 static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
  129                                            unsigned int, unsigned int,
  130                                            struct loop *);
 131 /* Returns true iff A divides B.  */
 132
 133 static inline bool
 134 tree_fold_divides_p (const_tree a, const_tree b)
 135 {
 136   gcc_assert (TREE_CODE (a) == INTEGER_CST);
 137   gcc_assert (TREE_CODE (b) == INTEGER_CST);
 138   return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
 139 }
 140
 141 /* Returns true iff A divides B.  */
 142
 143 static inline bool
 144 int_divides_p (int a, int b)
 145 {
 146   return ((b % a) == 0);
 147 }
 148
 149 /* Return true if reference REF contains a union access.  */
 150
 151 static bool
 152 ref_contains_union_access_p (tree ref)
 153 {
 154   while (handled_component_p (ref))
 155     {
 156       ref = TREE_OPERAND (ref, 0);
 157       if (TREE_CODE (TREE_TYPE (ref)) == UNION_TYPE
 158           || TREE_CODE (TREE_TYPE (ref)) == QUAL_UNION_TYPE)
 159         return true;
 160     }
 161   return false;
 162 }
 163
 164 \f
 165
 166 /* Dump into FILE all the data references from DATAREFS.  */
 167
 168 static void
 169 dump_data_references (FILE *file, vec<data_reference_p> datarefs)
 170 {
 171   unsigned int i;
 172   struct data_reference *dr;
 173
 174   FOR_EACH_VEC_ELT (datarefs, i, dr)
 175     dump_data_reference (file, dr);
 176 }
 177
 178 /* Unified dump into FILE all the data references from DATAREFS.  */
 179
 180 DEBUG_FUNCTION void
 181 debug (vec<data_reference_p> &ref)
 182 {
 183   dump_data_references (stderr, ref);
 184 }
 185
 186 DEBUG_FUNCTION void
 187 debug (vec<data_reference_p> *ptr)
 188 {
 189   if (ptr)
 190     debug (*ptr);
 191   else
 192     fprintf (stderr, "<nil>\n");
 193 }
 194
 195
 196 /* Dump into STDERR all the data references from DATAREFS.  */
 197
 198 DEBUG_FUNCTION void
 199 debug_data_references (vec<data_reference_p> datarefs)
 200 {
 201   dump_data_references (stderr, datarefs);
 202 }
 203
 204 /* Print to STDERR the data_reference DR.  */
 205
 206 DEBUG_FUNCTION void
 207 debug_data_reference (struct data_reference *dr)
 208 {
 209   dump_data_reference (stderr, dr);
 210 }
 211
 212 /* Dump function for a DATA_REFERENCE structure.  */
 213
 214 void
 215 dump_data_reference (FILE *outf,
 216                      struct data_reference *dr)
 217 {
 218   unsigned int i;
 219
 220   fprintf (outf, "#(Data Ref: \n");
 221   fprintf (outf, "#  bb: %d \n", gimple_bb (DR_STMT (dr))->index);
 222   fprintf (outf, "#  stmt: ");
 223   print_gimple_stmt (outf, DR_STMT (dr), 0);
 224   fprintf (outf, "#  ref: ");
 225   print_generic_stmt (outf, DR_REF (dr));
 226   fprintf (outf, "#  base_object: ");
 227   print_generic_stmt (outf, DR_BASE_OBJECT (dr));
 228
 229   for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
 230     {
 231       fprintf (outf, "#  Access function %d: ", i);
 232       print_generic_stmt (outf, DR_ACCESS_FN (dr, i));
 233     }
 234   fprintf (outf, "#)\n");
 235 }
 236
 237 /* Unified dump function for a DATA_REFERENCE structure.  */
 238
 239 DEBUG_FUNCTION void
 240 debug (data_reference &ref)
 241 {
 242   dump_data_reference (stderr, &ref);
 243 }
 244
 245 DEBUG_FUNCTION void
 246 debug (data_reference *ptr)
 247 {
 248   if (ptr)
 249     debug (*ptr);
 250   else
 251     fprintf (stderr, "<nil>\n");
 252 }
 253
 254
 255 /* Dumps the affine function described by FN to the file OUTF.  */
 256
 257 DEBUG_FUNCTION void
 258 dump_affine_function (FILE *outf, affine_fn fn)
 259 {
 260   unsigned i;
 261   tree coef;
 262
 263   print_generic_expr (outf, fn[0], TDF_SLIM);
 264   for (i = 1; fn.iterate (i, &coef); i++)
 265     {
 266       fprintf (outf, " + ");
 267       print_generic_expr (outf, coef, TDF_SLIM);
 268       fprintf (outf, " * x_%u", i);
 269     }
 270 }
 271
 272 /* Dumps the conflict function CF to the file OUTF.  */
 273
 274 DEBUG_FUNCTION void
 275 dump_conflict_function (FILE *outf, conflict_function *cf)
 276 {
 277   unsigned i;
 278
 279   if (cf->n == NO_DEPENDENCE)
 280     fprintf (outf, "no dependence");
 281   else if (cf->n == NOT_KNOWN)
 282     fprintf (outf, "not known");
 283   else
 284     {
 285       for (i = 0; i < cf->n; i++)
 286         {
 287           if (i != 0)
 288             fprintf (outf, " ");
 289           fprintf (outf, "[");
 290           dump_affine_function (outf, cf->fns[i]);
 291           fprintf (outf, "]");
 292         }
 293     }
 294 }
 295
 296 /* Dump function for a SUBSCRIPT structure.  */
 297
 298 DEBUG_FUNCTION void
 299 dump_subscript (FILE *outf, struct subscript *subscript)
 300 {
 301   conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
 302
 303   fprintf (outf, "\n (subscript \n");
 304   fprintf (outf, "  iterations_that_access_an_element_twice_in_A: ");
 305   dump_conflict_function (outf, cf);
 306   if (CF_NONTRIVIAL_P (cf))
 307     {
 308       tree last_iteration = SUB_LAST_CONFLICT (subscript);
 309       fprintf (outf, "\n  last_conflict: ");
 310       print_generic_expr (outf, last_iteration);
 311     }
 312
 313   cf = SUB_CONFLICTS_IN_B (subscript);
 314   fprintf (outf, "\n  iterations_that_access_an_element_twice_in_B: ");
 315   dump_conflict_function (outf, cf);
 316   if (CF_NONTRIVIAL_P (cf))
 317     {
 318       tree last_iteration = SUB_LAST_CONFLICT (subscript);
 319       fprintf (outf, "\n  last_conflict: ");
 320       print_generic_expr (outf, last_iteration);
 321     }
 322
 323   fprintf (outf, "\n  (Subscript distance: ");
 324   print_generic_expr (outf, SUB_DISTANCE (subscript));
 325   fprintf (outf, " ))\n");
 326 }
 327
 328 /* Print the classic direction vector DIRV to OUTF.  */
 329
 330 DEBUG_FUNCTION void
 331 print_direction_vector (FILE *outf,
 332                         lambda_vector dirv,
 333                         int length)
 334 {
 335   int eq;
 336
 337   for (eq = 0; eq < length; eq++)
 338     {
 339       enum data_dependence_direction dir = ((enum data_dependence_direction)
 340                                             dirv[eq]);
 341
 342       switch (dir)
 343         {
 344         case dir_positive:
 345           fprintf (outf, "    +");
 346           break;
 347         case dir_negative:
 348           fprintf (outf, "    -");
 349           break;
 350         case dir_equal:
 351           fprintf (outf, "    =");
 352           break;
 353         case dir_positive_or_equal:
 354           fprintf (outf, "   +=");
 355           break;
 356         case dir_positive_or_negative:
 357           fprintf (outf, "   +-");
 358           break;
 359         case dir_negative_or_equal:
 360           fprintf (outf, "   -=");
 361           break;
 362         case dir_star:
 363           fprintf (outf, "    *");
 364           break;
 365         default:
 366           fprintf (outf, "indep");
 367           break;
 368         }
 369     }
 370   fprintf (outf, "\n");
 371 }
 372
 373 /* Print a vector of direction vectors.  */
 374
 375 DEBUG_FUNCTION void
 376 print_dir_vectors (FILE *outf, vec<lambda_vector> dir_vects,
 377                    int length)
 378 {
 379   unsigned j;
 380   lambda_vector v;
 381
 382   FOR_EACH_VEC_ELT (dir_vects, j, v)
 383     print_direction_vector (outf, v, length);
 384 }
 385
 386 /* Print out a vector VEC of length N to OUTFILE.  */
 387
 388 DEBUG_FUNCTION void
 389 print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
 390 {
 391   int i;
 392
 393   for (i = 0; i < n; i++)
 394     fprintf (outfile, "%3d ", (int)vector[i]);
 395   fprintf (outfile, "\n");
 396 }
 397
 398 /* Print a vector of distance vectors.  */
 399
 400 DEBUG_FUNCTION void
 401 print_dist_vectors (FILE *outf, vec<lambda_vector> dist_vects,
 402                     int length)
 403 {
 404   unsigned j;
 405   lambda_vector v;
 406
 407   FOR_EACH_VEC_ELT (dist_vects, j, v)
 408     print_lambda_vector (outf, v, length);
 409 }
 410
 411 /* Dump function for a DATA_DEPENDENCE_RELATION structure.  */
 412
 413 DEBUG_FUNCTION void
 414 dump_data_dependence_relation (FILE *outf,
 415                                struct data_dependence_relation *ddr)
 416 {
 417   struct data_reference *dra, *drb;
 418
 419   fprintf (outf, "(Data Dep: \n");
 420
 421   if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
 422     {
 423       if (ddr)
 424         {
 425           dra = DDR_A (ddr);
 426           drb = DDR_B (ddr);
 427           if (dra)
 428             dump_data_reference (outf, dra);
 429           else
 430             fprintf (outf, "    (nil)\n");
 431           if (drb)
 432             dump_data_reference (outf, drb);
 433           else
 434             fprintf (outf, "    (nil)\n");
 435         }
 436       fprintf (outf, "    (don't know)\n)\n");
 437       return;
 438     }
 439
 440   dra = DDR_A (ddr);
 441   drb = DDR_B (ddr);
 442   dump_data_reference (outf, dra);
 443   dump_data_reference (outf, drb);
 444
 445   if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
 446     fprintf (outf, "    (no dependence)\n");
 447
 448   else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
 449     {
 450       unsigned int i;
 451       struct loop *loopi;
 452
 453       subscript *sub;
 454       FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
 455         {
 456           fprintf (outf, "  access_fn_A: ");
 457           print_generic_stmt (outf, SUB_ACCESS_FN (sub, 0));
 458           fprintf (outf, "  access_fn_B: ");
 459           print_generic_stmt (outf, SUB_ACCESS_FN (sub, 1));
 460           dump_subscript (outf, sub);
 461         }
 462
 463       fprintf (outf, "  inner loop index: %d\n", DDR_INNER_LOOP (ddr));
 464       fprintf (outf, "  loop nest: (");
 465       FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr), i, loopi)
 466         fprintf (outf, "%d ", loopi->num);
 467       fprintf (outf, ")\n");
 468
 469       for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
 470         {
 471           fprintf (outf, "  distance_vector: ");
 472           print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
 473                                DDR_NB_LOOPS (ddr));
 474         }
 475
 476       for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
 477         {
 478           fprintf (outf, "  direction_vector: ");
 479           print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
 480                                   DDR_NB_LOOPS (ddr));
 481         }
 482     }
 483
 484   fprintf (outf, ")\n");
 485 }
 486
 487 /* Debug version.  */
 488
 489 DEBUG_FUNCTION void
 490 debug_data_dependence_relation (struct data_dependence_relation *ddr)
 491 {
 492   dump_data_dependence_relation (stderr, ddr);
 493 }
 494
 495 /* Dump into FILE all the dependence relations from DDRS.  */
 496
 497 DEBUG_FUNCTION void
 498 dump_data_dependence_relations (FILE *file,
 499                                 vec<ddr_p> ddrs)
 500 {
 501   unsigned int i;
 502   struct data_dependence_relation *ddr;
 503
 504   FOR_EACH_VEC_ELT (ddrs, i, ddr)
 505     dump_data_dependence_relation (file, ddr);
 506 }
 507
 508 DEBUG_FUNCTION void
 509 debug (vec<ddr_p> &ref)
 510 {
 511   dump_data_dependence_relations (stderr, ref);
 512 }
 513
 514 DEBUG_FUNCTION void
 515 debug (vec<ddr_p> *ptr)
 516 {
 517   if (ptr)
 518     debug (*ptr);
 519   else
 520     fprintf (stderr, "<nil>\n");
 521 }
 522
 523
 524 /* Dump to STDERR all the dependence relations from DDRS.  */
 525
 526 DEBUG_FUNCTION void
 527 debug_data_dependence_relations (vec<ddr_p> ddrs)
 528 {
 529   dump_data_dependence_relations (stderr, ddrs);
 530 }
 531
 532 /* Dumps the distance and direction vectors in FILE.  DDRS contains
 533    the dependence relations, and VECT_SIZE is the size of the
 534    dependence vectors, or in other words the number of loops in the
 535    considered nest.  */
 536
 537 DEBUG_FUNCTION void
 538 dump_dist_dir_vectors (FILE *file, vec<ddr_p> ddrs)
 539 {
 540   unsigned int i, j;
 541   struct data_dependence_relation *ddr;
 542   lambda_vector v;
 543
 544   FOR_EACH_VEC_ELT (ddrs, i, ddr)
 545     if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
 546       {
 547         FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), j, v)
 548           {
 549             fprintf (file, "DISTANCE_V (");
 550             print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
 551             fprintf (file, ")\n");
 552           }
 553
 554         FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), j, v)
 555           {
 556             fprintf (file, "DIRECTION_V (");
 557             print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
 558             fprintf (file, ")\n");
 559           }
 560       }
 561
 562   fprintf (file, "\n\n");
 563 }
 564
 565 /* Dumps the data dependence relations DDRS in FILE.  */
 566
 567 DEBUG_FUNCTION void
 568 dump_ddrs (FILE *file, vec<ddr_p> ddrs)
 569 {
 570   unsigned int i;
 571   struct data_dependence_relation *ddr;
 572
 573   FOR_EACH_VEC_ELT (ddrs, i, ddr)
 574     dump_data_dependence_relation (file, ddr);
 575
 576   fprintf (file, "\n\n");
 577 }
 578
 579 DEBUG_FUNCTION void
 580 debug_ddrs (vec<ddr_p> ddrs)
 581 {
 582   dump_ddrs (stderr, ddrs);
 583 }
  584 /* Forward declaration of the cache-taking overload, which is defined
         after split_constant_offset_1; the two call each other.  */
  585 static void
  586 split_constant_offset (tree exp, tree *var, tree *off,
  587                        hash_map<tree, std::pair<tree, tree> > &cache);
 588
  589 /* Helper function for split_constant_offset.  Expresses OP0 CODE OP1
  590    (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
  591    constant of type ssizetype, and returns true.  If we cannot do this
  592    with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
  593    is returned.

         CACHE remembers the (VAR, OFF) pair computed for SSA names that do
         not have a single use, so that a shared definition is split only
         once per top-level query.

         NOTE(review): not every successful case tests OFF against zero; the
         INTEGER_CST case, for one, returns true for any constant.  */
  594
  595 static bool
  596 split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
  597                          tree *var, tree *off,
  598                          hash_map<tree, std::pair<tree, tree> > &cache)
  599 {
  600   tree var0, var1;
  601   tree off0, off1;
  602   enum tree_code ocode = code;  /* Code used to combine the offsets.  */
  603   /* Start from the failure result; successful cases overwrite both.  */
  604   *var = NULL_TREE;
  605   *off = NULL_TREE;
  606
  607   switch (code)
  608     {
  609     case INTEGER_CST:
            /* A constant is all offset; the variable part is zero in TYPE.  */
  610       *var = build_int_cst (type, 0);
  611       *off = fold_convert (ssizetype, op0);
  612       return true;
  613
  614     case POINTER_PLUS_EXPR:
            /* The offsets are ssizetype values, so they are added with
               PLUS_EXPR even though the variable parts keep CODE.  */
  615       ocode = PLUS_EXPR;
  616       /* FALLTHROUGH */
  617     case PLUS_EXPR:
  618     case MINUS_EXPR:
            /* Split both operands, then recombine the variable parts with
               CODE and the constant parts with OCODE.  */
  619       split_constant_offset (op0, &var0, &off0, cache);
  620       split_constant_offset (op1, &var1, &off1, cache);
  621       *var = fold_build2 (code, type, var0, var1);
  622       *off = size_binop (ocode, off0, off1);
  623       return true;
  624
  625     case MULT_EXPR:
            /* Only multiplication by a constant is handled.  */
  626       if (TREE_CODE (op1) != INTEGER_CST)
  627         return false;
  628       /* Both the variable part and the offset of OP0 are scaled by OP1.  */
  629       split_constant_offset (op0, &var0, &off0, cache);
  630       *var = fold_build2 (MULT_EXPR, type, var0, op1);
  631       *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
  632       return true;
  633
  634     case ADDR_EXPR:
  635       {
  636         tree base, poffset;
  637         poly_int64 pbitsize, pbitpos, pbytepos;
  638         machine_mode pmode;
  639         int punsignedp, preversep, pvolatilep;
  640         /* Decompose the object whose address is taken.  */
  641         op0 = TREE_OPERAND (op0, 0);
  642         base
  643           = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset, &pmode,
  644                                  &punsignedp, &preversep, &pvolatilep);
  645         /* Give up unless the bit position is a whole number of bytes.  */
  646         if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
  647           return false;
  648         base = build_fold_addr_expr (base);
  649         off0 = ssize_int (pbytepos);
  650
  651         if (poffset)
  652           {
                  /* Fold the constant part of the variable offset into OFF0
                     and add what remains of it to BASE.  */
  653             split_constant_offset (poffset, &poffset, &off1, cache);
  654             off0 = size_binop (PLUS_EXPR, off0, off1);
  655             if (POINTER_TYPE_P (TREE_TYPE (base)))
  656               base = fold_build_pointer_plus (base, poffset);
  657             else
  658               base = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base,
  659                                   fold_convert (TREE_TYPE (base), poffset));
  660           }
  661
  662         var0 = fold_convert (type, base);
  663
  664         /* If variable length types are involved, punt, otherwise casts
  665            might be converted into ARRAY_REFs in gimplify_conversion.
  666            To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
  667            possibly no longer appears in current GIMPLE, might resurface.
  668            This perhaps could run
  669            if (CONVERT_EXPR_P (var0))
  670              {
  671                gimplify_conversion (&var0);
  672                // Attempt to fill in any within var0 found ARRAY_REF's
  673                // element size from corresponding op embedded ARRAY_REF,
  674                // if unsuccessful, just punt.
  675              }  */
  676         while (POINTER_TYPE_P (type))
  677           type = TREE_TYPE (type);
  678         if (int_size_in_bytes (type) < 0)
  679           return false;
  680
  681         *var = var0;
  682         *off = off0;
  683         return true;
  684       }
  685
  686     case SSA_NAME:
  687       {
  688         if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op0))
  689           return false;
  690         /* Look through the name to the statement that defines it.  */
  691         gimple *def_stmt = SSA_NAME_DEF_STMT (op0);
  692         enum tree_code subcode;
  693         /* Only plain assignments are looked through.  */
  694         if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
  695           return false;
  696
  697         subcode = gimple_assign_rhs_code (def_stmt);
  698
  699         /* We are using a cache to avoid un-CSEing large amounts of code.  */
  700         bool use_cache = false;
  701         if (!has_single_use (op0)
  702             && (subcode == POINTER_PLUS_EXPR
  703                 || subcode == PLUS_EXPR
  704                 || subcode == MINUS_EXPR
  705                 || subcode == MULT_EXPR
  706                 || subcode == ADDR_EXPR
  707                 || CONVERT_EXPR_CODE_P (subcode)))
  708           {
  709             use_cache = true;
  710             bool existed;
  711             std::pair<tree, tree> &e = cache.get_or_insert (op0, &existed);
  712             if (existed)
  713               {
                      /* A zero offset marks either the placeholder stored
                         below or a result with nothing split off; both are
                         reported as failure.  */
  714                 if (integer_zerop (e.second))
  715                   return false;
  716                 *var = e.first;
  717                 *off = e.second;
  718                 return true;
  719               }
                  /* Placeholder: OP0 itself with a zero offset.  It is
                     replaced below only if the split succeeds.  */
  720             e = std::make_pair (op0, ssize_int (0));
  721           }
  722
  723         var0 = gimple_assign_rhs1 (def_stmt);
  724         var1 = gimple_assign_rhs2 (def_stmt);
  725         /* Split the defining expression in place of the name.  */
  726         bool res = split_constant_offset_1 (type, var0, subcode, var1,
  727                                             var, off, cache);
              /* The entry is looked up again rather than reusing E.  */
  728         if (res && use_cache)
  729           *cache.get (op0) = std::make_pair (*var, *off);
  730         return res;
  731       }
  732     CASE_CONVERT:
  733       {
  734         /* We must not introduce undefined overflow, and we must not change
  735            the value.  Hence we're okay if the inner type doesn't overflow
  736            to start with (pointer or signed), the outer type also is an
  737            integer or pointer and the outer precision is at least as large
  738            as the inner.  */
  739         tree itype = TREE_TYPE (op0);
  740         if ((POINTER_TYPE_P (itype)
  741              || (INTEGRAL_TYPE_P (itype) && !TYPE_OVERFLOW_TRAPS (itype)))
  742             && TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
  743             && (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
  744           {
  745             if (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_WRAPS (itype))
  746               {
  747                 /* Split the unconverted operand and try to prove that
  748                    wrapping isn't a problem.  */
  749                 tree tmp_var, tmp_off;
  750                 split_constant_offset (op0, &tmp_var, &tmp_off, cache);
  751
  752                 /* See whether we have an SSA_NAME whose range is known
  753                    to be [A, B].  */
  754                 if (TREE_CODE (tmp_var) != SSA_NAME)
  755                   return false;
  756                 wide_int var_min, var_max;
  757                 value_range_kind vr_type = get_range_info (tmp_var, &var_min,
  758                                                            &var_max);
  759                 wide_int var_nonzero = get_nonzero_bits (tmp_var);
  760                 signop sgn = TYPE_SIGN (itype);
  761                 if (intersect_range_with_nonzero_bits (vr_type, &var_min,
  762                                                        &var_max, var_nonzero,
  763                                                        sgn) != VR_RANGE)
  764                   return false;
  765
  766                 /* See whether the range of OP0 (i.e. TMP_VAR + TMP_OFF)
  767                    is known to be [A + TMP_OFF, B + TMP_OFF], with all
  768                    operations done in ITYPE.  The addition must overflow
  769                    at both ends of the range or at neither.  */
  770                 wi::overflow_type overflow[2];
  771                 unsigned int prec = TYPE_PRECISION (itype);
  772                 wide_int woff = wi::to_wide (tmp_off, prec);
  773                 wide_int op0_min = wi::add (var_min, woff, sgn, &overflow[0]);
                      /* Only the overflow flag of the upper bound is needed.  */
  774                 wi::add (var_max, woff, sgn, &overflow[1]);
  775                 if ((overflow[0] != wi::OVF_NONE) != (overflow[1] != wi::OVF_NONE))
  776                   return false;
  777
  778                 /* Calculate (ssizetype) OP0 - (ssizetype) TMP_VAR.  */
  779                 widest_int diff = (widest_int::from (op0_min, sgn)
  780                                    - widest_int::from (var_min, sgn));
  781                 var0 = tmp_var;
  782                 *off = wide_int_to_tree (ssizetype, diff);
  783               }
  784             else
                    /* The inner type is not a wrapping integer type: split
                       the operand directly.  */
  785               split_constant_offset (op0, &var0, off, cache);
  786             *var = fold_convert (type, var0);
  787             return true;
  788           }
  789         return false;
  790       }
  791
  792     default:
  793       return false;
  794     }
  795 }
 796
 797 /* Expresses EXP as VAR + OFF, where off is a constant.  The type of OFF
 798    will be ssizetype.  */
 799
 800 static void
 801 split_constant_offset (tree exp, tree *var, tree *off,
 802                        hash_map<tree, std::pair<tree, tree> > &cache)
 803 {
 804   tree type = TREE_TYPE (exp), op0, op1, e, o;
 805   enum tree_code code;
 806
 807   *var = exp;
 808   *off = ssize_int (0);
 809
 810   if (tree_is_chrec (exp)
 811       || get_gimple_rhs_class (TREE_CODE (exp)) == GIMPLE_TERNARY_RHS)
 812     return;
 813
 814   code = TREE_CODE (exp);
 815   extract_ops_from_tree (exp, &code, &op0, &op1);
 816   if (split_constant_offset_1 (type, op0, code, op1, &e, &o, cache))
 817     {
 818       *var = e;
 819       *off = o;
 820     }
 821 }
 822
 823 void
 824 split_constant_offset (tree exp, tree *var, tree *off)
 825 {
 826   static hash_map<tree, std::pair<tree, tree> > *cache;
 827   if (!cache)
 828     cache = new hash_map<tree, std::pair<tree, tree> > (37);
 829   split_constant_offset (exp, var, off, *cache);
 830   cache->empty ();
 831 }
 832
 833 /* Returns the address ADDR of an object in a canonical shape (without nop
 834    casts, and with type of pointer to the object).  */
 835
 836 static tree
 837 canonicalize_base_object_address (tree addr)
 838 {
 839   tree orig = addr;
 840
 841   STRIP_NOPS (addr);
 842
 843   /* The base address may be obtained by casting from integer, in that case
 844      keep the cast.  */
 845   if (!POINTER_TYPE_P (TREE_TYPE (addr)))
 846     return orig;
 847
 848   if (TREE_CODE (addr) != ADDR_EXPR)
 849     return addr;
 850
 851   return build_fold_addr_expr (TREE_OPERAND (addr, 0));
 852 }
 853
 854 /* Analyze the behavior of memory reference REF within STMT.
 855    There are two modes:
 856
 857    - BB analysis.  In this case we simply split the address into base,
 858      init and offset components, without reference to any containing loop.
 859      The resulting base and offset are general expressions and they can
 860      vary arbitrarily from one iteration of the containing loop to the next.
 861      The step is always zero.
 862
 863    - loop analysis.  In this case we analyze the reference both wrt LOOP
 864      and on the basis that the reference occurs (is "used") in LOOP;
 865      see the comment above analyze_scalar_evolution_in_loop for more
 866      information about this distinction.  The base, init, offset and
 867      step fields are all invariant in LOOP.
 868
 869    Perform BB analysis if LOOP is null, or if LOOP is the function's
 870    dummy outermost loop.  In other cases perform loop analysis.
 871
 872    Return true if the analysis succeeded and store the results in DRB if so.
 873    BB analysis can only fail for bitfield or reversed-storage accesses.  */
 874
 875 opt_result
 876 dr_analyze_innermost (innermost_loop_behavior *drb, tree ref,
 877                       struct loop *loop, const gimple *stmt)
 878 {
 879   poly_int64 pbitsize, pbitpos;
 880   tree base, poffset;
 881   machine_mode pmode;
 882   int punsignedp, preversep, pvolatilep;
 883   affine_iv base_iv, offset_iv;
 884   tree init, dinit, step;
 885   bool in_loop = (loop && loop->num);
 886
 887   if (dump_file && (dump_flags & TDF_DETAILS))
 888     fprintf (dump_file, "analyze_innermost: ");
 889
 890   base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset, &pmode,
 891                               &punsignedp, &preversep, &pvolatilep);
 892   gcc_assert (base != NULL_TREE);
 893
 894   poly_int64 pbytepos;
 895   if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
 896     return opt_result::failure_at (stmt,
 897                                    "failed: bit offset alignment.\n");
 898
 899   if (preversep)
 900     return opt_result::failure_at (stmt,
 901                                    "failed: reverse storage order.\n");
 902
 903   /* Calculate the alignment and misalignment for the inner reference.  */
 904   unsigned int HOST_WIDE_INT bit_base_misalignment;
 905   unsigned int bit_base_alignment;
 906   get_object_alignment_1 (base, &bit_base_alignment, &bit_base_misalignment);
 907
 908   /* There are no bitfield references remaining in BASE, so the values
 909      we got back must be whole bytes.  */
 910   gcc_assert (bit_base_alignment % BITS_PER_UNIT == 0
 911               && bit_base_misalignment % BITS_PER_UNIT == 0);
 912   unsigned int base_alignment = bit_base_alignment / BITS_PER_UNIT;
 913   poly_int64 base_misalignment = bit_base_misalignment / BITS_PER_UNIT;
 914
 915   if (TREE_CODE (base) == MEM_REF)
 916     {
 917       if (!integer_zerop (TREE_OPERAND (base, 1)))
 918         {
 919           /* Subtract MOFF from the base and add it to POFFSET instead.
 920              Adjust the misalignment to reflect the amount we subtracted.  */
 921           poly_offset_int moff = mem_ref_offset (base);
 922           base_misalignment -= moff.force_shwi ();
 923           tree mofft = wide_int_to_tree (sizetype, moff);
 924           if (!poffset)
 925             poffset = mofft;
 926           else
 927             poffset = size_binop (PLUS_EXPR, poffset, mofft);
 928         }
 929       base = TREE_OPERAND (base, 0);
 930     }
 931   else
 932     base = build_fold_addr_expr (base);
 933
 934   if (in_loop)
 935     {
 936       if (!simple_iv (loop, loop, base, &base_iv, true))
 937         return opt_result::failure_at
 938           (stmt, "failed: evolution of base is not affine.\n");
 939     }
 940   else
 941     {
 942       base_iv.base = base;
 943       base_iv.step = ssize_int (0);
 944       base_iv.no_overflow = true;
 945     }
 946
 947   if (!poffset)
 948     {
 949       offset_iv.base = ssize_int (0);
 950       offset_iv.step = ssize_int (0);
 951     }
 952   else
 953     {
 954       if (!in_loop)
 955         {
 956           offset_iv.base = poffset;
 957           offset_iv.step = ssize_int (0);
 958         }
 959       else if (!simple_iv (loop, loop, poffset, &offset_iv, true))
 960         return opt_result::failure_at
 961           (stmt, "failed: evolution of offset is not affine.\n");
 962     }
 963
 964   init = ssize_int (pbytepos);
 965
 966   /* Subtract any constant component from the base and add it to INIT instead.
 967      Adjust the misalignment to reflect the amount we subtracted.  */
 968   split_constant_offset (base_iv.base, &base_iv.base, &dinit);
 969   init = size_binop (PLUS_EXPR, init, dinit);
 970   base_misalignment -= TREE_INT_CST_LOW (dinit);
 971
 972   split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
 973   init = size_binop (PLUS_EXPR, init, dinit);
 974
 975   step = size_binop (PLUS_EXPR,
 976                      fold_convert (ssizetype, base_iv.step),
 977                      fold_convert (ssizetype, offset_iv.step));
 978
 979   base = canonicalize_base_object_address (base_iv.base);
 980
 981   /* See if get_pointer_alignment can guarantee a higher alignment than
 982      the one we calculated above.  */
 983   unsigned int HOST_WIDE_INT alt_misalignment;
 984   unsigned int alt_alignment;
 985   get_pointer_alignment_1 (base, &alt_alignment, &alt_misalignment);
 986
 987   /* As above, these values must be whole bytes.  */
 988   gcc_assert (alt_alignment % BITS_PER_UNIT == 0
 989               && alt_misalignment % BITS_PER_UNIT == 0);
 990   alt_alignment /= BITS_PER_UNIT;
 991   alt_misalignment /= BITS_PER_UNIT;
 992
 993   if (base_alignment < alt_alignment)
 994     {
 995       base_alignment = alt_alignment;
 996       base_misalignment = alt_misalignment;
 997     }
 998
 999   drb->base_address = base;
1000   drb->offset = fold_convert (ssizetype, offset_iv.base);
1001   drb->init = init;
1002   drb->step = step;
1003   if (known_misalignment (base_misalignment, base_alignment,
1004                           &drb->base_misalignment))
1005     drb->base_alignment = base_alignment;
1006   else
1007     {
1008       drb->base_alignment = known_alignment (base_misalignment);
1009       drb->base_misalignment = 0;
1010     }
1011   drb->offset_alignment = highest_pow2_factor (offset_iv.base);
1012   drb->step_alignment = highest_pow2_factor (step);
1013
1014   if (dump_file && (dump_flags & TDF_DETAILS))
1015     fprintf (dump_file, "success.\n");
1016
1017   return opt_result::success ();
1018 }
1019
1020 /* Return true if OP is a valid component reference for a DR access
1021    function.  This accepts a subset of what handled_component_p accepts.

        The accepted tree codes are REALPART_EXPR, IMAGPART_EXPR, ARRAY_REF
        and COMPONENT_REF.  A COMPONENT_REF only qualifies when the object
        it selects from (operand 0) has RECORD_TYPE.  Every other tree code
        yields false.  */
1022
1023 static bool
1024 access_fn_component_p (tree op)
1025 {
1026   switch (TREE_CODE (op))
1027     {
1028     case REALPART_EXPR:
1029     case IMAGPART_EXPR:
1030     case ARRAY_REF:
1031       return true;
1032
1033     case COMPONENT_REF:
           /* Only a field selected from a RECORD_TYPE object counts; a
              COMPONENT_REF into any other kind of type is rejected.  */
1034       return TREE_CODE (TREE_TYPE (TREE_OPERAND (op, 0))) == RECORD_TYPE;
1035
1036     default:
1037       return false;
1038     }
1039 }
1040
1041 /* Determines the base object and the list of indices of memory reference
1042    DR, analyzed in LOOP and instantiated before NEST.

        On return DR_BASE_OBJECT (DR) and DR_ACCESS_FNS (DR) have been set.
        When NEST is null the base object is simply DR_REF (DR) and the
        access function vector is created empty.  Otherwise the reference
        is peeled from the outside in: one access function is pushed for a
        leading REALPART_EXPR or IMAGPART_EXPR and for every ARRAY_REF or
        record COMPONENT_REF, and whatever remains becomes the base object
        (after the MEM_REF / DECL adjustments made at the end).  */
1043
1044 static void
1045 dr_analyze_indices (struct data_reference *dr, edge nest, loop_p loop)
1046 {
1047   vec<tree> access_fns = vNULL;
1048   tree ref, op;
1049   tree base, off, access_fn;
1050
1051   /* If analyzing a basic-block there are no indices to analyze
1052      and thus no access functions.  */
1053   if (!nest)
1054     {
1055       DR_BASE_OBJECT (dr) = DR_REF (dr);
1056       DR_ACCESS_FNS (dr).create (0);
1057       return;
1058     }
1059
1060   ref = DR_REF (dr);
1061
1062   /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
1063      into a two element array with a constant index.  The base is
1064      then just the immediate underlying object.  */
1065   if (TREE_CODE (ref) == REALPART_EXPR)
1066     {
1067       ref = TREE_OPERAND (ref, 0);
1068       access_fns.safe_push (integer_zero_node);
1069     }
1070   else if (TREE_CODE (ref) == IMAGPART_EXPR)
1071     {
1072       ref = TREE_OPERAND (ref, 0);
1073       access_fns.safe_push (integer_one_node);
1074     }
1075
1076   /* Analyze access functions of dimensions we know to be independent.
1077      The list of component references handled here should be kept in
1078      sync with access_fn_component_p.  */
1079   while (handled_component_p (ref))
1080     {
1081       if (TREE_CODE (ref) == ARRAY_REF)
1082         {
               /* The access function is the scalar evolution of the array
                  index (operand 1), instantiated with respect to NEST.  */
1083           op = TREE_OPERAND (ref, 1);
1084           access_fn = analyze_scalar_evolution (loop, op);
1085           access_fn = instantiate_scev (nest, loop, access_fn);
1086           access_fns.safe_push (access_fn);
1087         }
1088       else if (TREE_CODE (ref) == COMPONENT_REF
1089                && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) == RECORD_TYPE)
1090         {
1091           /* For COMPONENT_REFs of records (but not unions!) use the
1092              FIELD_DECL offset as constant access function so we can
1093              disambiguate a[i].f1 and a[i].f2.  */
               /* Note that this OFF is local to the block and shadows the
                  function-level OFF declared above.  The value pushed is
                  the field offset times BITS_PER_UNIT plus the field's
                  DECL_FIELD_BIT_OFFSET, computed in bitsizetype.  */
1094           tree off = component_ref_field_offset (ref);
1095           off = size_binop (PLUS_EXPR,
1096                             size_binop (MULT_EXPR,
1097                                         fold_convert (bitsizetype, off),
1098                                         bitsize_int (BITS_PER_UNIT)),
1099                             DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref, 1)));
1100           access_fns.safe_push (off);
1101         }
1102       else
1103         /* If we have an unhandled component we could not translate
1104            to an access function stop analyzing.  We have determined
1105            our base object in this case.  */
1106         break;
1107
           /* Step inwards to the object this component was applied to.  */
1108       ref = TREE_OPERAND (ref, 0);
1109     }
1110
1111   /* If the address operand of a MEM_REF base has an evolution in the
1112      analyzed nest, add it as an additional independent access-function.  */
1113   if (TREE_CODE (ref) == MEM_REF)
1114     {
1115       op = TREE_OPERAND (ref, 0);
1116       access_fn = analyze_scalar_evolution (loop, op);
1117       access_fn = instantiate_scev (nest, loop, access_fn);
           /* Only a pointer that actually evolves (a POLYNOMIAL_CHREC) is
              rewritten; otherwise REF is kept as the base object as is.  */
1118       if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
1119         {
1120           tree orig_type;
1121           tree memoff = TREE_OPERAND (ref, 1);
1122           base = initial_condition (access_fn);
1123           orig_type = TREE_TYPE (base);
1124           STRIP_USELESS_TYPE_CONVERSION (base);
1125           split_constant_offset (base, &base, &off);
1126           STRIP_USELESS_TYPE_CONVERSION (base);
1127           /* Fold the MEM_REF offset into the evolutions initial
1128              value to make more bases comparable.  */
1129           if (!integer_zerop (memoff))
1130             {
1131               off = size_binop (PLUS_EXPR, off,
1132                                 fold_convert (ssizetype, memoff));
1133               memoff = build_int_cst (TREE_TYPE (memoff), 0);
1134             }
1135           /* Adjust the offset so it is a multiple of the access type
1136              size and thus we separate bases that can possibly be used
1137              to produce partial overlaps (which the access_fn machinery
1138              cannot handle).  */
1139           wide_int rem;
1140           if (TYPE_SIZE_UNIT (TREE_TYPE (ref))
1141               && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (ref))) == INTEGER_CST
1142               && !integer_zerop (TYPE_SIZE_UNIT (TREE_TYPE (ref))))
1143             rem = wi::mod_trunc
1144               (wi::to_wide (off),
1145                wi::to_wide (TYPE_SIZE_UNIT (TREE_TYPE (ref))),
1146                SIGNED);
1147           else
1148             /* If we can't compute the remainder simply force the initial
1149                condition to zero.  */
1150             rem = wi::to_wide (off);
               /* OFF keeps the part that is a multiple of the access size
                  and goes into the access function; the remainder REM
                  becomes the offset operand of the rebuilt MEM_REF.  */
1151           off = wide_int_to_tree (ssizetype, wi::to_wide (off) - rem);
1152           memoff = wide_int_to_tree (TREE_TYPE (memoff), rem);
1153           /* And finally replace the initial condition.  */
1154           access_fn = chrec_replace_initial_condition
1155               (access_fn, fold_convert (orig_type, off));
1156           /* ???  This is still not a suitable base object for
1157              dr_may_alias_p - the base object needs to be an
1158              access that covers the object as whole.  With
1159              an evolution in the pointer this cannot be
1160              guaranteed.
1161              As a band-aid, mark the access so we can special-case
1162              it in dr_may_alias_p.  */
1163           tree old = ref;
1164           ref = fold_build2_loc (EXPR_LOCATION (ref),
1165                                  MEM_REF, TREE_TYPE (ref),
1166                                  base, memoff);
               /* Carry the dependence clique/base over from the MEM_REF
                  being replaced.  */
1167           MR_DEPENDENCE_CLIQUE (ref) = MR_DEPENDENCE_CLIQUE (old);
1168           MR_DEPENDENCE_BASE (ref) = MR_DEPENDENCE_BASE (old);
1169           DR_UNCONSTRAINED_BASE (dr) = true;
1170           access_fns.safe_push (access_fn);
1171         }
1172     }
1173   else if (DECL_P (ref))
1174     {
1175       /* Canonicalize DR_BASE_OBJECT to MEM_REF form.  */
1176       ref = build2 (MEM_REF, TREE_TYPE (ref),
1177                     build_fold_addr_expr (ref),
1178                     build_int_cst (reference_alias_ptr_type (ref), 0));
1179     }
1180
1181   DR_BASE_OBJECT (dr) = ref;
1182   DR_ACCESS_FNS (dr) = access_fns;
1183 }
1184
1185 /* Extracts the alias analysis information from the memory reference DR.

        If the base of DR_REF (DR), as returned by get_base_address, is an
        INDIRECT_REF or a MEM_REF whose address operand is an SSA_NAME,
        DR_PTR_INFO (DR) is set to that name's SSA_NAME_PTR_INFO.  In every
        other case DR is left untouched.  */
1186
1187 static void
1188 dr_analyze_alias (struct data_reference *dr)
1189 {
1190   tree ref = DR_REF (dr);
       /* NOTE(review): BASE is inspected without a null check; presumably
          get_base_address cannot return null for a DR_REF -- confirm.  */
1191   tree base = get_base_address (ref), addr;
1192
1193   if (INDIRECT_REF_P (base)
1194       || TREE_CODE (base) == MEM_REF)
1195     {
1196       addr = TREE_OPERAND (base, 0);
1197       if (TREE_CODE (addr) == SSA_NAME)
1198         DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
1199     }
1200 }
1201
1202 /* Frees data reference DR.

        The access function vector of DR is released first, then the
        structure itself is handed to free, so DR must not be used after
        this call.  */
1203
1204 void
1205 free_data_ref (data_reference_p dr)
1206 {
1207   DR_ACCESS_FNS (dr).release ();
1208   free (dr);
1209 }
1210
1211 /* Analyze memory reference MEMREF, which is accessed in STMT.
1212    The reference is a read if IS_READ is true, otherwise it is a write.
1213    IS_CONDITIONAL_IN_STMT indicates that the reference is conditional
1214    within STMT, i.e. that it might not occur even if STMT is executed
1215    and runs to completion.
1216
1217    Return the data_reference description of MEMREF.  NEST is the outermost
1218    loop in which the reference should be instantiated, LOOP is the loop
1219    in which the data reference should be analyzed.

        NEST is passed as an edge.  When it is null the innermost analysis
        is given a null loop instead of LOOP.  The returned structure is
        allocated here with XCNEW; free_data_ref releases it.  */
1220
1221 struct data_reference *
1222 create_data_ref (edge nest, loop_p loop, tree memref, gimple *stmt,
1223                  bool is_read, bool is_conditional_in_stmt)
1224 {
1225   struct data_reference *dr;
1226
1227   if (dump_file && (dump_flags & TDF_DETAILS))
1228     {
1229       fprintf (dump_file, "Creating dr for ");
1230       print_generic_expr (dump_file, memref, TDF_SLIM);
1231       fprintf (dump_file, "\n");
1232     }
1233
       /* NOTE(review): XCNEW presumably returns zeroed storage, so fields
          not assigned below start out as zero -- confirm.  */
1234   dr = XCNEW (struct data_reference);
1235   DR_STMT (dr) = stmt;
1236   DR_REF (dr) = memref;
1237   DR_IS_READ (dr) = is_read;
1238   DR_IS_CONDITIONAL_IN_STMT (dr) = is_conditional_in_stmt;
1239
       /* Run the three analyses in turn.  The value returned by
          dr_analyze_innermost is not examined here, so DR is returned
          whatever the outcome of that analysis.  */
1240   dr_analyze_innermost (&DR_INNERMOST (dr), memref,
1241                         nest != NULL ? loop : NULL, stmt);
1242   dr_analyze_indices (dr, nest, loop);
1243   dr_analyze_alias (dr);
1244
       /* With detailed dumping enabled, print every field computed above
          followed by one line per access function.  */
1245   if (dump_file && (dump_flags & TDF_DETAILS))
1246     {
1247       unsigned i;
1248       fprintf (dump_file, "\tbase_address: ");
1249       print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
1250       fprintf (dump_file, "\n\toffset from base address: ");
1251       print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
1252       fprintf (dump_file, "\n\tconstant offset from base address: ");
1253       print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
1254       fprintf (dump_file, "\n\tstep: ");
1255       print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
1256       fprintf (dump_file, "\n\tbase alignment: %d", DR_BASE_ALIGNMENT (dr));
1257       fprintf (dump_file, "\n\tbase misalignment: %d",
1258                DR_BASE_MISALIGNMENT (dr));
1259       fprintf (dump_file, "\n\toffset alignment: %d",
1260                DR_OFFSET_ALIGNMENT (dr));
1261       fprintf (dump_file, "\n\tstep alignment: %d", DR_STEP_ALIGNMENT (dr));
1262       fprintf (dump_file, "\n\tbase_object: ");
1263       print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
1264       fprintf (dump_file, "\n");
1265       for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
1266         {
1267           fprintf (dump_file, "\tAccess function %d: ", i);
1268           print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
1269         }
1270     }
1271
1272   return dr;
1273 }
1274
1275 /*  A helper function computes order between two tree expressions T1 and T2.
1276     This is used in comparator functions sorting objects based on the order
1277     of tree expressions.  The function returns -1, 0, or 1.

         Ordering rules, in the order they are tried:
         - identical pointers compare equal and a null tree sorts before
           any non-null tree;
         - trees with different codes are ordered by tree code, unless
           both are conversions;
         - INTEGER_CSTs use tree_int_cst_compare, STRING_CSTs compare
           length and then contents, SSA_NAMEs compare versions,
           POLY_INT_CSTs use compare_sizes_for_sort and declarations
           compare DECL_UIDs;
         - any other expression is compared operand by operand, from the
           last operand to the first.
         A tree of any other code class hits gcc_unreachable.

         NOTE(review): the results of tree_int_cst_compare,
         compare_sizes_for_sort and memcmp are returned unchanged.  memcmp
         only guarantees the sign of its result, so for STRING_CSTs of
         equal length the value may be something other than -1 or 1;
         callers should test the sign rather than compare with -1 or 1.  */
1278
1279 int
1280 data_ref_compare_tree (tree t1, tree t2)
1281 {
1282   int i, cmp;
1283   enum tree_code code;
1284   char tclass;
1285
1286   if (t1 == t2)
1287     return 0;
1288   if (t1 == NULL)
1289     return -1;
1290   if (t2 == NULL)
1291     return 1;
1292
       /* Stripping useless type conversions may expose the same tree on
          both sides, hence the second identity check.  */
1293   STRIP_USELESS_TYPE_CONVERSION (t1);
1294   STRIP_USELESS_TYPE_CONVERSION (t2);
1295   if (t1 == t2)
1296     return 0;
1297
       /* Two conversions are compared through their operands below even
          when their exact tree codes differ.  */
1298   if (TREE_CODE (t1) != TREE_CODE (t2)
1299       && ! (CONVERT_EXPR_P (t1) && CONVERT_EXPR_P (t2)))
1300     return TREE_CODE (t1) < TREE_CODE (t2) ? -1 : 1;
1301
1302   code = TREE_CODE (t1);
1303   switch (code)
1304     {
1305     case INTEGER_CST:
1306       return tree_int_cst_compare (t1, t2);
1307
1308     case STRING_CST:
           /* Shorter strings sort first; equal lengths fall back to a
              byte-wise comparison whose raw result is returned.  */
1309       if (TREE_STRING_LENGTH (t1) != TREE_STRING_LENGTH (t2))
1310         return TREE_STRING_LENGTH (t1) < TREE_STRING_LENGTH (t2) ? -1 : 1;
1311       return memcmp (TREE_STRING_POINTER (t1), TREE_STRING_POINTER (t2),
1312                      TREE_STRING_LENGTH (t1));
1313
1314     case SSA_NAME:
1315       if (SSA_NAME_VERSION (t1) != SSA_NAME_VERSION (t2))
1316         return SSA_NAME_VERSION (t1) < SSA_NAME_VERSION (t2) ? -1 : 1;
1317       break;
1318
1319     default:
1320       if (POLY_INT_CST_P (t1))
1321         return compare_sizes_for_sort (wi::to_poly_widest (t1),
1322                                        wi::to_poly_widest (t2));
1323
1324       tclass = TREE_CODE_CLASS (code);
1325
1326       /* For decls, compare their UIDs.  */
1327       if (tclass == tcc_declaration)
1328         {
1329           if (DECL_UID (t1) != DECL_UID (t2))
1330             return DECL_UID (t1) < DECL_UID (t2) ? -1 : 1;
1331           break;
1332         }
1333       /* For expressions, compare their operands recursively.  */
1334       else if (IS_EXPR_CODE_CLASS (tclass))
1335         {
               /* Walk from the last operand to the first; the first
                  difference found decides the order.  */
1336           for (i = TREE_OPERAND_LENGTH (t1) - 1; i >= 0; --i)
1337             {
1338               cmp = data_ref_compare_tree (TREE_OPERAND (t1, i),
1339                                            TREE_OPERAND (t2, i));
1340               if (cmp != 0)
1341                 return cmp;
1342             }
1343         }
1344       else
1345         gcc_unreachable ();
1346     }
1347
       /* No difference was found.  */
1348   return 0;
1349 }
1350
1351 /* Return success if it's possible to resolve data dependence DDR by a
1352    runtime alias check, and a failure describing the reason otherwise.

        The check is refused when SPEED_P is false (optimizing for size)
        and when LOOP is non-null and has an inner loop.  Failures are
        reported at the statement of the first data reference of DDR.  */
1353
1354 opt_result
1355 runtime_alias_check_p (ddr_p ddr, struct loop *loop, bool speed_p)
1356 {
1357   if (dump_enabled_p ())
1358     dump_printf (MSG_NOTE,
1359                  "consider run-time aliasing test between %T and %T\n",
1360                  DR_REF (DDR_A (ddr)), DR_REF (DDR_B (ddr)));
1361
1362   if (!speed_p)
1363     return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
1364                                    "runtime alias check not supported when"
1365                                    " optimizing for size.\n");
1366
1367   /* FORNOW: We don't support versioning with outer-loop in either
1368      vectorization or loop distribution.  */
1369   if (loop != NULL && loop->inner != NULL)
1370     return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
1371                                    "runtime alias check not supported for"
1372                                    " outer loop.\n");
1373
1374   return opt_result::success ();
1375 }
1376
1377 /* Operator == between two dr_with_seg_len objects.
1378
1379    This equality operator is used to make sure two data refs
1380    are the same one so that we will consider to combine the
1381    aliasing checks of those two pairs of data dependent data
1382    refs.

        Two objects are equal when their base addresses are
        operand_equal_p, their DR_OFFSET, DR_INIT and segment length
        compare equal under data_ref_compare_tree, their access sizes are
        known to be equal and their alignments match.  DR_STEP is not part
        of the comparison.  */
1383
1384 static bool
1385 operator == (const dr_with_seg_len& d1,
1386              const dr_with_seg_len& d2)
1387 {
1388   return (operand_equal_p (DR_BASE_ADDRESS (d1.dr),
1389                            DR_BASE_ADDRESS (d2.dr), 0)
1390           && data_ref_compare_tree (DR_OFFSET (d1.dr), DR_OFFSET (d2.dr)) == 0
1391           && data_ref_compare_tree (DR_INIT (d1.dr), DR_INIT (d2.dr)) == 0
1392           && data_ref_compare_tree (d1.seg_len, d2.seg_len) == 0
1393           && known_eq (d1.access_size, d2.access_size)
1394           && d1.align == d2.align);
1395 }
1396
1397 /* Comparison function for sorting objects of dr_with_seg_len_pair_t
1398    so that we can combine aliasing checks in one scan.

        PA_ and PB_ point to the two pairs being compared.  The sort key
        is, in order: base address of the first then of the second data
        ref, step of the first then of the second, offset and init of the
        first, offset and init of the second.  Segment length, access size
        and alignment do not take part.  The first key that differs under
        data_ref_compare_tree decides the result; 0 means all keys match.  */
1399
1400 static int
1401 comp_dr_with_seg_len_pair (const void *pa_, const void *pb_)
1402 {
1403   const dr_with_seg_len_pair_t* pa = (const dr_with_seg_len_pair_t *) pa_;
1404   const dr_with_seg_len_pair_t* pb = (const dr_with_seg_len_pair_t *) pb_;
1405   const dr_with_seg_len &a1 = pa->first, &a2 = pa->second;
1406   const dr_with_seg_len &b1 = pb->first, &b2 = pb->second;
1407
1408   /* For DR pairs (a, b) and (c, d), we only consider to merge the alias checks
1409      if a and c have the same basic address and step, and b and d have the same
1410      address and step.  Therefore, if any a&c or b&d don't have the same address
1411      and step, we don't care the order of those two pairs after sorting.  */
1412   int comp_res;
1413
1414   if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a1.dr),
1415                                          DR_BASE_ADDRESS (b1.dr))) != 0)
1416     return comp_res;
1417   if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a2.dr),
1418                                          DR_BASE_ADDRESS (b2.dr))) != 0)
1419     return comp_res;
1420   if ((comp_res = data_ref_compare_tree (DR_STEP (a1.dr),
1421                                          DR_STEP (b1.dr))) != 0)
1422     return comp_res;
1423   if ((comp_res = data_ref_compare_tree (DR_STEP (a2.dr),
1424                                          DR_STEP (b2.dr))) != 0)
1425     return comp_res;
1426   if ((comp_res = data_ref_compare_tree (DR_OFFSET (a1.dr),
1427                                          DR_OFFSET (b1.dr))) != 0)
1428     return comp_res;
1429   if ((comp_res = data_ref_compare_tree (DR_INIT (a1.dr),
1430                                          DR_INIT (b1.dr))) != 0)
1431     return comp_res;
1432   if ((comp_res = data_ref_compare_tree (DR_OFFSET (a2.dr),
1433                                          DR_OFFSET (b2.dr))) != 0)
1434     return comp_res;
1435   if ((comp_res = data_ref_compare_tree (DR_INIT (a2.dr),
1436                                          DR_INIT (b2.dr))) != 0)
1437     return comp_res;
1438
1439   return 0;
1440 }
1441
1442 /* Merge alias checks recorded in ALIAS_PAIRS and remove redundant ones.
1443    FACTOR is number of iterations that each data reference is accessed.
1444
1445    Basically, for each pair of dependent data refs store_ptr_0 & load_ptr_0,
1446    we create an expression:
1447
1448    ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
1449    || ((load_ptr_0 + load_segment_length_0) <= store_ptr_0)
1450
1451    for aliasing checks.  However, in some cases we can decrease the number
1452    of checks by combining two checks into one.  For example, suppose we have
1453    another pair of data refs store_ptr_0 & load_ptr_1, and if the following
1454    condition is satisfied:
1455
1456    load_ptr_0 < load_ptr_1  &&
1457    load_ptr_1 - load_ptr_0 - load_segment_length_0 < store_segment_length_0
1458
1459    (this condition means, in each iteration of vectorized loop, the accessed
1460    memory of store_ptr_0 cannot be between the memory of load_ptr_0 and
1461    load_ptr_1.)
1462
1463    we then can use only the following expression to finish the aliasing checks
1464    between store_ptr_0 & load_ptr_0 and store_ptr_0 & load_ptr_1:
1465
1466    ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
1467    || ((load_ptr_1 + load_segment_length_1) <= store_ptr_0)
1468
1469    Note that we only consider that load_ptr_0 and load_ptr_1 have the same
1470    basic address.

        ALIAS_PAIRS is sorted and modified in place: duplicate and merged
        entries are removed from it and the surviving entry of a merge has
        its segment length, access size and alignment updated.

        NOTE(review): the second parameter is unnamed and is not used in
        the body, so the FACTOR described above has no effect here.  */
1471
1472 void
1473 prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs,
1474                                poly_uint64)
1475 {
1476   /* Sort the collected data ref pairs so that we can scan them once to
1477      combine all possible aliasing checks.  */
1478   alias_pairs->qsort (comp_dr_with_seg_len_pair);
1479
1480   /* Scan the sorted dr pairs and check if we can combine alias checks
1481      of two neighboring dr pairs.  */
1482   for (size_t i = 1; i < alias_pairs->length (); ++i)
1483     {
1484       /* Deal with two ddrs (dr_a1, dr_b1) and (dr_a2, dr_b2).  */
1485       dr_with_seg_len *dr_a1 = &(*alias_pairs)[i-1].first,
1486                       *dr_b1 = &(*alias_pairs)[i-1].second,
1487                       *dr_a2 = &(*alias_pairs)[i].first,
1488                       *dr_b2 = &(*alias_pairs)[i].second;
1489
1490       /* Remove duplicate data ref pairs.  */
1491       if (*dr_a1 == *dr_a2 && *dr_b1 == *dr_b2)
1492         {
1493           if (dump_enabled_p ())
1494             dump_printf (MSG_NOTE, "found equal ranges %T, %T and %T, %T\n",
1495                          DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
1496                          DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
               /* Drop entry I and step back, so that the loop increment
                  compares entry I-1 with its new neighbor next.  */
1497           alias_pairs->ordered_remove (i--);
1498           continue;
1499         }
1500
1501       if (*dr_a1 == *dr_a2 || *dr_b1 == *dr_b2)
1502         {
1503           /* We consider the case that DR_B1 and DR_B2 are same memrefs,
1504              and DR_A1 and DR_A2 are two consecutive memrefs.  */
               /* These swaps exchange the local pointers only, so that the
                  shared reference is always named DR_B below.  */
1505           if (*dr_a1 == *dr_a2)
1506             {
1507               std::swap (dr_a1, dr_b1);
1508               std::swap (dr_a2, dr_b2);
1509             }
1510
1511           poly_int64 init_a1, init_a2;
1512           /* Only consider cases in which the distance between the initial
1513              DR_A1 and the initial DR_A2 is known at compile time.  */
1514           if (!operand_equal_p (DR_BASE_ADDRESS (dr_a1->dr),
1515                                 DR_BASE_ADDRESS (dr_a2->dr), 0)
1516               || !operand_equal_p (DR_OFFSET (dr_a1->dr),
1517                                    DR_OFFSET (dr_a2->dr), 0)
1518               || !poly_int_tree_p (DR_INIT (dr_a1->dr), &init_a1)
1519               || !poly_int_tree_p (DR_INIT (dr_a2->dr), &init_a2))
1520             continue;
1521
1522           /* Don't combine if we can't tell which one comes first.  */
1523           if (!ordered_p (init_a1, init_a2))
1524             continue;
1525
1526           /* Make sure dr_a1 starts left of dr_a2.  */
               /* Unlike the pointer swaps above, this exchanges the
                  dr_with_seg_len objects stored in the vector.  */
1527           if (maybe_gt (init_a1, init_a2))
1528             {
1529               std::swap (*dr_a1, *dr_a2);
1530               std::swap (init_a1, init_a2);
1531             }
1532
1533           /* Work out what the segment length would be if we did combine
1534              DR_A1 and DR_A2:
1535
1536              - If DR_A1 and DR_A2 have equal lengths, that length is
1537                also the combined length.
1538
1539              - If DR_A1 and DR_A2 both have negative "lengths", the combined
1540                length is the lower bound on those lengths.
1541
1542              - If DR_A1 and DR_A2 both have positive lengths, the combined
1543                length is the upper bound on those lengths.
1544
1545              Other cases are unlikely to give a useful combination.
1546
1547              The lengths both have sizetype, so the sign is taken from
1548              the step instead.  */
1549           if (!operand_equal_p (dr_a1->seg_len, dr_a2->seg_len, 0))
1550             {
1551               poly_uint64 seg_len_a1, seg_len_a2;
1552               if (!poly_int_tree_p (dr_a1->seg_len, &seg_len_a1)
1553                   || !poly_int_tree_p (dr_a2->seg_len, &seg_len_a2))
1554                 continue;
1555
1556               tree indicator_a = dr_direction_indicator (dr_a1->dr);
1557               if (TREE_CODE (indicator_a) != INTEGER_CST)
1558                 continue;
1559
1560               tree indicator_b = dr_direction_indicator (dr_a2->dr);
1561               if (TREE_CODE (indicator_b) != INTEGER_CST)
1562                 continue;
1563
1564               int sign_a = tree_int_cst_sgn (indicator_a);
1565               int sign_b = tree_int_cst_sgn (indicator_b);
1566
                   /* A zero sign satisfies both tests; the non-positive
                      test is tried first.  Mixed signs are not merged.  */
1567               poly_uint64 new_seg_len;
1568               if (sign_a <= 0 && sign_b <= 0)
1569                 new_seg_len = lower_bound (seg_len_a1, seg_len_a2);
1570               else if (sign_a >= 0 && sign_b >= 0)
1571                 new_seg_len = upper_bound (seg_len_a1, seg_len_a2);
1572               else
1573                 continue;
1574
1575               dr_a1->seg_len = build_int_cst (TREE_TYPE (dr_a1->seg_len),
1576                                               new_seg_len);
1577               dr_a1->align = MIN (dr_a1->align, known_alignment (new_seg_len));
1578             }
1579
1580           /* This is always positive due to the swap above.  */
1581           poly_uint64 diff = init_a2 - init_a1;
1582
1583           /* The new check will start at DR_A1.  Make sure that its access
1584              size encompasses the initial DR_A2.  */
1585           if (maybe_lt (dr_a1->access_size, diff + dr_a2->access_size))
1586             {
1587               dr_a1->access_size = upper_bound (dr_a1->access_size,
1588                                                 diff + dr_a2->access_size);
1589               unsigned int new_align = known_alignment (dr_a1->access_size);
1590               dr_a1->align = MIN (dr_a1->align, new_align);
1591             }
1592           if (dump_enabled_p ())
1593             dump_printf (MSG_NOTE, "merging ranges for %T, %T and %T, %T\n",
1594                          DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
1595                          DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
               /* Entry I has been folded into entry I-1: remove it and
                  revisit the same index on the next iteration.  */
1596           alias_pairs->ordered_remove (i);
1597           i--;
1598         }
1599     }
1600 }
1601
1602 /* Given LOOP's two data references and segment lengths described by DR_A
1603    and DR_B, create expression checking if the two address ranges intersect
1604    with each other based on index of the two addresses.  This can only be
1605    done if DR_A and DR_B refer to the same (array) object and the index
1606    is the only difference.  For example:
1607
1608                        DR_A                           DR_B
1609       data-ref         arr[i]                         arr[j]
1610       base_object      arr                            arr
1611       index            {i_0, +, 1}_loop               {j_0, +, 1}_loop
1612
1613    The addresses and their index are like:
1614
1615         |<- ADDR_A    ->|          |<- ADDR_B    ->|
1616      ------------------------------------------------------->
1617         |   |   |   |   |          |   |   |   |   |
1618      ------------------------------------------------------->
1619         i_0 ...         i_0+4      j_0 ...         j_0+4
1620
1621    We can create expression based on index rather than address:
1622
1623      (i_0 + 4 < j_0 || j_0 + 4 < i_0)
1624
1625    Note evolution step of index needs to be considered in comparison.  */
1626
1627 static bool
1628 create_intersect_range_checks_index (struct loop *loop, tree *cond_expr,
1629                                      const dr_with_seg_len& dr_a,
1630                                      const dr_with_seg_len& dr_b)
1631 {
1632   if (integer_zerop (DR_STEP (dr_a.dr))
1633       || integer_zerop (DR_STEP (dr_b.dr))
1634       || DR_NUM_DIMENSIONS (dr_a.dr) != DR_NUM_DIMENSIONS (dr_b.dr))
1635     return false;
1636
1637   poly_uint64 seg_len1, seg_len2;
1638   if (!poly_int_tree_p (dr_a.seg_len, &seg_len1)
1639       || !poly_int_tree_p (dr_b.seg_len, &seg_len2))
1640     return false;
1641
1642   if (!tree_fits_shwi_p (DR_STEP (dr_a.dr)))
1643     return false;
1644
1645   if (!operand_equal_p (DR_BASE_OBJECT (dr_a.dr), DR_BASE_OBJECT (dr_b.dr), 0))
1646     return false;
1647
1648   if (!operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0))
1649     return false;
1650
1651   gcc_assert (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST);
1652
1653   bool neg_step = tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0;
1654   unsigned HOST_WIDE_INT abs_step = tree_to_shwi (DR_STEP (dr_a.dr));
1655   if (neg_step)
1656     {
1657       abs_step = -abs_step;
1658       seg_len1 = -seg_len1;
1659       seg_len2 = -seg_len2;
1660     }
1661   else
1662     {
1663       /* Include the access size in the length, so that we only have one
1664          tree addition below.  */
1665       seg_len1 += dr_a.access_size;
1666       seg_len2 += dr_b.access_size;
1667     }
1668
1669   /* Infer the number of iterations with which the memory segment is accessed
1670      by DR.  In other words, alias is checked if memory segment accessed by
1671      DR_A in some iterations intersect with memory segment accessed by DR_B
1672      in the same amount iterations.
1673      Note segnment length is a linear function of number of iterations with
1674      DR_STEP as the coefficient.  */
1675   poly_uint64 niter_len1, niter_len2;
1676   if (!can_div_trunc_p (seg_len1 + abs_step - 1, abs_step, &niter_len1)
1677       || !can_div_trunc_p (seg_len2 + abs_step - 1, abs_step, &niter_len2))
1678     return false;
1679
1680   poly_uint64 niter_access1 = 0, niter_access2 = 0;
1681   if (neg_step)
1682     {
1683       /* Divide each access size by the byte step, rounding up.  */
1684       if (!can_div_trunc_p (dr_a.access_size - abs_step - 1,
1685                             abs_step, &niter_access1)
1686           || !can_div_trunc_p (dr_b.access_size + abs_step - 1,
1687                                abs_step, &niter_access2))
1688         return false;
1689     }
1690
1691   unsigned int i;
1692   for (i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
1693     {
1694       tree access1 = DR_ACCESS_FN (dr_a.dr, i);
1695       tree access2 = DR_ACCESS_FN (dr_b.dr, i);
1696       /* Two indices must be the same if they are not scev, or not scev wrto
1697          current loop being vecorized.  */
1698       if (TREE_CODE (access1) != POLYNOMIAL_CHREC
1699           || TREE_CODE (access2) != POLYNOMIAL_CHREC
1700           || CHREC_VARIABLE (access1) != (unsigned)loop->num
1701           || CHREC_VARIABLE (access2) != (unsigned)loop->num)
1702         {
1703           if (operand_equal_p (access1, access2, 0))
1704             continue;
1705
1706           return false;
1707         }
1708       /* The two indices must have the same step.  */
1709       if (!operand_equal_p (CHREC_RIGHT (access1), CHREC_RIGHT (access2), 0))
1710         return false;
1711
1712       tree idx_step = CHREC_RIGHT (access1);
1713       /* Index must have const step, otherwise DR_STEP won't be constant.  */
1714       gcc_assert (TREE_CODE (idx_step) == INTEGER_CST);
1715       /* Index must evaluate in the same direction as DR.  */
1716       gcc_assert (!neg_step || tree_int_cst_sign_bit (idx_step) == 1);
1717
1718       tree min1 = CHREC_LEFT (access1);
1719       tree min2 = CHREC_LEFT (access2);
1720       if (!types_compatible_p (TREE_TYPE (min1), TREE_TYPE (min2)))
1721         return false;
1722
1723       /* Ideally, alias can be checked against loop's control IV, but we
1724          need to prove linear mapping between control IV and reference
1725          index.  Although that should be true, we check against (array)
1726          index of data reference.  Like segment length, index length is
1727          linear function of the number of iterations with index_step as
1728          the coefficient, i.e, niter_len * idx_step.  */
1729       tree idx_len1 = fold_build2 (MULT_EXPR, TREE_TYPE (min1), idx_step,
1730                                    build_int_cst (TREE_TYPE (min1),
1731                                                   niter_len1));
1732       tree idx_len2 = fold_build2 (MULT_EXPR, TREE_TYPE (min2), idx_step,
1733                                    build_int_cst (TREE_TYPE (min2),
1734                                                   niter_len2));
1735       tree max1 = fold_build2 (PLUS_EXPR, TREE_TYPE (min1), min1, idx_len1);
1736       tree max2 = fold_build2 (PLUS_EXPR, TREE_TYPE (min2), min2, idx_len2);
1737       /* Adjust ranges for negative step.  */
1738       if (neg_step)
1739         {
1740           /* IDX_LEN1 and IDX_LEN2 are negative in this case.  */
1741           std::swap (min1, max1);
1742           std::swap (min2, max2);
1743
1744           /* As with the lengths just calculated, we've measured the access
1745              sizes in iterations, so multiply them by the index step.  */
1746           tree idx_access1
1747             = fold_build2 (MULT_EXPR, TREE_TYPE (min1), idx_step,
1748                            build_int_cst (TREE_TYPE (min1), niter_access1));
1749           tree idx_access2
1750             = fold_build2 (MULT_EXPR, TREE_TYPE (min2), idx_step,
1751                            build_int_cst (TREE_TYPE (min2), niter_access2));
1752
1753           /* MINUS_EXPR because the above values are negative.  */
1754           max1 = fold_build2 (MINUS_EXPR, TREE_TYPE (max1), max1, idx_access1);
1755           max2 = fold_build2 (MINUS_EXPR, TREE_TYPE (max2), max2, idx_access2);
1756         }
1757       tree part_cond_expr
1758         = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1759             fold_build2 (LE_EXPR, boolean_type_node, max1, min2),
1760             fold_build2 (LE_EXPR, boolean_type_node, max2, min1));
1761       if (*cond_expr)
1762         *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1763                                   *cond_expr, part_cond_expr);
1764       else
1765         *cond_expr = part_cond_expr;
1766     }
1767   return true;
1768 }
1769
1770 /* If ALIGN is nonzero, set up *SEQ_MIN_OUT and *SEQ_MAX_OUT so that for
1771    every address ADDR accessed by D:
1772
1773      *SEQ_MIN_OUT <= ADDR (== ADDR & -ALIGN) <= *SEQ_MAX_OUT
1774
1775    In this case, every element accessed by D is aligned to at least
1776    ALIGN bytes.
1777
1778    If ALIGN is zero then instead set *SEG_MAX_OUT so that:
1779
1780      *SEQ_MIN_OUT <= ADDR < *SEQ_MAX_OUT.  */
1781
1782 static void
1783 get_segment_min_max (const dr_with_seg_len &d, tree *seg_min_out,
1784                      tree *seg_max_out, HOST_WIDE_INT align)
1785 {
1786   /* Each access has the following pattern:
1787
1788           <- |seg_len| ->
1789           <--- A: -ve step --->
1790           +-----+-------+-----+-------+-----+
1791           | n-1 | ,.... |  0  | ..... | n-1 |
1792           +-----+-------+-----+-------+-----+
1793                         <--- B: +ve step --->
1794                         <- |seg_len| ->
1795                         |
1796                    base address
1797
1798      where "n" is the number of scalar iterations covered by the segment.
1799      (This should be VF for a particular pair if we know that both steps
1800      are the same, otherwise it will be the full number of scalar loop
1801      iterations.)
1802
1803      A is the range of bytes accessed when the step is negative,
1804      B is the range when the step is positive.
1805
1806      If the access size is "access_size" bytes, the lowest addressed byte is:
1807
1808          base + (step < 0 ? seg_len : 0)   [LB]
1809
1810      and the highest addressed byte is always below:
1811
1812          base + (step < 0 ? 0 : seg_len) + access_size   [UB]
1813
1814      Thus:
1815
1816          LB <= ADDR < UB
1817
1818      If ALIGN is nonzero, all three values are aligned to at least ALIGN
1819      bytes, so:
1820
1821          LB <= ADDR <= UB - ALIGN
1822
1823      where "- ALIGN" folds naturally with the "+ access_size" and often
1824      cancels it out.
1825
1826      We don't try to simplify LB and UB beyond this (e.g. by using
1827      MIN and MAX based on whether seg_len rather than the stride is
1828      negative) because it is possible for the absolute size of the
1829      segment to overflow the range of a ssize_t.
1830
1831      Keeping the pointer_plus outside of the cond_expr should allow
1832      the cond_exprs to be shared with other alias checks.  */
1833   tree indicator = dr_direction_indicator (d.dr);
1834   tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
1835                                fold_convert (ssizetype, indicator),
1836                                ssize_int (0));
1837   tree addr_base = fold_build_pointer_plus (DR_BASE_ADDRESS (d.dr),
1838                                             DR_OFFSET (d.dr));
1839   addr_base = fold_build_pointer_plus (addr_base, DR_INIT (d.dr));
1840   tree seg_len
1841     = fold_convert (sizetype, rewrite_to_non_trapping_overflow (d.seg_len));
1842
1843   tree min_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
1844                                 seg_len, size_zero_node);
1845   tree max_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
1846                                 size_zero_node, seg_len);
1847   max_reach = fold_build2 (PLUS_EXPR, sizetype, max_reach,
1848                            size_int (d.access_size - align));
1849
1850   *seg_min_out = fold_build_pointer_plus (addr_base, min_reach);
1851   *seg_max_out = fold_build_pointer_plus (addr_base, max_reach);
1852 }
1853
1854 /* Given two data references and segment lengths described by DR_A and DR_B,
1855    create expression checking if the two addresses ranges intersect with
1856    each other:
1857
1858      ((DR_A_addr_0 + DR_A_segment_length_0) <= DR_B_addr_0)
1859      || (DR_B_addr_0 + DER_B_segment_length_0) <= DR_A_addr_0))  */
1860
1861 static void
1862 create_intersect_range_checks (struct loop *loop, tree *cond_expr,
1863                                const dr_with_seg_len& dr_a,
1864                                const dr_with_seg_len& dr_b)
1865 {
1866   *cond_expr = NULL_TREE;
1867   if (create_intersect_range_checks_index (loop, cond_expr, dr_a, dr_b))
1868     return;
1869
1870   unsigned HOST_WIDE_INT min_align;
1871   tree_code cmp_code;
1872   if (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST
1873       && TREE_CODE (DR_STEP (dr_b.dr)) == INTEGER_CST)
1874     {
1875       /* In this case adding access_size to seg_len is likely to give
1876          a simple X * step, where X is either the number of scalar
1877          iterations or the vectorization factor.  We're better off
1878          keeping that, rather than subtracting an alignment from it.
1879
1880          In this case the maximum values are exclusive and so there is
1881          no alias if the maximum of one segment equals the minimum
1882          of another.  */
1883       min_align = 0;
1884       cmp_code = LE_EXPR;
1885     }
1886   else
1887     {
1888       /* Calculate the minimum alignment shared by all four pointers,
1889          then arrange for this alignment to be subtracted from the
1890          exclusive maximum values to get inclusive maximum values.
1891          This "- min_align" is cumulative with a "+ access_size"
1892          in the calculation of the maximum values.  In the best
1893          (and common) case, the two cancel each other out, leaving
1894          us with an inclusive bound based only on seg_len.  In the
1895          worst case we're simply adding a smaller number than before.
1896
1897          Because the maximum values are inclusive, there is an alias
1898          if the maximum value of one segment is equal to the minimum
1899          value of the other.  */
1900       min_align = MIN (dr_a.align, dr_b.align);
1901       cmp_code = LT_EXPR;
1902     }
1903
1904   tree seg_a_min, seg_a_max, seg_b_min, seg_b_max;
1905   get_segment_min_max (dr_a, &seg_a_min, &seg_a_max, min_align);
1906   get_segment_min_max (dr_b, &seg_b_min, &seg_b_max, min_align);
1907
1908   *cond_expr
1909     = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1910         fold_build2 (cmp_code, boolean_type_node, seg_a_max, seg_b_min),
1911         fold_build2 (cmp_code, boolean_type_node, seg_b_max, seg_a_min));
1912 }
1913
1914 /* Create a conditional expression that represents the run-time checks for
1915    overlapping of address ranges represented by a list of data references
1916    pairs passed in ALIAS_PAIRS.  Data references are in LOOP.  The returned
1917    COND_EXPR is the conditional expression to be used in the if statement
1918    that controls which version of the loop gets executed at runtime.  */
1919
1920 void
1921 create_runtime_alias_checks (struct loop *loop,
1922                              vec<dr_with_seg_len_pair_t> *alias_pairs,
1923                              tree * cond_expr)
1924 {
1925   tree part_cond_expr;
1926
1927   fold_defer_overflow_warnings ();
1928   for (size_t i = 0, s = alias_pairs->length (); i < s; ++i)
1929     {
1930       const dr_with_seg_len& dr_a = (*alias_pairs)[i].first;
1931       const dr_with_seg_len& dr_b = (*alias_pairs)[i].second;
1932
1933       if (dump_enabled_p ())
1934         dump_printf (MSG_NOTE,
1935                      "create runtime check for data references %T and %T\n",
1936                      DR_REF (dr_a.dr), DR_REF (dr_b.dr));
1937
1938       /* Create condition expression for each pair data references.  */
1939       create_intersect_range_checks (loop, &part_cond_expr, dr_a, dr_b);
1940       if (*cond_expr)
1941         *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1942                                   *cond_expr, part_cond_expr);
1943       else
1944         *cond_expr = part_cond_expr;
1945     }
1946   fold_undefer_and_ignore_overflow_warnings ();
1947 }
1948
1949 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
1950    expressions.  */
1951 static bool
1952 dr_equal_offsets_p1 (tree offset1, tree offset2)
1953 {
1954   bool res;
1955
1956   STRIP_NOPS (offset1);
1957   STRIP_NOPS (offset2);
1958
1959   if (offset1 == offset2)
1960     return true;
1961
1962   if (TREE_CODE (offset1) != TREE_CODE (offset2)
1963       || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
1964     return false;
1965
1966   res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
1967                              TREE_OPERAND (offset2, 0));
1968
1969   if (!res || !BINARY_CLASS_P (offset1))
1970     return res;
1971
1972   res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
1973                              TREE_OPERAND (offset2, 1));
1974
1975   return res;
1976 }
1977
1978 /* Check if DRA and DRB have equal offsets.  */
1979 bool
1980 dr_equal_offsets_p (struct data_reference *dra,
1981                     struct data_reference *drb)
1982 {
1983   tree offset1, offset2;
1984
1985   offset1 = DR_OFFSET (dra);
1986   offset2 = DR_OFFSET (drb);
1987
1988   return dr_equal_offsets_p1 (offset1, offset2);
1989 }
1990
1991 /* Returns true if FNA == FNB.  */
1992
1993 static bool
1994 affine_function_equal_p (affine_fn fna, affine_fn fnb)
1995 {
1996   unsigned i, n = fna.length ();
1997
1998   if (n != fnb.length ())
1999     return false;
2000
2001   for (i = 0; i < n; i++)
2002     if (!operand_equal_p (fna[i], fnb[i], 0))
2003       return false;
2004
2005   return true;
2006 }
2007
2008 /* If all the functions in CF are the same, returns one of them,
2009    otherwise returns NULL.  */
2010
2011 static affine_fn
2012 common_affine_function (conflict_function *cf)
2013 {
2014   unsigned i;
2015   affine_fn comm;
2016
2017   if (!CF_NONTRIVIAL_P (cf))
2018     return affine_fn ();
2019
2020   comm = cf->fns[0];
2021
2022   for (i = 1; i < cf->n; i++)
2023     if (!affine_function_equal_p (comm, cf->fns[i]))
2024       return affine_fn ();
2025
2026   return comm;
2027 }
2028
2029 /* Returns the base of the affine function FN.  */
2030
2031 static tree
2032 affine_function_base (affine_fn fn)
2033 {
2034   return fn[0];
2035 }
2036
2037 /* Returns true if FN is a constant.  */
2038
2039 static bool
2040 affine_function_constant_p (affine_fn fn)
2041 {
2042   unsigned i;
2043   tree coef;
2044
2045   for (i = 1; fn.iterate (i, &coef); i++)
2046     if (!integer_zerop (coef))
2047       return false;
2048
2049   return true;
2050 }
2051
2052 /* Returns true if FN is the zero constant function.  */
2053
2054 static bool
2055 affine_function_zero_p (affine_fn fn)
2056 {
2057   return (integer_zerop (affine_function_base (fn))
2058           && affine_function_constant_p (fn));
2059 }
2060
2061 /* Returns a signed integer type with the largest precision from TA
2062    and TB.  */
2063
2064 static tree
2065 signed_type_for_types (tree ta, tree tb)
2066 {
2067   if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
2068     return signed_type_for (ta);
2069   else
2070     return signed_type_for (tb);
2071 }
2072
2073 /* Applies operation OP on affine functions FNA and FNB, and returns the
2074    result.  */
2075
2076 static affine_fn
2077 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
2078 {
2079   unsigned i, n, m;
2080   affine_fn ret;
2081   tree coef;
2082
2083   if (fnb.length () > fna.length ())
2084     {
2085       n = fna.length ();
2086       m = fnb.length ();
2087     }
2088   else
2089     {
2090       n = fnb.length ();
2091       m = fna.length ();
2092     }
2093
2094   ret.create (m);
2095   for (i = 0; i < n; i++)
2096     {
2097       tree type = signed_type_for_types (TREE_TYPE (fna[i]),
2098                                          TREE_TYPE (fnb[i]));
2099       ret.quick_push (fold_build2 (op, type, fna[i], fnb[i]));
2100     }
2101
2102   for (; fna.iterate (i, &coef); i++)
2103     ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2104                                  coef, integer_zero_node));
2105   for (; fnb.iterate (i, &coef); i++)
2106     ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2107                                  integer_zero_node, coef));
2108
2109   return ret;
2110 }
2111
2112 /* Returns the sum of affine functions FNA and FNB.  */
2113
2114 static affine_fn
2115 affine_fn_plus (affine_fn fna, affine_fn fnb)
2116 {
2117   return affine_fn_op (PLUS_EXPR, fna, fnb);
2118 }
2119
2120 /* Returns the difference of affine functions FNA and FNB.  */
2121
2122 static affine_fn
2123 affine_fn_minus (affine_fn fna, affine_fn fnb)
2124 {
2125   return affine_fn_op (MINUS_EXPR, fna, fnb);
2126 }
2127
2128 /* Frees affine function FN.  */
2129
2130 static void
2131 affine_fn_free (affine_fn fn)
2132 {
2133   fn.release ();
2134 }
2135
2136 /* Determine for each subscript in the data dependence relation DDR
2137    the distance.  */
2138
2139 static void
2140 compute_subscript_distance (struct data_dependence_relation *ddr)
2141 {
2142   conflict_function *cf_a, *cf_b;
2143   affine_fn fn_a, fn_b, diff;
2144
2145   if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
2146     {
2147       unsigned int i;
2148
2149       for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2150         {
2151           struct subscript *subscript;
2152
2153           subscript = DDR_SUBSCRIPT (ddr, i);
2154           cf_a = SUB_CONFLICTS_IN_A (subscript);
2155           cf_b = SUB_CONFLICTS_IN_B (subscript);
2156
2157           fn_a = common_affine_function (cf_a);
2158           fn_b = common_affine_function (cf_b);
2159           if (!fn_a.exists () || !fn_b.exists ())
2160             {
2161               SUB_DISTANCE (subscript) = chrec_dont_know;
2162               return;
2163             }
2164           diff = affine_fn_minus (fn_a, fn_b);
2165
2166           if (affine_function_constant_p (diff))
2167             SUB_DISTANCE (subscript) = affine_function_base (diff);
2168           else
2169             SUB_DISTANCE (subscript) = chrec_dont_know;
2170
2171           affine_fn_free (diff);
2172         }
2173     }
2174 }
2175
2176 /* Returns the conflict function for "unknown".  */
2177
2178 static conflict_function *
2179 conflict_fn_not_known (void)
2180 {
2181   conflict_function *fn = XCNEW (conflict_function);
2182   fn->n = NOT_KNOWN;
2183
2184   return fn;
2185 }
2186
2187 /* Returns the conflict function for "independent".  */
2188
2189 static conflict_function *
2190 conflict_fn_no_dependence (void)
2191 {
2192   conflict_function *fn = XCNEW (conflict_function);
2193   fn->n = NO_DEPENDENCE;
2194
2195   return fn;
2196 }
2197
2198 /* Returns true if the address of OBJ is invariant in LOOP.  */
2199
2200 static bool
2201 object_address_invariant_in_loop_p (const struct loop *loop, const_tree obj)
2202 {
2203   while (handled_component_p (obj))
2204     {
2205       if (TREE_CODE (obj) == ARRAY_REF)
2206         {
2207           for (int i = 1; i < 4; ++i)
2208             if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, i),
2209                                                         loop->num))
2210               return false;
2211         }
2212       else if (TREE_CODE (obj) == COMPONENT_REF)
2213         {
2214           if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
2215                                                       loop->num))
2216             return false;
2217         }
2218       obj = TREE_OPERAND (obj, 0);
2219     }
2220
2221   if (!INDIRECT_REF_P (obj)
2222       && TREE_CODE (obj) != MEM_REF)
2223     return true;
2224
2225   return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
2226                                                   loop->num);
2227 }
2228
2229 /* Returns false if we can prove that data references A and B do not alias,
2230    true otherwise.  If LOOP_NEST is false no cross-iteration aliases are
2231    considered.  */
2232
2233 bool
2234 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
2235                 bool loop_nest)
2236 {
2237   tree addr_a = DR_BASE_OBJECT (a);
2238   tree addr_b = DR_BASE_OBJECT (b);
2239
2240   /* If we are not processing a loop nest but scalar code we
2241      do not need to care about possible cross-iteration dependences
2242      and thus can process the full original reference.  Do so,
2243      similar to how loop invariant motion applies extra offset-based
2244      disambiguation.  */
2245   if (!loop_nest)
2246     {
2247       aff_tree off1, off2;
2248       poly_widest_int size1, size2;
2249       get_inner_reference_aff (DR_REF (a), &off1, &size1);
2250       get_inner_reference_aff (DR_REF (b), &off2, &size2);
2251       aff_combination_scale (&off1, -1);
2252       aff_combination_add (&off2, &off1);
2253       if (aff_comb_cannot_overlap_p (&off2, size1, size2))
2254         return false;
2255     }
2256
2257   if ((TREE_CODE (addr_a) == MEM_REF || TREE_CODE (addr_a) == TARGET_MEM_REF)
2258       && (TREE_CODE (addr_b) == MEM_REF || TREE_CODE (addr_b) == TARGET_MEM_REF)
2259       && MR_DEPENDENCE_CLIQUE (addr_a) == MR_DEPENDENCE_CLIQUE (addr_b)
2260       && MR_DEPENDENCE_BASE (addr_a) != MR_DEPENDENCE_BASE (addr_b))
2261     return false;
2262
2263   /* If we had an evolution in a pointer-based MEM_REF BASE_OBJECT we
2264      do not know the size of the base-object.  So we cannot do any
2265      offset/overlap based analysis but have to rely on points-to
2266      information only.  */
2267   if (TREE_CODE (addr_a) == MEM_REF
2268       && (DR_UNCONSTRAINED_BASE (a)
2269           || TREE_CODE (TREE_OPERAND (addr_a, 0)) == SSA_NAME))
2270     {
2271       /* For true dependences we can apply TBAA.  */
2272       if (flag_strict_aliasing
2273           && DR_IS_WRITE (a) && DR_IS_READ (b)
2274           && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
2275                                      get_alias_set (DR_REF (b))))
2276         return false;
2277       if (TREE_CODE (addr_b) == MEM_REF)
2278         return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
2279                                        TREE_OPERAND (addr_b, 0));
2280       else
2281         return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
2282                                        build_fold_addr_expr (addr_b));
2283     }
2284   else if (TREE_CODE (addr_b) == MEM_REF
2285            && (DR_UNCONSTRAINED_BASE (b)
2286                || TREE_CODE (TREE_OPERAND (addr_b, 0)) == SSA_NAME))
2287     {
2288       /* For true dependences we can apply TBAA.  */
2289       if (flag_strict_aliasing
2290           && DR_IS_WRITE (a) && DR_IS_READ (b)
2291           && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
2292                                      get_alias_set (DR_REF (b))))
2293         return false;
2294       if (TREE_CODE (addr_a) == MEM_REF)
2295         return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
2296                                        TREE_OPERAND (addr_b, 0));
2297       else
2298         return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
2299                                        TREE_OPERAND (addr_b, 0));
2300     }
2301
2302   /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
2303      that is being subsetted in the loop nest.  */
2304   if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
2305     return refs_output_dependent_p (addr_a, addr_b);
2306   else if (DR_IS_READ (a) && DR_IS_WRITE (b))
2307     return refs_anti_dependent_p (addr_a, addr_b);
2308   return refs_may_alias_p (addr_a, addr_b);
2309 }
2310
2311 /* REF_A and REF_B both satisfy access_fn_component_p.  Return true
2312    if it is meaningful to compare their associated access functions
2313    when checking for dependencies.  */
2314
2315 static bool
2316 access_fn_components_comparable_p (tree ref_a, tree ref_b)
2317 {
2318   /* Allow pairs of component refs from the following sets:
2319
2320        { REALPART_EXPR, IMAGPART_EXPR }
2321        { COMPONENT_REF }
2322        { ARRAY_REF }.  */
2323   tree_code code_a = TREE_CODE (ref_a);
2324   tree_code code_b = TREE_CODE (ref_b);
2325   if (code_a == IMAGPART_EXPR)
2326     code_a = REALPART_EXPR;
2327   if (code_b == IMAGPART_EXPR)
2328     code_b = REALPART_EXPR;
2329   if (code_a != code_b)
2330     return false;
2331
2332   if (TREE_CODE (ref_a) == COMPONENT_REF)
2333     /* ??? We cannot simply use the type of operand #0 of the refs here as
2334        the Fortran compiler smuggles type punning into COMPONENT_REFs.
2335        Use the DECL_CONTEXT of the FIELD_DECLs instead.  */
2336     return (DECL_CONTEXT (TREE_OPERAND (ref_a, 1))
2337             == DECL_CONTEXT (TREE_OPERAND (ref_b, 1)));
2338
2339   return types_compatible_p (TREE_TYPE (TREE_OPERAND (ref_a, 0)),
2340                              TREE_TYPE (TREE_OPERAND (ref_b, 0)));
2341 }
2342
2343 /* Initialize a data dependence relation between data accesses A and
2344    B.  NB_LOOPS is the number of loops surrounding the references: the
2345    size of the classic distance/direction vectors.  */
2346
2347 struct data_dependence_relation *
2348 initialize_data_dependence_relation (struct data_reference *a,
2349                                      struct data_reference *b,
2350                                      vec<loop_p> loop_nest)
2351 {
2352   struct data_dependence_relation *res;
2353   unsigned int i;
2354
2355   res = XCNEW (struct data_dependence_relation);
2356   DDR_A (res) = a;
2357   DDR_B (res) = b;
2358   DDR_LOOP_NEST (res).create (0);
2359   DDR_SUBSCRIPTS (res).create (0);
2360   DDR_DIR_VECTS (res).create (0);
2361   DDR_DIST_VECTS (res).create (0);
2362
2363   if (a == NULL || b == NULL)
2364     {
2365       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
2366       return res;
2367     }
2368
2369   /* If the data references do not alias, then they are independent.  */
2370   if (!dr_may_alias_p (a, b, loop_nest.exists ()))
2371     {
2372       DDR_ARE_DEPENDENT (res) = chrec_known;
2373       return res;
2374     }
2375
2376   unsigned int num_dimensions_a = DR_NUM_DIMENSIONS (a);
2377   unsigned int num_dimensions_b = DR_NUM_DIMENSIONS (b);
2378   if (num_dimensions_a == 0 || num_dimensions_b == 0)
2379     {
2380       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
2381       return res;
2382     }
2383
2384   /* For unconstrained bases, the root (highest-indexed) subscript
2385      describes a variation in the base of the original DR_REF rather
2386      than a component access.  We have no type that accurately describes
2387      the new DR_BASE_OBJECT (whose TREE_TYPE describes the type *after*
2388      applying this subscript) so limit the search to the last real
2389      component access.
2390
2391      E.g. for:
2392
2393         void
2394         f (int a[][8], int b[][8])
2395         {
2396           for (int i = 0; i < 8; ++i)
2397             a[i * 2][0] = b[i][0];
2398         }
2399
2400      the a and b accesses have a single ARRAY_REF component reference [0]
2401      but have two subscripts.  */
2402   if (DR_UNCONSTRAINED_BASE (a))
2403     num_dimensions_a -= 1;
2404   if (DR_UNCONSTRAINED_BASE (b))
2405     num_dimensions_b -= 1;
2406
2407   /* These structures describe sequences of component references in
2408      DR_REF (A) and DR_REF (B).  Each component reference is tied to a
2409      specific access function.  */
2410   struct {
2411     /* The sequence starts at DR_ACCESS_FN (A, START_A) of A and
2412        DR_ACCESS_FN (B, START_B) of B (inclusive) and extends to higher
2413        indices.  In C notation, these are the indices of the rightmost
2414        component references; e.g. for a sequence .b.c.d, the start
2415        index is for .d.  */
2416     unsigned int start_a;
2417     unsigned int start_b;
2418
2419     /* The sequence contains LENGTH consecutive access functions from
2420        each DR.  */
2421     unsigned int length;
2422
2423     /* The enclosing objects for the A and B sequences respectively,
2424        i.e. the objects to which DR_ACCESS_FN (A, START_A + LENGTH - 1)
2425        and DR_ACCESS_FN (B, START_B + LENGTH - 1) are applied.  */
2426     tree object_a;
2427     tree object_b;
2428   } full_seq = {}, struct_seq = {};
2429
2430   /* Before each iteration of the loop:
2431
2432      - REF_A is what you get after applying DR_ACCESS_FN (A, INDEX_A) and
2433      - REF_B is what you get after applying DR_ACCESS_FN (B, INDEX_B).  */
2434   unsigned int index_a = 0;
2435   unsigned int index_b = 0;
2436   tree ref_a = DR_REF (a);
2437   tree ref_b = DR_REF (b);
2438
2439   /* Now walk the component references from the final DR_REFs back up to
2440      the enclosing base objects.  Each component reference corresponds
2441      to one access function in the DR, with access function 0 being for
2442      the final DR_REF and the highest-indexed access function being the
2443      one that is applied to the base of the DR.
2444
2445      Look for a sequence of component references whose access functions
2446      are comparable (see access_fn_components_comparable_p).  If more
2447      than one such sequence exists, pick the one nearest the base
2448      (which is the leftmost sequence in C notation).  Store this sequence
2449      in FULL_SEQ.
2450
2451      For example, if we have:
2452
2453         struct foo { struct bar s; ... } (*a)[10], (*b)[10];
2454
2455         A: a[0][i].s.c.d
2456         B: __real b[0][i].s.e[i].f
2457
2458      (where d is the same type as the real component of f) then the access
2459      functions would be:
2460
2461                          0   1   2   3
2462         A:              .d  .c  .s [i]
2463
2464                  0   1   2   3   4   5
2465         B:  __real  .f [i]  .e  .s [i]
2466
2467      The A0/B2 column isn't comparable, since .d is a COMPONENT_REF
2468      and [i] is an ARRAY_REF.  However, the A1/B3 column contains two
2469      COMPONENT_REF accesses for struct bar, so is comparable.  Likewise
2470      the A2/B4 column contains two COMPONENT_REF accesses for struct foo,
2471      so is comparable.  The A3/B5 column contains two ARRAY_REFs that
2472      index foo[10] arrays, so is again comparable.  The sequence is
2473      therefore:
2474
2475         A: [1, 3]  (i.e. [i].s.c)
2476         B: [3, 5]  (i.e. [i].s.e)
2477
2478      Also look for sequences of component references whose access
2479      functions are comparable and whose enclosing objects have the same
2480      RECORD_TYPE.  Store this sequence in STRUCT_SEQ.  In the above
2481      example, STRUCT_SEQ would be:
2482
2483         A: [1, 2]  (i.e. s.c)
2484         B: [3, 4]  (i.e. s.e)  */
2485   while (index_a < num_dimensions_a && index_b < num_dimensions_b)
2486     {
2487       /* REF_A and REF_B must be one of the component access types
2488          allowed by dr_analyze_indices.  */
2489       gcc_checking_assert (access_fn_component_p (ref_a));
2490       gcc_checking_assert (access_fn_component_p (ref_b));
2491
2492       /* Get the immediately-enclosing objects for REF_A and REF_B,
2493          i.e. the references *before* applying DR_ACCESS_FN (A, INDEX_A)
2494          and DR_ACCESS_FN (B, INDEX_B).  */
2495       tree object_a = TREE_OPERAND (ref_a, 0);
2496       tree object_b = TREE_OPERAND (ref_b, 0);
2497
2498       tree type_a = TREE_TYPE (object_a);
2499       tree type_b = TREE_TYPE (object_b);
2500       if (access_fn_components_comparable_p (ref_a, ref_b))
2501         {
2502           /* This pair of component accesses is comparable for dependence
2503              analysis, so we can include DR_ACCESS_FN (A, INDEX_A) and
2504              DR_ACCESS_FN (B, INDEX_B) in the sequence.  */
2505           if (full_seq.start_a + full_seq.length != index_a
2506               || full_seq.start_b + full_seq.length != index_b)
2507             {
2508               /* The accesses don't extend the current sequence,
2509                  so start a new one here.  */
2510               full_seq.start_a = index_a;
2511               full_seq.start_b = index_b;
2512               full_seq.length = 0;
2513             }
2514
2515           /* Add this pair of references to the sequence.  */
2516           full_seq.length += 1;
2517           full_seq.object_a = object_a;
2518           full_seq.object_b = object_b;
2519
2520           /* If the enclosing objects are structures (and thus have the
2521              same RECORD_TYPE), record the new sequence in STRUCT_SEQ.  */
2522           if (TREE_CODE (type_a) == RECORD_TYPE)
2523             struct_seq = full_seq;
2524
2525           /* Move to the next containing reference for both A and B.  */
2526           ref_a = object_a;
2527           ref_b = object_b;
2528           index_a += 1;
2529           index_b += 1;
2530           continue;
2531         }
2532
2533       /* Try to approach equal type sizes.  */
2534       if (!COMPLETE_TYPE_P (type_a)
2535           || !COMPLETE_TYPE_P (type_b)
2536           || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_a))
2537           || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_b)))
2538         break;
2539
2540       unsigned HOST_WIDE_INT size_a = tree_to_uhwi (TYPE_SIZE_UNIT (type_a));
2541       unsigned HOST_WIDE_INT size_b = tree_to_uhwi (TYPE_SIZE_UNIT (type_b));
2542       if (size_a <= size_b)
2543         {
2544           index_a += 1;
2545           ref_a = object_a;
2546         }
2547       if (size_b <= size_a)
2548         {
2549           index_b += 1;
2550           ref_b = object_b;
2551         }
2552     }
2553
2554   /* See whether FULL_SEQ ends at the base and whether the two bases
2555      are equal.  We do not care about TBAA or alignment info so we can
2556      use OEP_ADDRESS_OF to avoid false negatives.  */
2557   tree base_a = DR_BASE_OBJECT (a);
2558   tree base_b = DR_BASE_OBJECT (b);
2559   bool same_base_p = (full_seq.start_a + full_seq.length == num_dimensions_a
2560                       && full_seq.start_b + full_seq.length == num_dimensions_b
2561                       && DR_UNCONSTRAINED_BASE (a) == DR_UNCONSTRAINED_BASE (b)
2562                       && operand_equal_p (base_a, base_b, OEP_ADDRESS_OF)
2563                       && types_compatible_p (TREE_TYPE (base_a),
2564                                              TREE_TYPE (base_b))
2565                       && (!loop_nest.exists ()
2566                           || (object_address_invariant_in_loop_p
2567                               (loop_nest[0], base_a))));
2568
2569   /* If the bases are the same, we can include the base variation too.
2570      E.g. the b accesses in:
2571
2572        for (int i = 0; i < n; ++i)
2573          b[i + 4][0] = b[i][0];
2574
2575      have a definite dependence distance of 4, while for:
2576
2577        for (int i = 0; i < n; ++i)
2578          a[i + 4][0] = b[i][0];
2579
2580      the dependence distance depends on the gap between a and b.
2581
2582      If the bases are different then we can only rely on the sequence
2583      rooted at a structure access, since arrays are allowed to overlap
2584      arbitrarily and change shape arbitrarily.  E.g. we treat this as
2585      valid code:
2586
2587        int a[256];
2588        ...
2589        ((int (*)[4][3]) &a[1])[i][0] += ((int (*)[4][3]) &a[2])[i][0];
2590
2591      where two lvalues with the same int[4][3] type overlap, and where
2592      both lvalues are distinct from the object's declared type.  */
2593   if (same_base_p)
2594     {
2595       if (DR_UNCONSTRAINED_BASE (a))
2596         full_seq.length += 1;
2597     }
2598   else
2599     full_seq = struct_seq;
2600
2601   /* Punt if we didn't find a suitable sequence.  */
2602   if (full_seq.length == 0)
2603     {
2604       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
2605       return res;
2606     }
2607
2608   if (!same_base_p)
2609     {
2610       /* Partial overlap is possible for different bases when strict aliasing
2611          is not in effect.  It's also possible if either base involves a union
2612          access; e.g. for:
2613
2614            struct s1 { int a[2]; };
2615            struct s2 { struct s1 b; int c; };
2616            struct s3 { int d; struct s1 e; };
2617            union u { struct s2 f; struct s3 g; } *p, *q;
2618
2619          the s1 at "p->f.b" (base "p->f") partially overlaps the s1 at
2620          "p->g.e" (base "p->g") and might partially overlap the s1 at
2621          "q->g.e" (base "q->g").  */
2622       if (!flag_strict_aliasing
2623           || ref_contains_union_access_p (full_seq.object_a)
2624           || ref_contains_union_access_p (full_seq.object_b))
2625         {
2626           DDR_ARE_DEPENDENT (res) = chrec_dont_know;
2627           return res;
2628         }
2629
2630       DDR_COULD_BE_INDEPENDENT_P (res) = true;
2631       if (!loop_nest.exists ()
2632           || (object_address_invariant_in_loop_p (loop_nest[0],
2633                                                   full_seq.object_a)
2634               && object_address_invariant_in_loop_p (loop_nest[0],
2635                                                      full_seq.object_b)))
2636         {
2637           DDR_OBJECT_A (res) = full_seq.object_a;
2638           DDR_OBJECT_B (res) = full_seq.object_b;
2639         }
2640     }
2641
2642   DDR_AFFINE_P (res) = true;
2643   DDR_ARE_DEPENDENT (res) = NULL_TREE;
2644   DDR_SUBSCRIPTS (res).create (full_seq.length);
2645   DDR_LOOP_NEST (res) = loop_nest;
2646   DDR_INNER_LOOP (res) = 0;
2647   DDR_SELF_REFERENCE (res) = false;
2648
2649   for (i = 0; i < full_seq.length; ++i)
2650     {
2651       struct subscript *subscript;
2652
2653       subscript = XNEW (struct subscript);
2654       SUB_ACCESS_FN (subscript, 0) = DR_ACCESS_FN (a, full_seq.start_a + i);
2655       SUB_ACCESS_FN (subscript, 1) = DR_ACCESS_FN (b, full_seq.start_b + i);
2656       SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
2657       SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
2658       SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
2659       SUB_DISTANCE (subscript) = chrec_dont_know;
2660       DDR_SUBSCRIPTS (res).safe_push (subscript);
2661     }
2662
2663   return res;
2664 }
2665
2666 /* Frees memory used by the conflict function F.  */
2667
2668 static void
2669 free_conflict_function (conflict_function *f)
2670 {
2671   unsigned i;
2672
2673   if (CF_NONTRIVIAL_P (f))
2674     {
2675       for (i = 0; i < f->n; i++)
2676         affine_fn_free (f->fns[i]);
2677     }
2678   free (f);
2679 }
2680
2681 /* Frees memory used by SUBSCRIPTS.  */
2682
2683 static void
2684 free_subscripts (vec<subscript_p> subscripts)
2685 {
2686   unsigned i;
2687   subscript_p s;
2688
2689   FOR_EACH_VEC_ELT (subscripts, i, s)
2690     {
2691       free_conflict_function (s->conflicting_iterations_in_a);
2692       free_conflict_function (s->conflicting_iterations_in_b);
2693       free (s);
2694     }
2695   subscripts.release ();
2696 }
2697
2698 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
2699    description.  */
2700
2701 static inline void
2702 finalize_ddr_dependent (struct data_dependence_relation *ddr,
2703                         tree chrec)
2704 {
2705   DDR_ARE_DEPENDENT (ddr) = chrec;
2706   free_subscripts (DDR_SUBSCRIPTS (ddr));
2707   DDR_SUBSCRIPTS (ddr).create (0);
2708 }
2709
2710 /* The dependence relation DDR cannot be represented by a distance
2711    vector.  */
2712
2713 static inline void
2714 non_affine_dependence_relation (struct data_dependence_relation *ddr)
2715 {
2716   if (dump_file && (dump_flags & TDF_DETAILS))
2717     fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
2718
2719   DDR_AFFINE_P (ddr) = false;
2720 }
2721
2722 \f
2723
2724 /* This section contains the classic Banerjee tests.  */
2725
2726 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
2727    variables, i.e., if the ZIV (Zero Index Variable) test is true.  */
2728
2729 static inline bool
2730 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
2731 {
2732   return (evolution_function_is_constant_p (chrec_a)
2733           && evolution_function_is_constant_p (chrec_b));
2734 }
2735
2736 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
2737    variable, i.e., if the SIV (Single Index Variable) test is true.  */
2738
2739 static bool
2740 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
2741 {
2742   if ((evolution_function_is_constant_p (chrec_a)
2743        && evolution_function_is_univariate_p (chrec_b))
2744       || (evolution_function_is_constant_p (chrec_b)
2745           && evolution_function_is_univariate_p (chrec_a)))
2746     return true;
2747
2748   if (evolution_function_is_univariate_p (chrec_a)
2749       && evolution_function_is_univariate_p (chrec_b))
2750     {
2751       switch (TREE_CODE (chrec_a))
2752         {
2753         case POLYNOMIAL_CHREC:
2754           switch (TREE_CODE (chrec_b))
2755             {
2756             case POLYNOMIAL_CHREC:
2757               if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
2758                 return false;
2759               /* FALLTHRU */
2760
2761             default:
2762               return true;
2763             }
2764
2765         default:
2766           return true;
2767         }
2768     }
2769
2770   return false;
2771 }
2772
2773 /* Creates a conflict function with N dimensions.  The affine functions
2774    in each dimension follow.  */
2775
2776 static conflict_function *
2777 conflict_fn (unsigned n, ...)
2778 {
2779   unsigned i;
2780   conflict_function *ret = XCNEW (conflict_function);
2781   va_list ap;
2782
2783   gcc_assert (n > 0 && n <= MAX_DIM);
2784   va_start (ap, n);
2785
2786   ret->n = n;
2787   for (i = 0; i < n; i++)
2788     ret->fns[i] = va_arg (ap, affine_fn);
2789   va_end (ap);
2790
2791   return ret;
2792 }
2793
2794 /* Returns constant affine function with value CST.  */
2795
2796 static affine_fn
2797 affine_fn_cst (tree cst)
2798 {
2799   affine_fn fn;
2800   fn.create (1);
2801   fn.quick_push (cst);
2802   return fn;
2803 }
2804
2805 /* Returns affine function with single variable, CST + COEF * x_DIM.  */
2806
2807 static affine_fn
2808 affine_fn_univar (tree cst, unsigned dim, tree coef)
2809 {
2810   affine_fn fn;
2811   fn.create (dim + 1);
2812   unsigned i;
2813
2814   gcc_assert (dim > 0);
2815   fn.quick_push (cst);
2816   for (i = 1; i < dim; i++)
2817     fn.quick_push (integer_zero_node);
2818   fn.quick_push (coef);
2819   return fn;
2820 }
2821
2822 /* Analyze a ZIV (Zero Index Variable) subscript.  *OVERLAPS_A and
2823    *OVERLAPS_B are initialized to the functions that describe the
2824    relation between the elements accessed twice by CHREC_A and
2825    CHREC_B.  For k >= 0, the following property is verified:
2826
2827    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
2828
2829 static void
2830 analyze_ziv_subscript (tree chrec_a,
2831                        tree chrec_b,
2832                        conflict_function **overlaps_a,
2833                        conflict_function **overlaps_b,
2834                        tree *last_conflicts)
2835 {
2836   tree type, difference;
2837   dependence_stats.num_ziv++;
2838
2839   if (dump_file && (dump_flags & TDF_DETAILS))
2840     fprintf (dump_file, "(analyze_ziv_subscript \n");
2841
2842   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
2843   chrec_a = chrec_convert (type, chrec_a, NULL);
2844   chrec_b = chrec_convert (type, chrec_b, NULL);
2845   difference = chrec_fold_minus (type, chrec_a, chrec_b);
2846
2847   switch (TREE_CODE (difference))
2848     {
2849     case INTEGER_CST:
2850       if (integer_zerop (difference))
2851         {
2852           /* The difference is equal to zero: the accessed index
2853              overlaps for each iteration in the loop.  */
2854           *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2855           *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2856           *last_conflicts = chrec_dont_know;
2857           dependence_stats.num_ziv_dependent++;
2858         }
2859       else
2860         {
2861           /* The accesses do not overlap.  */
2862           *overlaps_a = conflict_fn_no_dependence ();
2863           *overlaps_b = conflict_fn_no_dependence ();
2864           *last_conflicts = integer_zero_node;
2865           dependence_stats.num_ziv_independent++;
2866         }
2867       break;
2868
2869     default:
2870       /* We're not sure whether the indexes overlap.  For the moment,
2871          conservatively answer "don't know".  */
2872       if (dump_file && (dump_flags & TDF_DETAILS))
2873         fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
2874
2875       *overlaps_a = conflict_fn_not_known ();
2876       *overlaps_b = conflict_fn_not_known ();
2877       *last_conflicts = chrec_dont_know;
2878       dependence_stats.num_ziv_unimplemented++;
2879       break;
2880     }
2881
2882   if (dump_file && (dump_flags & TDF_DETAILS))
2883     fprintf (dump_file, ")\n");
2884 }
2885
2886 /* Similar to max_stmt_executions_int, but returns the bound as a tree,
2887    and only if it fits to the int type.  If this is not the case, or the
2888    bound  on the number of iterations of LOOP could not be derived, returns
2889    chrec_dont_know.  */
2890
2891 static tree
2892 max_stmt_executions_tree (struct loop *loop)
2893 {
2894   widest_int nit;
2895
2896   if (!max_stmt_executions (loop, &nit))
2897     return chrec_dont_know;
2898
2899   if (!wi::fits_to_tree_p (nit, unsigned_type_node))
2900     return chrec_dont_know;
2901
2902   return wide_int_to_tree (unsigned_type_node, nit);
2903 }
2904
2905 /* Determine whether the CHREC is always positive/negative.  If the expression
2906    cannot be statically analyzed, return false, otherwise set the answer into
2907    VALUE.  */
2908
2909 static bool
2910 chrec_is_positive (tree chrec, bool *value)
2911 {
2912   bool value0, value1, value2;
2913   tree end_value, nb_iter;
2914
2915   switch (TREE_CODE (chrec))
2916     {
2917     case POLYNOMIAL_CHREC:
2918       if (!chrec_is_positive (CHREC_LEFT (chrec), &value0)
2919           || !chrec_is_positive (CHREC_RIGHT (chrec), &value1))
2920         return false;
2921
2922       /* FIXME -- overflows.  */
2923       if (value0 == value1)
2924         {
2925           *value = value0;
2926           return true;
2927         }
2928
2929       /* Otherwise the chrec is under the form: "{-197, +, 2}_1",
2930          and the proof consists in showing that the sign never
2931          changes during the execution of the loop, from 0 to
2932          loop->nb_iterations.  */
2933       if (!evolution_function_is_affine_p (chrec))
2934         return false;
2935
2936       nb_iter = number_of_latch_executions (get_chrec_loop (chrec));
2937       if (chrec_contains_undetermined (nb_iter))
2938         return false;
2939
2940 #if 0
2941       /* TODO -- If the test is after the exit, we may decrease the number of
2942          iterations by one.  */
2943       if (after_exit)
2944         nb_iter = chrec_fold_minus (type, nb_iter, build_int_cst (type, 1));
2945 #endif
2946
2947       end_value = chrec_apply (CHREC_VARIABLE (chrec), chrec, nb_iter);
2948
2949       if (!chrec_is_positive (end_value, &value2))
2950         return false;
2951
2952       *value = value0;
2953       return value0 == value1;
2954
2955     case INTEGER_CST:
2956       switch (tree_int_cst_sgn (chrec))
2957         {
2958         case -1:
2959           *value = false;
2960           break;
2961         case 1:
2962           *value = true;
2963           break;
2964         default:
2965           return false;
2966         }
2967       return true;
2968
2969     default:
2970       return false;
2971     }
2972 }
2973
2974
2975 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
2976    constant, and CHREC_B is an affine function.  *OVERLAPS_A and
2977    *OVERLAPS_B are initialized to the functions that describe the
2978    relation between the elements accessed twice by CHREC_A and
2979    CHREC_B.  For k >= 0, the following property is verified:
2980
2981    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
2982
2983 static void
2984 analyze_siv_subscript_cst_affine (tree chrec_a,
2985                                   tree chrec_b,
2986                                   conflict_function **overlaps_a,
2987                                   conflict_function **overlaps_b,
2988                                   tree *last_conflicts)
2989 {
2990   bool value0, value1, value2;
2991   tree type, difference, tmp;
2992
2993   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
2994   chrec_a = chrec_convert (type, chrec_a, NULL);
2995   chrec_b = chrec_convert (type, chrec_b, NULL);
2996   difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
2997
2998   /* Special case overlap in the first iteration.  */
2999   if (integer_zerop (difference))
3000     {
3001       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3002       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3003       *last_conflicts = integer_one_node;
3004       return;
3005     }
3006
3007   if (!chrec_is_positive (initial_condition (difference), &value0))
3008     {
3009       if (dump_file && (dump_flags & TDF_DETAILS))
3010         fprintf (dump_file, "siv test failed: chrec is not positive.\n");
3011
3012       dependence_stats.num_siv_unimplemented++;
3013       *overlaps_a = conflict_fn_not_known ();
3014       *overlaps_b = conflict_fn_not_known ();
3015       *last_conflicts = chrec_dont_know;
3016       return;
3017     }
3018   else
3019     {
3020       if (value0 == false)
3021         {
3022           if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
3023               || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
3024             {
3025               if (dump_file && (dump_flags & TDF_DETAILS))
3026                 fprintf (dump_file, "siv test failed: chrec not positive.\n");
3027
3028               *overlaps_a = conflict_fn_not_known ();
3029               *overlaps_b = conflict_fn_not_known ();
3030               *last_conflicts = chrec_dont_know;
3031               dependence_stats.num_siv_unimplemented++;
3032               return;
3033             }
3034           else
3035             {
3036               if (value1 == true)
3037                 {
3038                   /* Example:
3039                      chrec_a = 12
3040                      chrec_b = {10, +, 1}
3041                   */
3042
3043                   if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
3044                     {
3045                       HOST_WIDE_INT numiter;
3046                       struct loop *loop = get_chrec_loop (chrec_b);
3047
3048                       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3049                       tmp = fold_build2 (EXACT_DIV_EXPR, type,
3050                                          fold_build1 (ABS_EXPR, type, difference),
3051                                          CHREC_RIGHT (chrec_b));
3052                       *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
3053                       *last_conflicts = integer_one_node;
3054
3055
3056                       /* Perform weak-zero siv test to see if overlap is
3057                          outside the loop bounds.  */
3058                       numiter = max_stmt_executions_int (loop);
3059
3060                       if (numiter >= 0
3061                           && compare_tree_int (tmp, numiter) > 0)
3062                         {
3063                           free_conflict_function (*overlaps_a);
3064                           free_conflict_function (*overlaps_b);
3065                           *overlaps_a = conflict_fn_no_dependence ();
3066                           *overlaps_b = conflict_fn_no_dependence ();
3067                           *last_conflicts = integer_zero_node;
3068                           dependence_stats.num_siv_independent++;
3069                           return;
3070                         }
3071                       dependence_stats.num_siv_dependent++;
3072                       return;
3073                     }
3074
3075                   /* When the step does not divide the difference, there are
3076                      no overlaps.  */
3077                   else
3078                     {
3079                       *overlaps_a = conflict_fn_no_dependence ();
3080                       *overlaps_b = conflict_fn_no_dependence ();
3081                       *last_conflicts = integer_zero_node;
3082                       dependence_stats.num_siv_independent++;
3083                       return;
3084                     }
3085                 }
3086
3087               else
3088                 {
3089                   /* Example:
3090                      chrec_a = 12
3091                      chrec_b = {10, +, -1}
3092
3093                      In this case, chrec_a will not overlap with chrec_b.  */
3094                   *overlaps_a = conflict_fn_no_dependence ();
3095                   *overlaps_b = conflict_fn_no_dependence ();
3096                   *last_conflicts = integer_zero_node;
3097                   dependence_stats.num_siv_independent++;
3098                   return;
3099                 }
3100             }
3101         }
3102       else
3103         {
3104           if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
3105               || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
3106             {
3107               if (dump_file && (dump_flags & TDF_DETAILS))
3108                 fprintf (dump_file, "siv test failed: chrec not positive.\n");
3109
3110               *overlaps_a = conflict_fn_not_known ();
3111               *overlaps_b = conflict_fn_not_known ();
3112               *last_conflicts = chrec_dont_know;
3113               dependence_stats.num_siv_unimplemented++;
3114               return;
3115             }
3116           else
3117             {
3118               if (value2 == false)
3119                 {
3120                   /* Example:
3121                      chrec_a = 3
3122                      chrec_b = {10, +, -1}
3123                   */
3124                   if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
3125                     {
3126                       HOST_WIDE_INT numiter;
3127                       struct loop *loop = get_chrec_loop (chrec_b);
3128
3129                       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3130                       tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
3131                                          CHREC_RIGHT (chrec_b));
3132                       *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
3133                       *last_conflicts = integer_one_node;
3134
3135                       /* Perform weak-zero siv test to see if overlap is
3136                          outside the loop bounds.  */
3137                       numiter = max_stmt_executions_int (loop);
3138
3139                       if (numiter >= 0
3140                           && compare_tree_int (tmp, numiter) > 0)
3141                         {
3142                           free_conflict_function (*overlaps_a);
3143                           free_conflict_function (*overlaps_b);
3144                           *overlaps_a = conflict_fn_no_dependence ();
3145                           *overlaps_b = conflict_fn_no_dependence ();
3146                           *last_conflicts = integer_zero_node;
3147                           dependence_stats.num_siv_independent++;
3148                           return;
3149                         }
3150                       dependence_stats.num_siv_dependent++;
3151                       return;
3152                     }
3153
3154                   /* When the step does not divide the difference, there
3155                      are no overlaps.  */
3156                   else
3157                     {
3158                       *overlaps_a = conflict_fn_no_dependence ();
3159                       *overlaps_b = conflict_fn_no_dependence ();
3160                       *last_conflicts = integer_zero_node;
3161                       dependence_stats.num_siv_independent++;
3162                       return;
3163                     }
3164                 }
3165               else
3166                 {
3167                   /* Example:
3168                      chrec_a = 3
3169                      chrec_b = {4, +, 1}
3170
3171                      In this case, chrec_a will not overlap with chrec_b.  */
3172                   *overlaps_a = conflict_fn_no_dependence ();
3173                   *overlaps_b = conflict_fn_no_dependence ();
3174                   *last_conflicts = integer_zero_node;
3175                   dependence_stats.num_siv_independent++;
3176                   return;
3177                 }
3178             }
3179         }
3180     }
3181 }
3182
3183 /* Helper recursive function for initializing the matrix A.  Returns
3184    the initial value of CHREC.  */
3185
3186 static tree
3187 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
3188 {
3189   gcc_assert (chrec);
3190
3191   switch (TREE_CODE (chrec))
3192     {
3193     case POLYNOMIAL_CHREC:
3194       A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
3195       return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
3196
3197     case PLUS_EXPR:
3198     case MULT_EXPR:
3199     case MINUS_EXPR:
3200       {
3201         tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
3202         tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
3203
3204         return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
3205       }
3206
3207     CASE_CONVERT:
3208       {
3209         tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
3210         return chrec_convert (chrec_type (chrec), op, NULL);
3211       }
3212
3213     case BIT_NOT_EXPR:
3214       {
3215         /* Handle ~X as -1 - X.  */
3216         tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
3217         return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
3218                               build_int_cst (TREE_TYPE (chrec), -1), op);
3219       }
3220
3221     case INTEGER_CST:
3222       return chrec;
3223
3224     default:
3225       gcc_unreachable ();
3226       return NULL_TREE;
3227     }
3228 }
3229
3230 #define FLOOR_DIV(x,y) ((x) / (y))
3231
3232 /* Solves the special case of the Diophantine equation:
3233    | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
3234
3235    Computes the descriptions OVERLAPS_A and OVERLAPS_B.  NITER is the
3236    number of iterations that loops X and Y run.  The overlaps will be
3237    constructed as evolutions in dimension DIM.  */
3238
3239 static void
3240 compute_overlap_steps_for_affine_univar (HOST_WIDE_INT niter,
3241                                          HOST_WIDE_INT step_a,
3242                                          HOST_WIDE_INT step_b,
3243                                          affine_fn *overlaps_a,
3244                                          affine_fn *overlaps_b,
3245                                          tree *last_conflicts, int dim)
3246 {
3247   if (((step_a > 0 && step_b > 0)
3248        || (step_a < 0 && step_b < 0)))
3249     {
3250       HOST_WIDE_INT step_overlaps_a, step_overlaps_b;
3251       HOST_WIDE_INT gcd_steps_a_b, last_conflict, tau2;
3252
3253       gcd_steps_a_b = gcd (step_a, step_b);
3254       step_overlaps_a = step_b / gcd_steps_a_b;
3255       step_overlaps_b = step_a / gcd_steps_a_b;
3256
3257       if (niter > 0)
3258         {
3259           tau2 = FLOOR_DIV (niter, step_overlaps_a);
3260           tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
3261           last_conflict = tau2;
3262           *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
3263         }
3264       else
3265         *last_conflicts = chrec_dont_know;
3266
3267       *overlaps_a = affine_fn_univar (integer_zero_node, dim,
3268                                       build_int_cst (NULL_TREE,
3269                                                      step_overlaps_a));
3270       *overlaps_b = affine_fn_univar (integer_zero_node, dim,
3271                                       build_int_cst (NULL_TREE,
3272                                                      step_overlaps_b));
3273     }
3274
3275   else
3276     {
3277       *overlaps_a = affine_fn_cst (integer_zero_node);
3278       *overlaps_b = affine_fn_cst (integer_zero_node);
3279       *last_conflicts = integer_zero_node;
3280     }
3281 }
3282
3283 /* Solves the special case of a Diophantine equation where CHREC_A is
3284    an affine bivariate function, and CHREC_B is an affine univariate
3285    function.  For example,
3286
3287    | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
3288
3289    has the following overlapping functions:
3290
3291    | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
3292    | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
3293    | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
3294
3295    FORNOW: This is a specialized implementation for a case occurring in
3296    a common benchmark.  Implement the general algorithm.  */
3297
3298 static void
3299 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
3300                                       conflict_function **overlaps_a,
3301                                       conflict_function **overlaps_b,
3302                                       tree *last_conflicts)
3303 {
3304   bool xz_p, yz_p, xyz_p;
3305   HOST_WIDE_INT step_x, step_y, step_z;
3306   HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
3307   affine_fn overlaps_a_xz, overlaps_b_xz;
3308   affine_fn overlaps_a_yz, overlaps_b_yz;
3309   affine_fn overlaps_a_xyz, overlaps_b_xyz;
3310   affine_fn ova1, ova2, ovb;
3311   tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
3312
3313   step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
3314   step_y = int_cst_value (CHREC_RIGHT (chrec_a));
3315   step_z = int_cst_value (CHREC_RIGHT (chrec_b));
3316
3317   niter_x = max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)));
3318   niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a));
3319   niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b));
3320
3321   if (niter_x < 0 || niter_y < 0 || niter_z < 0)
3322     {
3323       if (dump_file && (dump_flags & TDF_DETAILS))
3324         fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
3325
3326       *overlaps_a = conflict_fn_not_known ();
3327       *overlaps_b = conflict_fn_not_known ();
3328       *last_conflicts = chrec_dont_know;
3329       return;
3330     }
3331
3332   niter = MIN (niter_x, niter_z);
3333   compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
3334                                            &overlaps_a_xz,
3335                                            &overlaps_b_xz,
3336                                            &last_conflicts_xz, 1);
3337   niter = MIN (niter_y, niter_z);
3338   compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
3339                                            &overlaps_a_yz,
3340                                            &overlaps_b_yz,
3341                                            &last_conflicts_yz, 2);
3342   niter = MIN (niter_x, niter_z);
3343   niter = MIN (niter_y, niter);
3344   compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
3345                                            &overlaps_a_xyz,
3346                                            &overlaps_b_xyz,
3347                                            &last_conflicts_xyz, 3);
3348
3349   xz_p = !integer_zerop (last_conflicts_xz);
3350   yz_p = !integer_zerop (last_conflicts_yz);
3351   xyz_p = !integer_zerop (last_conflicts_xyz);
3352
3353   if (xz_p || yz_p || xyz_p)
3354     {
3355       ova1 = affine_fn_cst (integer_zero_node);
3356       ova2 = affine_fn_cst (integer_zero_node);
3357       ovb = affine_fn_cst (integer_zero_node);
3358       if (xz_p)
3359         {
3360           affine_fn t0 = ova1;
3361           affine_fn t2 = ovb;
3362
3363           ova1 = affine_fn_plus (ova1, overlaps_a_xz);
3364           ovb = affine_fn_plus (ovb, overlaps_b_xz);
3365           affine_fn_free (t0);
3366           affine_fn_free (t2);
3367           *last_conflicts = last_conflicts_xz;
3368         }
3369       if (yz_p)
3370         {
3371           affine_fn t0 = ova2;
3372           affine_fn t2 = ovb;
3373
3374           ova2 = affine_fn_plus (ova2, overlaps_a_yz);
3375           ovb = affine_fn_plus (ovb, overlaps_b_yz);
3376           affine_fn_free (t0);
3377           affine_fn_free (t2);
3378           *last_conflicts = last_conflicts_yz;
3379         }
3380       if (xyz_p)
3381         {
3382           affine_fn t0 = ova1;
3383           affine_fn t2 = ova2;
3384           affine_fn t4 = ovb;
3385
3386           ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
3387           ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
3388           ovb = affine_fn_plus (ovb, overlaps_b_xyz);
3389           affine_fn_free (t0);
3390           affine_fn_free (t2);
3391           affine_fn_free (t4);
3392           *last_conflicts = last_conflicts_xyz;
3393         }
3394       *overlaps_a = conflict_fn (2, ova1, ova2);
3395       *overlaps_b = conflict_fn (1, ovb);
3396     }
3397   else
3398     {
3399       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3400       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3401       *last_conflicts = integer_zero_node;
3402     }
3403
3404   affine_fn_free (overlaps_a_xz);
3405   affine_fn_free (overlaps_b_xz);
3406   affine_fn_free (overlaps_a_yz);
3407   affine_fn_free (overlaps_b_yz);
3408   affine_fn_free (overlaps_a_xyz);
3409   affine_fn_free (overlaps_b_xyz);
3410 }
3411
3412 /* Copy the elements of vector VEC1 with length SIZE to VEC2.  */
3413
3414 static void
3415 lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
3416                     int size)
3417 {
3418   memcpy (vec2, vec1, size * sizeof (*vec1));
3419 }
3420
3421 /* Copy the elements of M x N matrix MAT1 to MAT2.  */
3422
3423 static void
3424 lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
3425                     int m, int n)
3426 {
3427   int i;
3428
3429   for (i = 0; i < m; i++)
3430     lambda_vector_copy (mat1[i], mat2[i], n);
3431 }
3432
3433 /* Store the N x N identity matrix in MAT.  */
3434
3435 static void
3436 lambda_matrix_id (lambda_matrix mat, int size)
3437 {
3438   int i, j;
3439
3440   for (i = 0; i < size; i++)
3441     for (j = 0; j < size; j++)
3442       mat[i][j] = (i == j) ? 1 : 0;
3443 }
3444
3445 /* Return the index of the first nonzero element of vector VEC1 between
3446    START and N.  We must have START <= N.
3447    Returns N if VEC1 is the zero vector.  */
3448
3449 static int
3450 lambda_vector_first_nz (lambda_vector vec1, int n, int start)
3451 {
3452   int j = start;
3453   while (j < n && vec1[j] == 0)
3454     j++;
3455   return j;
3456 }
3457
3458 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
3459    R2 = R2 + CONST1 * R1.  */
3460
3461 static void
3462 lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2,
3463                        lambda_int const1)
3464 {
3465   int i;
3466
3467   if (const1 == 0)
3468     return;
3469
3470   for (i = 0; i < n; i++)
3471     mat[r2][i] += const1 * mat[r1][i];
3472 }
3473
3474 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
3475    and store the result in VEC2.  */
3476
3477 static void
3478 lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
3479                           int size, lambda_int const1)
3480 {
3481   int i;
3482
3483   if (const1 == 0)
3484     lambda_vector_clear (vec2, size);
3485   else
3486     for (i = 0; i < size; i++)
3487       vec2[i] = const1 * vec1[i];
3488 }
3489
3490 /* Negate vector VEC1 with length SIZE and store it in VEC2.  */
3491
3492 static void
3493 lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
3494                       int size)
3495 {
3496   lambda_vector_mult_const (vec1, vec2, size, -1);
3497 }
3498
3499 /* Negate row R1 of matrix MAT which has N columns.  */
3500
3501 static void
3502 lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
3503 {
3504   lambda_vector_negate (mat[r1], mat[r1], n);
3505 }
3506
3507 /* Return true if two vectors are equal.  */
3508
3509 static bool
3510 lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
3511 {
3512   int i;
3513   for (i = 0; i < size; i++)
3514     if (vec1[i] != vec2[i])
3515       return false;
3516   return true;
3517 }
3518
3519 /* Given an M x N integer matrix A, this function determines an M x
3520    M unimodular matrix U, and an M x N echelon matrix S such that
3521    "U.A = S".  This decomposition is also known as "right Hermite".
3522
3523    Ref: Algorithm 2.1 page 33 in "Loop Transformations for
3524    Restructuring Compilers" Utpal Banerjee.  */
3525
3526 static void
3527 lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
3528                              lambda_matrix S, lambda_matrix U)
3529 {
3530   int i, j, i0 = 0;
3531
3532   lambda_matrix_copy (A, S, m, n);
3533   lambda_matrix_id (U, m);
3534
3535   for (j = 0; j < n; j++)
3536     {
3537       if (lambda_vector_first_nz (S[j], m, i0) < m)
3538         {
3539           ++i0;
3540           for (i = m - 1; i >= i0; i--)
3541             {
3542               while (S[i][j] != 0)
3543                 {
3544                   lambda_int sigma, factor, a, b;
3545
3546                   a = S[i-1][j];
3547                   b = S[i][j];
3548                   sigma = (a * b < 0) ? -1: 1;
3549                   a = abs_hwi (a);
3550                   b = abs_hwi (b);
3551                   factor = sigma * (a / b);
3552
3553                   lambda_matrix_row_add (S, n, i, i-1, -factor);
3554                   std::swap (S[i], S[i-1]);
3555
3556                   lambda_matrix_row_add (U, m, i, i-1, -factor);
3557                   std::swap (U[i], U[i-1]);
3558                 }
3559             }
3560         }
3561     }
3562 }
3563
3564 /* Determines the overlapping elements due to accesses CHREC_A and
3565    CHREC_B, that are affine functions.  This function cannot handle
3566    symbolic evolution functions, ie. when initial conditions are
3567    parameters, because it uses lambda matrices of integers.  */
3568
3569 static void
3570 analyze_subscript_affine_affine (tree chrec_a,
3571                                  tree chrec_b,
3572                                  conflict_function **overlaps_a,
3573                                  conflict_function **overlaps_b,
3574                                  tree *last_conflicts)
3575 {
3576   unsigned nb_vars_a, nb_vars_b, dim;
3577   HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta;
3578   lambda_matrix A, U, S;
3579   struct obstack scratch_obstack;
3580
3581   if (eq_evolutions_p (chrec_a, chrec_b))
3582     {
3583       /* The accessed index overlaps for each iteration in the
3584          loop.  */
3585       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3586       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3587       *last_conflicts = chrec_dont_know;
3588       return;
3589     }
3590   if (dump_file && (dump_flags & TDF_DETAILS))
3591     fprintf (dump_file, "(analyze_subscript_affine_affine \n");
3592
3593   /* For determining the initial intersection, we have to solve a
3594      Diophantine equation.  This is the most time consuming part.
3595
3596      For answering to the question: "Is there a dependence?" we have
3597      to prove that there exists a solution to the Diophantine
3598      equation, and that the solution is in the iteration domain,
3599      i.e. the solution is positive or zero, and that the solution
3600      happens before the upper bound loop.nb_iterations.  Otherwise
3601      there is no dependence.  This function outputs a description of
3602      the iterations that hold the intersections.  */
3603
3604   nb_vars_a = nb_vars_in_chrec (chrec_a);
3605   nb_vars_b = nb_vars_in_chrec (chrec_b);
3606
3607   gcc_obstack_init (&scratch_obstack);
3608
3609   dim = nb_vars_a + nb_vars_b;
3610   U = lambda_matrix_new (dim, dim, &scratch_obstack);
3611   A = lambda_matrix_new (dim, 1, &scratch_obstack);
3612   S = lambda_matrix_new (dim, 1, &scratch_obstack);
3613
3614   init_a = int_cst_value (initialize_matrix_A (A, chrec_a, 0, 1));
3615   init_b = int_cst_value (initialize_matrix_A (A, chrec_b, nb_vars_a, -1));
3616   gamma = init_b - init_a;
3617
3618   /* Don't do all the hard work of solving the Diophantine equation
3619      when we already know the solution: for example,
3620      | {3, +, 1}_1
3621      | {3, +, 4}_2
3622      | gamma = 3 - 3 = 0.
3623      Then the first overlap occurs during the first iterations:
3624      | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
3625   */
3626   if (gamma == 0)
3627     {
3628       if (nb_vars_a == 1 && nb_vars_b == 1)
3629         {
3630           HOST_WIDE_INT step_a, step_b;
3631           HOST_WIDE_INT niter, niter_a, niter_b;
3632           affine_fn ova, ovb;
3633
3634           niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a));
3635           niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b));
3636           niter = MIN (niter_a, niter_b);
3637           step_a = int_cst_value (CHREC_RIGHT (chrec_a));
3638           step_b = int_cst_value (CHREC_RIGHT (chrec_b));
3639
3640           compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
3641                                                    &ova, &ovb,
3642                                                    last_conflicts, 1);
3643           *overlaps_a = conflict_fn (1, ova);
3644           *overlaps_b = conflict_fn (1, ovb);
3645         }
3646
3647       else if (nb_vars_a == 2 && nb_vars_b == 1)
3648         compute_overlap_steps_for_affine_1_2
3649           (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
3650
3651       else if (nb_vars_a == 1 && nb_vars_b == 2)
3652         compute_overlap_steps_for_affine_1_2
3653           (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
3654
3655       else
3656         {
3657           if (dump_file && (dump_flags & TDF_DETAILS))
3658             fprintf (dump_file, "affine-affine test failed: too many variables.\n");
3659           *overlaps_a = conflict_fn_not_known ();
3660           *overlaps_b = conflict_fn_not_known ();
3661           *last_conflicts = chrec_dont_know;
3662         }
3663       goto end_analyze_subs_aa;
3664     }
3665
3666   /* U.A = S */
3667   lambda_matrix_right_hermite (A, dim, 1, S, U);
3668
3669   if (S[0][0] < 0)
3670     {
3671       S[0][0] *= -1;
3672       lambda_matrix_row_negate (U, dim, 0);
3673     }
3674   gcd_alpha_beta = S[0][0];
3675
3676   /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
3677      but that is a quite strange case.  Instead of ICEing, answer
3678      don't know.  */
3679   if (gcd_alpha_beta == 0)
3680     {
3681       *overlaps_a = conflict_fn_not_known ();
3682       *overlaps_b = conflict_fn_not_known ();
3683       *last_conflicts = chrec_dont_know;
3684       goto end_analyze_subs_aa;
3685     }
3686
3687   /* The classic "gcd-test".  */
3688   if (!int_divides_p (gcd_alpha_beta, gamma))
3689     {
3690       /* The "gcd-test" has determined that there is no integer
3691          solution, i.e. there is no dependence.  */
3692       *overlaps_a = conflict_fn_no_dependence ();
3693       *overlaps_b = conflict_fn_no_dependence ();
3694       *last_conflicts = integer_zero_node;
3695     }
3696
3697   /* Both access functions are univariate.  This includes SIV and MIV cases.  */
3698   else if (nb_vars_a == 1 && nb_vars_b == 1)
3699     {
3700       /* Both functions should have the same evolution sign.  */
3701       if (((A[0][0] > 0 && -A[1][0] > 0)
3702            || (A[0][0] < 0 && -A[1][0] < 0)))
3703         {
3704           /* The solutions are given by:
3705              |
3706              | [GAMMA/GCD_ALPHA_BETA  t].[u11 u12]  = [x0]
3707              |                           [u21 u22]    [y0]
3708
3709              For a given integer t.  Using the following variables,
3710
3711              | i0 = u11 * gamma / gcd_alpha_beta
3712              | j0 = u12 * gamma / gcd_alpha_beta
3713              | i1 = u21
3714              | j1 = u22
3715
3716              the solutions are:
3717
3718              | x0 = i0 + i1 * t,
3719              | y0 = j0 + j1 * t.  */
3720           HOST_WIDE_INT i0, j0, i1, j1;
3721
3722           i0 = U[0][0] * gamma / gcd_alpha_beta;
3723           j0 = U[0][1] * gamma / gcd_alpha_beta;
3724           i1 = U[1][0];
3725           j1 = U[1][1];
3726
3727           if ((i1 == 0 && i0 < 0)
3728               || (j1 == 0 && j0 < 0))
3729             {
3730               /* There is no solution.
3731                  FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
3732                  falls in here, but for the moment we don't look at the
3733                  upper bound of the iteration domain.  */
3734               *overlaps_a = conflict_fn_no_dependence ();
3735               *overlaps_b = conflict_fn_no_dependence ();
3736               *last_conflicts = integer_zero_node;
3737               goto end_analyze_subs_aa;
3738             }
3739
3740           if (i1 > 0 && j1 > 0)
3741             {
3742               HOST_WIDE_INT niter_a
3743                 = max_stmt_executions_int (get_chrec_loop (chrec_a));
3744               HOST_WIDE_INT niter_b
3745                 = max_stmt_executions_int (get_chrec_loop (chrec_b));
3746               HOST_WIDE_INT niter = MIN (niter_a, niter_b);
3747
3748               /* (X0, Y0) is a solution of the Diophantine equation:
3749                  "chrec_a (X0) = chrec_b (Y0)".  */
3750               HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
3751                                         CEIL (-j0, j1));
3752               HOST_WIDE_INT x0 = i1 * tau1 + i0;
3753               HOST_WIDE_INT y0 = j1 * tau1 + j0;
3754
3755               /* (X1, Y1) is the smallest positive solution of the eq
3756                  "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
3757                  first conflict occurs.  */
3758               HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
3759               HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
3760               HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
3761
3762               if (niter > 0)
3763                 {
3764                   /* If the overlap occurs outside of the bounds of the
3765                      loop, there is no dependence.  */
3766                   if (x1 >= niter_a || y1 >= niter_b)
3767                     {
3768                       *overlaps_a = conflict_fn_no_dependence ();
3769                       *overlaps_b = conflict_fn_no_dependence ();
3770                       *last_conflicts = integer_zero_node;
3771                       goto end_analyze_subs_aa;
3772                     }
3773
3774                   /* max stmt executions can get quite large, avoid
3775                      overflows by using wide ints here.  */
3776                   widest_int tau2
3777                     = wi::smin (wi::sdiv_floor (wi::sub (niter_a, i0), i1),
3778                                 wi::sdiv_floor (wi::sub (niter_b, j0), j1));
3779                   widest_int last_conflict = wi::sub (tau2, (x1 - i0)/i1);
3780                   if (wi::min_precision (last_conflict, SIGNED)
3781                       <= TYPE_PRECISION (integer_type_node))
3782                     *last_conflicts
3783                        = build_int_cst (integer_type_node,
3784                                         last_conflict.to_shwi ());
3785                   else
3786                     *last_conflicts = chrec_dont_know;
3787                 }
3788               else
3789                 *last_conflicts = chrec_dont_know;
3790
3791               *overlaps_a
3792                 = conflict_fn (1,
3793                                affine_fn_univar (build_int_cst (NULL_TREE, x1),
3794                                                  1,
3795                                                  build_int_cst (NULL_TREE, i1)));
3796               *overlaps_b
3797                 = conflict_fn (1,
3798                                affine_fn_univar (build_int_cst (NULL_TREE, y1),
3799                                                  1,
3800                                                  build_int_cst (NULL_TREE, j1)));
3801             }
3802           else
3803             {
3804               /* FIXME: For the moment, the upper bound of the
3805                  iteration domain for i and j is not checked.  */
3806               if (dump_file && (dump_flags & TDF_DETAILS))
3807                 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
3808               *overlaps_a = conflict_fn_not_known ();
3809               *overlaps_b = conflict_fn_not_known ();
3810               *last_conflicts = chrec_dont_know;
3811             }
3812         }
3813       else
3814         {
3815           if (dump_file && (dump_flags & TDF_DETAILS))
3816             fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
3817           *overlaps_a = conflict_fn_not_known ();
3818           *overlaps_b = conflict_fn_not_known ();
3819           *last_conflicts = chrec_dont_know;
3820         }
3821     }
3822   else
3823     {
3824       if (dump_file && (dump_flags & TDF_DETAILS))
3825         fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
3826       *overlaps_a = conflict_fn_not_known ();
3827       *overlaps_b = conflict_fn_not_known ();
3828       *last_conflicts = chrec_dont_know;
3829     }
3830
3831 end_analyze_subs_aa:
3832   obstack_free (&scratch_obstack, NULL);
3833   if (dump_file && (dump_flags & TDF_DETAILS))
3834     {
3835       fprintf (dump_file, "  (overlaps_a = ");
3836       dump_conflict_function (dump_file, *overlaps_a);
3837       fprintf (dump_file, ")\n  (overlaps_b = ");
3838       dump_conflict_function (dump_file, *overlaps_b);
3839       fprintf (dump_file, "))\n");
3840     }
3841 }
3842
3843 /* Returns true when analyze_subscript_affine_affine can be used for
3844    determining the dependence relation between chrec_a and chrec_b,
3845    that contain symbols.  This function modifies chrec_a and chrec_b
3846    such that the analysis result is the same, and such that they don't
3847    contain symbols, and then can safely be passed to the analyzer.
3848
3849    Example: The analysis of the following tuples of evolutions produce
3850    the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
3851    vs. {0, +, 1}_1
3852
3853    {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
3854    {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
3855 */
3856
3857 static bool
3858 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
3859 {
3860   tree diff, type, left_a, left_b, right_b;
3861
3862   if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
3863       || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
3864     /* FIXME: For the moment not handled.  Might be refined later.  */
3865     return false;
3866
3867   type = chrec_type (*chrec_a);
3868   left_a = CHREC_LEFT (*chrec_a);
3869   left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
3870   diff = chrec_fold_minus (type, left_a, left_b);
3871
3872   if (!evolution_function_is_constant_p (diff))
3873     return false;
3874
3875   if (dump_file && (dump_flags & TDF_DETAILS))
3876     fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
3877
3878   *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
3879                                      diff, CHREC_RIGHT (*chrec_a));
3880   right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
3881   *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
3882                                      build_int_cst (type, 0),
3883                                      right_b);
3884   return true;
3885 }
3886
3887 /* Analyze a SIV (Single Index Variable) subscript.  *OVERLAPS_A and
3888    *OVERLAPS_B are initialized to the functions that describe the
3889    relation between the elements accessed twice by CHREC_A and
3890    CHREC_B.  For k >= 0, the following property is verified:
3891
3892    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
3893
3894 static void
3895 analyze_siv_subscript (tree chrec_a,
3896                        tree chrec_b,
3897                        conflict_function **overlaps_a,
3898                        conflict_function **overlaps_b,
3899                        tree *last_conflicts,
3900                        int loop_nest_num)
3901 {
3902   dependence_stats.num_siv++;
3903
3904   if (dump_file && (dump_flags & TDF_DETAILS))
3905     fprintf (dump_file, "(analyze_siv_subscript \n");
3906
3907   if (evolution_function_is_constant_p (chrec_a)
3908       && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
3909     analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
3910                                       overlaps_a, overlaps_b, last_conflicts);
3911
3912   else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
3913            && evolution_function_is_constant_p (chrec_b))
3914     analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
3915                                       overlaps_b, overlaps_a, last_conflicts);
3916
3917   else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
3918            && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
3919     {
3920       if (!chrec_contains_symbols (chrec_a)
3921           && !chrec_contains_symbols (chrec_b))
3922         {
3923           analyze_subscript_affine_affine (chrec_a, chrec_b,
3924                                            overlaps_a, overlaps_b,
3925                                            last_conflicts);
3926
3927           if (CF_NOT_KNOWN_P (*overlaps_a)
3928               || CF_NOT_KNOWN_P (*overlaps_b))
3929             dependence_stats.num_siv_unimplemented++;
3930           else if (CF_NO_DEPENDENCE_P (*overlaps_a)
3931                    || CF_NO_DEPENDENCE_P (*overlaps_b))
3932             dependence_stats.num_siv_independent++;
3933           else
3934             dependence_stats.num_siv_dependent++;
3935         }
3936       else if (can_use_analyze_subscript_affine_affine (&chrec_a,
3937                                                         &chrec_b))
3938         {
3939           analyze_subscript_affine_affine (chrec_a, chrec_b,
3940                                            overlaps_a, overlaps_b,
3941                                            last_conflicts);
3942
3943           if (CF_NOT_KNOWN_P (*overlaps_a)
3944               || CF_NOT_KNOWN_P (*overlaps_b))
3945             dependence_stats.num_siv_unimplemented++;
3946           else if (CF_NO_DEPENDENCE_P (*overlaps_a)
3947                    || CF_NO_DEPENDENCE_P (*overlaps_b))
3948             dependence_stats.num_siv_independent++;
3949           else
3950             dependence_stats.num_siv_dependent++;
3951         }
3952       else
3953         goto siv_subscript_dontknow;
3954     }
3955
3956   else
3957     {
3958     siv_subscript_dontknow:;
3959       if (dump_file && (dump_flags & TDF_DETAILS))
3960         fprintf (dump_file, "  siv test failed: unimplemented");
3961       *overlaps_a = conflict_fn_not_known ();
3962       *overlaps_b = conflict_fn_not_known ();
3963       *last_conflicts = chrec_dont_know;
3964       dependence_stats.num_siv_unimplemented++;
3965     }
3966
3967   if (dump_file && (dump_flags & TDF_DETAILS))
3968     fprintf (dump_file, ")\n");
3969 }
3970
3971 /* Returns false if we can prove that the greatest common divisor of the steps
3972    of CHREC does not divide CST, false otherwise.  */
3973
3974 static bool
3975 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
3976 {
3977   HOST_WIDE_INT cd = 0, val;
3978   tree step;
3979
3980   if (!tree_fits_shwi_p (cst))
3981     return true;
3982   val = tree_to_shwi (cst);
3983
3984   while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
3985     {
3986       step = CHREC_RIGHT (chrec);
3987       if (!tree_fits_shwi_p (step))
3988         return true;
3989       cd = gcd (cd, tree_to_shwi (step));
3990       chrec = CHREC_LEFT (chrec);
3991     }
3992
3993   return val % cd == 0;
3994 }
3995
3996 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
3997    LOOP_NEST.  *OVERLAPS_A and *OVERLAPS_B are initialized to the
3998    functions that describe the relation between the elements accessed
3999    twice by CHREC_A and CHREC_B.  For k >= 0, the following property
4000    is verified:
4001
4002    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
4003
4004 static void
4005 analyze_miv_subscript (tree chrec_a,
4006                        tree chrec_b,
4007                        conflict_function **overlaps_a,
4008                        conflict_function **overlaps_b,
4009                        tree *last_conflicts,
4010                        struct loop *loop_nest)
4011 {
4012   tree type, difference;
4013
4014   dependence_stats.num_miv++;
4015   if (dump_file && (dump_flags & TDF_DETAILS))
4016     fprintf (dump_file, "(analyze_miv_subscript \n");
4017
4018   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
4019   chrec_a = chrec_convert (type, chrec_a, NULL);
4020   chrec_b = chrec_convert (type, chrec_b, NULL);
4021   difference = chrec_fold_minus (type, chrec_a, chrec_b);
4022
4023   if (eq_evolutions_p (chrec_a, chrec_b))
4024     {
4025       /* Access functions are the same: all the elements are accessed
4026          in the same order.  */
4027       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4028       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4029       *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
4030       dependence_stats.num_miv_dependent++;
4031     }
4032
4033   else if (evolution_function_is_constant_p (difference)
4034            && evolution_function_is_affine_multivariate_p (chrec_a,
4035                                                            loop_nest->num)
4036            && !gcd_of_steps_may_divide_p (chrec_a, difference))
4037     {
4038       /* testsuite/.../ssa-chrec-33.c
4039          {{21, +, 2}_1, +, -2}_2  vs.  {{20, +, 2}_1, +, -2}_2
4040
4041          The difference is 1, and all the evolution steps are multiples
4042          of 2, consequently there are no overlapping elements.  */
4043       *overlaps_a = conflict_fn_no_dependence ();
4044       *overlaps_b = conflict_fn_no_dependence ();
4045       *last_conflicts = integer_zero_node;
4046       dependence_stats.num_miv_independent++;
4047     }
4048
4049   else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest->num)
4050            && !chrec_contains_symbols (chrec_a)
4051            && evolution_function_is_affine_in_loop (chrec_b, loop_nest->num)
4052            && !chrec_contains_symbols (chrec_b))
4053     {
4054       /* testsuite/.../ssa-chrec-35.c
4055          {0, +, 1}_2  vs.  {0, +, 1}_3
4056          the overlapping elements are respectively located at iterations:
4057          {0, +, 1}_x and {0, +, 1}_x,
4058          in other words, we have the equality:
4059          {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
4060
4061          Other examples:
4062          {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
4063          {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
4064
4065          {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
4066          {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
4067       */
4068       analyze_subscript_affine_affine (chrec_a, chrec_b,
4069                                        overlaps_a, overlaps_b, last_conflicts);
4070
4071       if (CF_NOT_KNOWN_P (*overlaps_a)
4072           || CF_NOT_KNOWN_P (*overlaps_b))
4073         dependence_stats.num_miv_unimplemented++;
4074       else if (CF_NO_DEPENDENCE_P (*overlaps_a)
4075                || CF_NO_DEPENDENCE_P (*overlaps_b))
4076         dependence_stats.num_miv_independent++;
4077       else
4078         dependence_stats.num_miv_dependent++;
4079     }
4080
4081   else
4082     {
4083       /* When the analysis is too difficult, answer "don't know".  */
4084       if (dump_file && (dump_flags & TDF_DETAILS))
4085         fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
4086
4087       *overlaps_a = conflict_fn_not_known ();
4088       *overlaps_b = conflict_fn_not_known ();
4089       *last_conflicts = chrec_dont_know;
4090       dependence_stats.num_miv_unimplemented++;
4091     }
4092
4093   if (dump_file && (dump_flags & TDF_DETAILS))
4094     fprintf (dump_file, ")\n");
4095 }
4096
4097 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
4098    with respect to LOOP_NEST.  OVERLAP_ITERATIONS_A and
4099    OVERLAP_ITERATIONS_B are initialized with two functions that
4100    describe the iterations that contain conflicting elements.
4101
4102    Remark: For an integer k >= 0, the following equality is true:
4103
4104    CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
4105 */
4106
4107 static void
4108 analyze_overlapping_iterations (tree chrec_a,
4109                                 tree chrec_b,
4110                                 conflict_function **overlap_iterations_a,
4111                                 conflict_function **overlap_iterations_b,
4112                                 tree *last_conflicts, struct loop *loop_nest)
4113 {
4114   unsigned int lnn = loop_nest->num;
4115
4116   dependence_stats.num_subscript_tests++;
4117
4118   if (dump_file && (dump_flags & TDF_DETAILS))
4119     {
4120       fprintf (dump_file, "(analyze_overlapping_iterations \n");
4121       fprintf (dump_file, "  (chrec_a = ");
4122       print_generic_expr (dump_file, chrec_a);
4123       fprintf (dump_file, ")\n  (chrec_b = ");
4124       print_generic_expr (dump_file, chrec_b);
4125       fprintf (dump_file, ")\n");
4126     }
4127
4128   if (chrec_a == NULL_TREE
4129       || chrec_b == NULL_TREE
4130       || chrec_contains_undetermined (chrec_a)
4131       || chrec_contains_undetermined (chrec_b))
4132     {
4133       dependence_stats.num_subscript_undetermined++;
4134
4135       *overlap_iterations_a = conflict_fn_not_known ();
4136       *overlap_iterations_b = conflict_fn_not_known ();
4137     }
4138
4139   /* If they are the same chrec, and are affine, they overlap
4140      on every iteration.  */
4141   else if (eq_evolutions_p (chrec_a, chrec_b)
4142            && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
4143                || operand_equal_p (chrec_a, chrec_b, 0)))
4144     {
4145       dependence_stats.num_same_subscript_function++;
4146       *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4147       *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4148       *last_conflicts = chrec_dont_know;
4149     }
4150
4151   /* If they aren't the same, and aren't affine, we can't do anything
4152      yet.  */
4153   else if ((chrec_contains_symbols (chrec_a)
4154             || chrec_contains_symbols (chrec_b))
4155            && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
4156                || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
4157     {
4158       dependence_stats.num_subscript_undetermined++;
4159       *overlap_iterations_a = conflict_fn_not_known ();
4160       *overlap_iterations_b = conflict_fn_not_known ();
4161     }
4162
4163   else if (ziv_subscript_p (chrec_a, chrec_b))
4164     analyze_ziv_subscript (chrec_a, chrec_b,
4165                            overlap_iterations_a, overlap_iterations_b,
4166                            last_conflicts);
4167
4168   else if (siv_subscript_p (chrec_a, chrec_b))
4169     analyze_siv_subscript (chrec_a, chrec_b,
4170                            overlap_iterations_a, overlap_iterations_b,
4171                            last_conflicts, lnn);
4172
4173   else
4174     analyze_miv_subscript (chrec_a, chrec_b,
4175                            overlap_iterations_a, overlap_iterations_b,
4176                            last_conflicts, loop_nest);
4177
4178   if (dump_file && (dump_flags & TDF_DETAILS))
4179     {
4180       fprintf (dump_file, "  (overlap_iterations_a = ");
4181       dump_conflict_function (dump_file, *overlap_iterations_a);
4182       fprintf (dump_file, ")\n  (overlap_iterations_b = ");
4183       dump_conflict_function (dump_file, *overlap_iterations_b);
4184       fprintf (dump_file, "))\n");
4185     }
4186 }
4187
4188 /* Helper function for uniquely inserting distance vectors.  */
4189
4190 static void
4191 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
4192 {
4193   unsigned i;
4194   lambda_vector v;
4195
4196   FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, v)
4197     if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
4198       return;
4199
4200   DDR_DIST_VECTS (ddr).safe_push (dist_v);
4201 }
4202
4203 /* Helper function for uniquely inserting direction vectors.  */
4204
4205 static void
4206 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
4207 {
4208   unsigned i;
4209   lambda_vector v;
4210
4211   FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), i, v)
4212     if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
4213       return;
4214
4215   DDR_DIR_VECTS (ddr).safe_push (dir_v);
4216 }
4217
4218 /* Add a distance of 1 on all the loops outer than INDEX.  If we
4219    haven't yet determined a distance for this outer loop, push a new
4220    distance vector composed of the previous distance, and a distance
4221    of 1 for this outer loop.  Example:
4222
4223    | loop_1
4224    |   loop_2
4225    |     A[10]
4226    |   endloop_2
4227    | endloop_1
4228
4229    Saved vectors are of the form (dist_in_1, dist_in_2).  First, we
4230    save (0, 1), then we have to save (1, 0).  */
4231
4232 static void
4233 add_outer_distances (struct data_dependence_relation *ddr,
4234                      lambda_vector dist_v, int index)
4235 {
4236   /* For each outer loop where init_v is not set, the accesses are
4237      in dependence of distance 1 in the loop.  */
4238   while (--index >= 0)
4239     {
4240       lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4241       lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
4242       save_v[index] = 1;
4243       save_dist_v (ddr, save_v);
4244     }
4245 }
4246
4247 /* Return false when fail to represent the data dependence as a
4248    distance vector.  A_INDEX is the index of the first reference
4249    (0 for DDR_A, 1 for DDR_B) and B_INDEX is the index of the
4250    second reference.  INIT_B is set to true when a component has been
4251    added to the distance vector DIST_V.  INDEX_CARRY is then set to
4252    the index in DIST_V that carries the dependence.  */
4253
4254 static bool
4255 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
4256                              unsigned int a_index, unsigned int b_index,
4257                              lambda_vector dist_v, bool *init_b,
4258                              int *index_carry)
4259 {
4260   unsigned i;
4261   lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4262
4263   for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
4264     {
4265       tree access_fn_a, access_fn_b;
4266       struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
4267
4268       if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
4269         {
4270           non_affine_dependence_relation (ddr);
4271           return false;
4272         }
4273
4274       access_fn_a = SUB_ACCESS_FN (subscript, a_index);
4275       access_fn_b = SUB_ACCESS_FN (subscript, b_index);
4276
4277       if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
4278           && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
4279         {
4280           HOST_WIDE_INT dist;
4281           int index;
4282           int var_a = CHREC_VARIABLE (access_fn_a);
4283           int var_b = CHREC_VARIABLE (access_fn_b);
4284
4285           if (var_a != var_b
4286               || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
4287             {
4288               non_affine_dependence_relation (ddr);
4289               return false;
4290             }
4291
4292           dist = int_cst_value (SUB_DISTANCE (subscript));
4293           index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
4294           *index_carry = MIN (index, *index_carry);
4295
4296           /* This is the subscript coupling test.  If we have already
4297              recorded a distance for this loop (a distance coming from
4298              another subscript), it should be the same.  For example,
4299              in the following code, there is no dependence:
4300
4301              | loop i = 0, N, 1
4302              |   T[i+1][i] = ...
4303              |   ... = T[i][i]
4304              | endloop
4305           */
4306           if (init_v[index] != 0 && dist_v[index] != dist)
4307             {
4308               finalize_ddr_dependent (ddr, chrec_known);
4309               return false;
4310             }
4311
4312           dist_v[index] = dist;
4313           init_v[index] = 1;
4314           *init_b = true;
4315         }
4316       else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
4317         {
4318           /* This can be for example an affine vs. constant dependence
4319              (T[i] vs. T[3]) that is not an affine dependence and is
4320              not representable as a distance vector.  */
4321           non_affine_dependence_relation (ddr);
4322           return false;
4323         }
4324     }
4325
4326   return true;
4327 }
4328
4329 /* Return true when the DDR contains only constant access functions.  */
4330
4331 static bool
4332 constant_access_functions (const struct data_dependence_relation *ddr)
4333 {
4334   unsigned i;
4335   subscript *sub;
4336
4337   FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
4338     if (!evolution_function_is_constant_p (SUB_ACCESS_FN (sub, 0))
4339         || !evolution_function_is_constant_p (SUB_ACCESS_FN (sub, 1)))
4340       return false;
4341
4342   return true;
4343 }
4344
4345 /* Helper function for the case where DDR_A and DDR_B are the same
4346    multivariate access function with a constant step.  For an example
4347    see pr34635-1.c.  */
4348
4349 static void
4350 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
4351 {
4352   int x_1, x_2;
4353   tree c_1 = CHREC_LEFT (c_2);
4354   tree c_0 = CHREC_LEFT (c_1);
4355   lambda_vector dist_v;
4356   HOST_WIDE_INT v1, v2, cd;
4357
4358   /* Polynomials with more than 2 variables are not handled yet.  When
4359      the evolution steps are parameters, it is not possible to
4360      represent the dependence using classical distance vectors.  */
4361   if (TREE_CODE (c_0) != INTEGER_CST
4362       || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
4363       || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
4364     {
4365       DDR_AFFINE_P (ddr) = false;
4366       return;
4367     }
4368
4369   x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
4370   x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
4371
4372   /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2).  */
4373   dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4374   v1 = int_cst_value (CHREC_RIGHT (c_1));
4375   v2 = int_cst_value (CHREC_RIGHT (c_2));
4376   cd = gcd (v1, v2);
4377   v1 /= cd;
4378   v2 /= cd;
4379
4380   if (v2 < 0)
4381     {
4382       v2 = -v2;
4383       v1 = -v1;
4384     }
4385
4386   dist_v[x_1] = v2;
4387   dist_v[x_2] = -v1;
4388   save_dist_v (ddr, dist_v);
4389
4390   add_outer_distances (ddr, dist_v, x_1);
4391 }
4392
4393 /* Helper function for the case where DDR_A and DDR_B are the same
4394    access functions.  */
4395
4396 static void
4397 add_other_self_distances (struct data_dependence_relation *ddr)
4398 {
4399   lambda_vector dist_v;
4400   unsigned i;
4401   int index_carry = DDR_NB_LOOPS (ddr);
4402   subscript *sub;
4403
4404   FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
4405     {
4406       tree access_fun = SUB_ACCESS_FN (sub, 0);
4407
4408       if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
4409         {
4410           if (!evolution_function_is_univariate_p (access_fun))
4411             {
4412               if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
4413                 {
4414                   DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
4415                   return;
4416                 }
4417
4418               access_fun = SUB_ACCESS_FN (DDR_SUBSCRIPT (ddr, 0), 0);
4419
4420               if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
4421                 add_multivariate_self_dist (ddr, access_fun);
4422               else
4423                 /* The evolution step is not constant: it varies in
4424                    the outer loop, so this cannot be represented by a
4425                    distance vector.  For example in pr34635.c the
4426                    evolution is {0, +, {0, +, 4}_1}_2.  */
4427                 DDR_AFFINE_P (ddr) = false;
4428
4429               return;
4430             }
4431
4432           index_carry = MIN (index_carry,
4433                              index_in_loop_nest (CHREC_VARIABLE (access_fun),
4434                                                  DDR_LOOP_NEST (ddr)));
4435         }
4436     }
4437
4438   dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4439   add_outer_distances (ddr, dist_v, index_carry);
4440 }
4441
4442 static void
4443 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
4444 {
4445   lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4446
4447   dist_v[DDR_INNER_LOOP (ddr)] = 1;
4448   save_dist_v (ddr, dist_v);
4449 }
4450
4451 /* Adds a unit distance vector to DDR when there is a 0 overlap.  This
4452    is the case for example when access functions are the same and
4453    equal to a constant, as in:
4454
4455    | loop_1
4456    |   A[3] = ...
4457    |   ... = A[3]
4458    | endloop_1
4459
4460    in which case the distance vectors are (0) and (1).  */
4461
4462 static void
4463 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
4464 {
4465   unsigned i, j;
4466
4467   for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
4468     {
4469       subscript_p sub = DDR_SUBSCRIPT (ddr, i);
4470       conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
4471       conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
4472
4473       for (j = 0; j < ca->n; j++)
4474         if (affine_function_zero_p (ca->fns[j]))
4475           {
4476             insert_innermost_unit_dist_vector (ddr);
4477             return;
4478           }
4479
4480       for (j = 0; j < cb->n; j++)
4481         if (affine_function_zero_p (cb->fns[j]))
4482           {
4483             insert_innermost_unit_dist_vector (ddr);
4484             return;
4485           }
4486     }
4487 }
4488
4489 /* Return true when the DDR contains two data references that have the
4490    same access functions.  */
4491
4492 static inline bool
4493 same_access_functions (const struct data_dependence_relation *ddr)
4494 {
4495   unsigned i;
4496   subscript *sub;
4497
4498   FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
4499     if (!eq_evolutions_p (SUB_ACCESS_FN (sub, 0),
4500                           SUB_ACCESS_FN (sub, 1)))
4501       return false;
4502
4503   return true;
4504 }
4505
4506 /* Compute the classic per loop distance vector.  DDR is the data
4507    dependence relation to build a vector from.  Return false when fail
4508    to represent the data dependence as a distance vector.  */
4509
4510 static bool
4511 build_classic_dist_vector (struct data_dependence_relation *ddr,
4512                            struct loop *loop_nest)
4513 {
4514   bool init_b = false;
4515   int index_carry = DDR_NB_LOOPS (ddr);
4516   lambda_vector dist_v;
4517
4518   if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
4519     return false;
4520
4521   if (same_access_functions (ddr))
4522     {
4523       /* Save the 0 vector.  */
4524       dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4525       save_dist_v (ddr, dist_v);
4526
4527       if (constant_access_functions (ddr))
4528         add_distance_for_zero_overlaps (ddr);
4529
4530       if (DDR_NB_LOOPS (ddr) > 1)
4531         add_other_self_distances (ddr);
4532
4533       return true;
4534     }
4535
4536   dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4537   if (!build_classic_dist_vector_1 (ddr, 0, 1, dist_v, &init_b, &index_carry))
4538     return false;
4539
4540   /* Save the distance vector if we initialized one.  */
4541   if (init_b)
4542     {
4543       /* Verify a basic constraint: classic distance vectors should
4544          always be lexicographically positive.
4545
4546          Data references are collected in the order of execution of
4547          the program, thus for the following loop
4548
4549          | for (i = 1; i < 100; i++)
4550          |   for (j = 1; j < 100; j++)
4551          |     {
4552          |       t = T[j+1][i-1];  // A
4553          |       T[j][i] = t + 2;  // B
4554          |     }
4555
4556          references are collected following the direction of the wind:
4557          A then B.  The data dependence tests are performed also
4558          following this order, such that we're looking at the distance
4559          separating the elements accessed by A from the elements later
4560          accessed by B.  But in this example, the distance returned by
4561          test_dep (A, B) is lexicographically negative (-1, 1), that
4562          means that the access A occurs later than B with respect to
4563          the outer loop, ie. we're actually looking upwind.  In this
4564          case we solve test_dep (B, A) looking downwind to the
4565          lexicographically positive solution, that returns the
4566          distance vector (1, -1).  */
4567       if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
4568         {
4569           lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4570           if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
4571             return false;
4572           compute_subscript_distance (ddr);
4573           if (!build_classic_dist_vector_1 (ddr, 1, 0, save_v, &init_b,
4574                                             &index_carry))
4575             return false;
4576           save_dist_v (ddr, save_v);
4577           DDR_REVERSED_P (ddr) = true;
4578
4579           /* In this case there is a dependence forward for all the
4580              outer loops:
4581
4582              | for (k = 1; k < 100; k++)
4583              |  for (i = 1; i < 100; i++)
4584              |   for (j = 1; j < 100; j++)
4585              |     {
4586              |       t = T[j+1][i-1];  // A
4587              |       T[j][i] = t + 2;  // B
4588              |     }
4589
4590              the vectors are:
4591              (0,  1, -1)
4592              (1,  1, -1)
4593              (1, -1,  1)
4594           */
4595           if (DDR_NB_LOOPS (ddr) > 1)
4596             {
4597               add_outer_distances (ddr, save_v, index_carry);
4598               add_outer_distances (ddr, dist_v, index_carry);
4599             }
4600         }
4601       else
4602         {
4603           lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4604           lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
4605
4606           if (DDR_NB_LOOPS (ddr) > 1)
4607             {
4608               lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4609
4610               if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
4611                 return false;
4612               compute_subscript_distance (ddr);
4613               if (!build_classic_dist_vector_1 (ddr, 1, 0, opposite_v, &init_b,
4614                                                 &index_carry))
4615                 return false;
4616
4617               save_dist_v (ddr, save_v);
4618               add_outer_distances (ddr, dist_v, index_carry);
4619               add_outer_distances (ddr, opposite_v, index_carry);
4620             }
4621           else
4622             save_dist_v (ddr, save_v);
4623         }
4624     }
4625   else
4626     {
4627       /* There is a distance of 1 on all the outer loops: Example:
4628          there is a dependence of distance 1 on loop_1 for the array A.
4629
4630          | loop_1
4631          |   A[5] = ...
4632          | endloop
4633       */
4634       add_outer_distances (ddr, dist_v,
4635                            lambda_vector_first_nz (dist_v,
4636                                                    DDR_NB_LOOPS (ddr), 0));
4637     }
4638
4639   if (dump_file && (dump_flags & TDF_DETAILS))
4640     {
4641       unsigned i;
4642
4643       fprintf (dump_file, "(build_classic_dist_vector\n");
4644       for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
4645         {
4646           fprintf (dump_file, "  dist_vector = (");
4647           print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
4648                                DDR_NB_LOOPS (ddr));
4649           fprintf (dump_file, "  )\n");
4650         }
4651       fprintf (dump_file, ")\n");
4652     }
4653
4654   return true;
4655 }
4656
4657 /* Return the direction for a given distance.
4658    FIXME: Computing dir this way is suboptimal, since dir can catch
4659    cases that dist is unable to represent.  */
4660
4661 static inline enum data_dependence_direction
4662 dir_from_dist (int dist)
4663 {
4664   if (dist > 0)
4665     return dir_positive;
4666   else if (dist < 0)
4667     return dir_negative;
4668   else
4669     return dir_equal;
4670 }
4671
4672 /* Compute the classic per loop direction vector.  DDR is the data
4673    dependence relation to build a vector from.  */
4674
4675 static void
4676 build_classic_dir_vector (struct data_dependence_relation *ddr)
4677 {
4678   unsigned i, j;
4679   lambda_vector dist_v;
4680
4681   FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
4682     {
4683       lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4684
4685       for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
4686         dir_v[j] = dir_from_dist (dist_v[j]);
4687
4688       save_dir_v (ddr, dir_v);
4689     }
4690 }
4691
4692 /* Helper function.  Returns true when there is a dependence between the
4693    data references.  A_INDEX is the index of the first reference (0 for
4694    DDR_A, 1 for DDR_B) and B_INDEX is the index of the second reference.  */
4695
4696 static bool
4697 subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
4698                                unsigned int a_index, unsigned int b_index,
4699                                struct loop *loop_nest)
4700 {
4701   unsigned int i;
4702   tree last_conflicts;
4703   struct subscript *subscript;
4704   tree res = NULL_TREE;
4705
4706   for (i = 0; DDR_SUBSCRIPTS (ddr).iterate (i, &subscript); i++)
4707     {
4708       conflict_function *overlaps_a, *overlaps_b;
4709
4710       analyze_overlapping_iterations (SUB_ACCESS_FN (subscript, a_index),
4711                                       SUB_ACCESS_FN (subscript, b_index),
4712                                       &overlaps_a, &overlaps_b,
4713                                       &last_conflicts, loop_nest);
4714
4715       if (SUB_CONFLICTS_IN_A (subscript))
4716         free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
4717       if (SUB_CONFLICTS_IN_B (subscript))
4718         free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
4719
4720       SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
4721       SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
4722       SUB_LAST_CONFLICT (subscript) = last_conflicts;
4723
4724       /* If there is any undetermined conflict function we have to
4725          give a conservative answer in case we cannot prove that
4726          no dependence exists when analyzing another subscript.  */
4727       if (CF_NOT_KNOWN_P (overlaps_a)
4728           || CF_NOT_KNOWN_P (overlaps_b))
4729         {
4730           res = chrec_dont_know;
4731           continue;
4732         }
4733
4734       /* When there is a subscript with no dependence we can stop.  */
4735       else if (CF_NO_DEPENDENCE_P (overlaps_a)
4736                || CF_NO_DEPENDENCE_P (overlaps_b))
4737         {
4738           res = chrec_known;
4739           break;
4740         }
4741     }
4742
4743   if (res == NULL_TREE)
4744     return true;
4745
4746   if (res == chrec_known)
4747     dependence_stats.num_dependence_independent++;
4748   else
4749     dependence_stats.num_dependence_undetermined++;
4750   finalize_ddr_dependent (ddr, res);
4751   return false;
4752 }
4753
4754 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR.  */
4755
4756 static void
4757 subscript_dependence_tester (struct data_dependence_relation *ddr,
4758                              struct loop *loop_nest)
4759 {
4760   if (subscript_dependence_tester_1 (ddr, 0, 1, loop_nest))
4761     dependence_stats.num_dependence_dependent++;
4762
4763   compute_subscript_distance (ddr);
4764   if (build_classic_dist_vector (ddr, loop_nest))
4765     build_classic_dir_vector (ddr);
4766 }
4767
4768 /* Returns true when all the access functions of A are affine or
4769    constant with respect to LOOP_NEST.  */
4770
4771 static bool
4772 access_functions_are_affine_or_constant_p (const struct data_reference *a,
4773                                            const struct loop *loop_nest)
4774 {
4775   unsigned int i;
4776   vec<tree> fns = DR_ACCESS_FNS (a);
4777   tree t;
4778
4779   FOR_EACH_VEC_ELT (fns, i, t)
4780     if (!evolution_function_is_invariant_p (t, loop_nest->num)
4781         && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
4782       return false;
4783
4784   return true;
4785 }
4786
4787 /* This computes the affine dependence relation between A and B with
4788    respect to LOOP_NEST.  CHREC_KNOWN is used for representing the
4789    independence between two accesses, while CHREC_DONT_KNOW is used
4790    for representing the unknown relation.
4791
4792    Note that it is possible to stop the computation of the dependence
4793    relation the first time we detect a CHREC_KNOWN element for a given
4794    subscript.  */
4795
4796 void
4797 compute_affine_dependence (struct data_dependence_relation *ddr,
4798                            struct loop *loop_nest)
4799 {
4800   struct data_reference *dra = DDR_A (ddr);
4801   struct data_reference *drb = DDR_B (ddr);
4802
4803   if (dump_file && (dump_flags & TDF_DETAILS))
4804     {
4805       fprintf (dump_file, "(compute_affine_dependence\n");
4806       fprintf (dump_file, "  stmt_a: ");
4807       print_gimple_stmt (dump_file, DR_STMT (dra), 0, TDF_SLIM);
4808       fprintf (dump_file, "  stmt_b: ");
4809       print_gimple_stmt (dump_file, DR_STMT (drb), 0, TDF_SLIM);
4810     }
4811
4812   /* Analyze only when the dependence relation is not yet known.  */
4813   if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4814     {
4815       dependence_stats.num_dependence_tests++;
4816
4817       if (access_functions_are_affine_or_constant_p (dra, loop_nest)
4818           && access_functions_are_affine_or_constant_p (drb, loop_nest))
4819         subscript_dependence_tester (ddr, loop_nest);
4820
4821       /* As a last case, if the dependence cannot be determined, or if
4822          the dependence is considered too difficult to determine, answer
4823          "don't know".  */
4824       else
4825         {
4826           dependence_stats.num_dependence_undetermined++;
4827
4828           if (dump_file && (dump_flags & TDF_DETAILS))
4829             {
4830               fprintf (dump_file, "Data ref a:\n");
4831               dump_data_reference (dump_file, dra);
4832               fprintf (dump_file, "Data ref b:\n");
4833               dump_data_reference (dump_file, drb);
4834               fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
4835             }
4836           finalize_ddr_dependent (ddr, chrec_dont_know);
4837         }
4838     }
4839
4840   if (dump_file && (dump_flags & TDF_DETAILS))
4841     {
4842       if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4843         fprintf (dump_file, ") -> no dependence\n");
4844       else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
4845         fprintf (dump_file, ") -> dependence analysis failed\n");
4846       else
4847         fprintf (dump_file, ")\n");
4848     }
4849 }
4850
4851 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
4852    the data references in DATAREFS, in the LOOP_NEST.  When
4853    COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
4854    relations.  Return true when successful, i.e. data references number
4855    is small enough to be handled.  */
4856
4857 bool
4858 compute_all_dependences (vec<data_reference_p> datarefs,
4859                          vec<ddr_p> *dependence_relations,
4860                          vec<loop_p> loop_nest,
4861                          bool compute_self_and_rr)
4862 {
4863   struct data_dependence_relation *ddr;
4864   struct data_reference *a, *b;
4865   unsigned int i, j;
4866
4867   if ((int) datarefs.length ()
4868       > PARAM_VALUE (PARAM_LOOP_MAX_DATAREFS_FOR_DATADEPS))
4869     {
4870       struct data_dependence_relation *ddr;
4871
4872       /* Insert a single relation into dependence_relations:
4873          chrec_dont_know.  */
4874       ddr = initialize_data_dependence_relation (NULL, NULL, loop_nest);
4875       dependence_relations->safe_push (ddr);
4876       return false;
4877     }
4878
4879   FOR_EACH_VEC_ELT (datarefs, i, a)
4880     for (j = i + 1; datarefs.iterate (j, &b); j++)
4881       if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
4882         {
4883           ddr = initialize_data_dependence_relation (a, b, loop_nest);
4884           dependence_relations->safe_push (ddr);
4885           if (loop_nest.exists ())
4886             compute_affine_dependence (ddr, loop_nest[0]);
4887         }
4888
4889   if (compute_self_and_rr)
4890     FOR_EACH_VEC_ELT (datarefs, i, a)
4891       {
4892         ddr = initialize_data_dependence_relation (a, a, loop_nest);
4893         dependence_relations->safe_push (ddr);
4894         if (loop_nest.exists ())
4895           compute_affine_dependence (ddr, loop_nest[0]);
4896       }
4897
4898   return true;
4899 }
4900
4901 /* Describes the location of one memory reference found in a statement.  */
4902
4903 struct data_ref_loc
4904 {
4905   /* The memory reference, as a tree.  */
4906   tree ref;
4907
4908   /* True if the memory reference is read, false if it is a store.  */
4909   bool is_read;
4910
4911   /* True if the data reference is conditional within the containing
4912      statement, i.e. if it might not occur even when the statement
4913      is executed and runs to completion.  */
4914   bool is_conditional_in_stmt;
4915 };
4916
4917
4918 /* Stores the locations of memory references in STMT to REFERENCES.  Returns
4919    true if STMT clobbers memory, false otherwise.  */
4920
4921 static bool
4922 get_references_in_stmt (gimple *stmt, vec<data_ref_loc, va_heap> *references)
4923 {
4924   bool clobbers_memory = false;
4925   data_ref_loc ref;
4926   tree op0, op1;
4927   enum gimple_code stmt_code = gimple_code (stmt);
4928
4929   /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
4930      As we cannot model data-references to not spelled out
4931      accesses give up if they may occur.  */
4932   if (stmt_code == GIMPLE_CALL
4933       && !(gimple_call_flags (stmt) & ECF_CONST))
4934     {
4935       /* Allow IFN_GOMP_SIMD_LANE in their own loops.  */
4936       if (gimple_call_internal_p (stmt))
4937         switch (gimple_call_internal_fn (stmt))
4938           {
4939           case IFN_GOMP_SIMD_LANE:
4940             {
4941               struct loop *loop = gimple_bb (stmt)->loop_father;
4942               tree uid = gimple_call_arg (stmt, 0);
4943               gcc_assert (TREE_CODE (uid) == SSA_NAME);
4944               if (loop == NULL
4945                   || loop->simduid != SSA_NAME_VAR (uid))
4946                 clobbers_memory = true;
4947               break;
4948             }
4949           case IFN_MASK_LOAD:
4950           case IFN_MASK_STORE:
4951             break;
4952           default:
4953             clobbers_memory = true;
4954             break;
4955           }
4956       else
4957         clobbers_memory = true;
4958     }
4959   else if (stmt_code == GIMPLE_ASM
4960            && (gimple_asm_volatile_p (as_a <gasm *> (stmt))
4961                || gimple_vuse (stmt)))
4962     clobbers_memory = true;
4963
4964   if (!gimple_vuse (stmt))
4965     return clobbers_memory;
4966
4967   if (stmt_code == GIMPLE_ASSIGN)
4968     {
4969       tree base;
4970       op0 = gimple_assign_lhs (stmt);
4971       op1 = gimple_assign_rhs1 (stmt);
4972
4973       if (DECL_P (op1)
4974           || (REFERENCE_CLASS_P (op1)
4975               && (base = get_base_address (op1))
4976               && TREE_CODE (base) != SSA_NAME
4977               && !is_gimple_min_invariant (base)))
4978         {
4979           ref.ref = op1;
4980           ref.is_read = true;
4981           ref.is_conditional_in_stmt = false;
4982           references->safe_push (ref);
4983         }
4984     }
4985   else if (stmt_code == GIMPLE_CALL)
4986     {
4987       unsigned i, n;
4988       tree ptr, type;
4989       unsigned int align;
4990
4991       ref.is_read = false;
4992       if (gimple_call_internal_p (stmt))
4993         switch (gimple_call_internal_fn (stmt))
4994           {
4995           case IFN_MASK_LOAD:
4996             if (gimple_call_lhs (stmt) == NULL_TREE)
4997               break;
4998             ref.is_read = true;
4999             /* FALLTHRU */
5000           case IFN_MASK_STORE:
5001             ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)), 0);
5002             align = tree_to_shwi (gimple_call_arg (stmt, 1));
5003             if (ref.is_read)
5004               type = TREE_TYPE (gimple_call_lhs (stmt));
5005             else
5006               type = TREE_TYPE (gimple_call_arg (stmt, 3));
5007             if (TYPE_ALIGN (type) != align)
5008               type = build_aligned_type (type, align);
5009             ref.is_conditional_in_stmt = true;
5010             ref.ref = fold_build2 (MEM_REF, type, gimple_call_arg (stmt, 0),
5011                                    ptr);
5012             references->safe_push (ref);
5013             return false;
5014           default:
5015             break;
5016           }
5017
5018       op0 = gimple_call_lhs (stmt);
5019       n = gimple_call_num_args (stmt);
5020       for (i = 0; i < n; i++)
5021         {
5022           op1 = gimple_call_arg (stmt, i);
5023
5024           if (DECL_P (op1)
5025               || (REFERENCE_CLASS_P (op1) && get_base_address (op1)))
5026             {
5027               ref.ref = op1;
5028               ref.is_read = true;
5029               ref.is_conditional_in_stmt = false;
5030               references->safe_push (ref);
5031             }
5032         }
5033     }
5034   else
5035     return clobbers_memory;
5036
5037   if (op0
5038       && (DECL_P (op0)
5039           || (REFERENCE_CLASS_P (op0) && get_base_address (op0))))
5040     {
5041       ref.ref = op0;
5042       ref.is_read = false;
5043       ref.is_conditional_in_stmt = false;
5044       references->safe_push (ref);
5045     }
5046   return clobbers_memory;
5047 }
5048
5049
5050 /* Returns true if the loop-nest has any data reference.  */
5051
5052 bool
5053 loop_nest_has_data_refs (loop_p loop)
5054 {
5055   basic_block *bbs = get_loop_body (loop);
5056   auto_vec<data_ref_loc, 3> references;
5057
5058   for (unsigned i = 0; i < loop->num_nodes; i++)
5059     {
5060       basic_block bb = bbs[i];
5061       gimple_stmt_iterator bsi;
5062
5063       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5064         {
5065           gimple *stmt = gsi_stmt (bsi);
5066           get_references_in_stmt (stmt, &references);
5067           if (references.length ())
5068             {
5069               free (bbs);
5070               return true;
5071             }
5072         }
5073     }
5074   free (bbs);
5075   return false;
5076 }
5077
5078 /* Stores the data references in STMT to DATAREFS.  If there is an unanalyzable
5079    reference, returns false, otherwise returns true.  NEST is the outermost
5080    loop of the loop nest in which the references should be analyzed.  */
5081
5082 opt_result
5083 find_data_references_in_stmt (struct loop *nest, gimple *stmt,
5084                               vec<data_reference_p> *datarefs)
5085 {
5086   unsigned i;
5087   auto_vec<data_ref_loc, 2> references;
5088   data_ref_loc *ref;
5089   data_reference_p dr;
5090
5091   if (get_references_in_stmt (stmt, &references))
5092     return opt_result::failure_at (stmt, "statement clobbers memory: %G",
5093                                    stmt);
5094
5095   FOR_EACH_VEC_ELT (references, i, ref)
5096     {
5097       dr = create_data_ref (nest ? loop_preheader_edge (nest) : NULL,
5098                             loop_containing_stmt (stmt), ref->ref,
5099                             stmt, ref->is_read, ref->is_conditional_in_stmt);
5100       gcc_assert (dr != NULL);
5101       datarefs->safe_push (dr);
5102     }
5103
5104   return opt_result::success ();
5105 }
5106
5107 /* Stores the data references in STMT to DATAREFS.  If there is an
5108    unanalyzable reference, returns false, otherwise returns true.
5109    NEST is the outermost loop of the loop nest in which the references
5110    should be instantiated, LOOP is the loop in which the references
5111    should be analyzed.  */
5112
5113 bool
5114 graphite_find_data_references_in_stmt (edge nest, loop_p loop, gimple *stmt,
5115                                        vec<data_reference_p> *datarefs)
5116 {
5117   unsigned i;
5118   auto_vec<data_ref_loc, 2> references;
5119   data_ref_loc *ref;
5120   bool ret = true;
5121   data_reference_p dr;
5122
5123   if (get_references_in_stmt (stmt, &references))
5124     return false;
5125
5126   FOR_EACH_VEC_ELT (references, i, ref)
5127     {
5128       dr = create_data_ref (nest, loop, ref->ref, stmt, ref->is_read,
5129                             ref->is_conditional_in_stmt);
5130       gcc_assert (dr != NULL);
5131       datarefs->safe_push (dr);
5132     }
5133
5134   return ret;
5135 }
5136
5137 /* Search the data references in LOOP, and record the information into
5138    DATAREFS.  Returns chrec_dont_know when failing to analyze a
5139    difficult case, returns NULL_TREE otherwise.  */
5140
5141 tree
5142 find_data_references_in_bb (struct loop *loop, basic_block bb,
5143                             vec<data_reference_p> *datarefs)
5144 {
5145   gimple_stmt_iterator bsi;
5146
5147   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5148     {
5149       gimple *stmt = gsi_stmt (bsi);
5150
5151       if (!find_data_references_in_stmt (loop, stmt, datarefs))
5152         {
5153           struct data_reference *res;
5154           res = XCNEW (struct data_reference);
5155           datarefs->safe_push (res);
5156
5157           return chrec_dont_know;
5158         }
5159     }
5160
5161   return NULL_TREE;
5162 }
5163
5164 /* Search the data references in LOOP, and record the information into
5165    DATAREFS.  Returns chrec_dont_know when failing to analyze a
5166    difficult case, returns NULL_TREE otherwise.
5167
5168    TODO: This function should be made smarter so that it can handle address
5169    arithmetic as if they were array accesses, etc.  */
5170
5171 tree
5172 find_data_references_in_loop (struct loop *loop,
5173                               vec<data_reference_p> *datarefs)
5174 {
5175   basic_block bb, *bbs;
5176   unsigned int i;
5177
5178   bbs = get_loop_body_in_dom_order (loop);
5179
5180   for (i = 0; i < loop->num_nodes; i++)
5181     {
5182       bb = bbs[i];
5183
5184       if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
5185         {
5186           free (bbs);
5187           return chrec_dont_know;
5188         }
5189     }
5190   free (bbs);
5191
5192   return NULL_TREE;
5193 }
5194
5195 /* Return the alignment in bytes that DRB is guaranteed to have at all
5196    times.  */
5197
5198 unsigned int
5199 dr_alignment (innermost_loop_behavior *drb)
5200 {
5201   /* Get the alignment of BASE_ADDRESS + INIT.  */
5202   unsigned int alignment = drb->base_alignment;
5203   unsigned int misalignment = (drb->base_misalignment
5204                                + TREE_INT_CST_LOW (drb->init));
5205   if (misalignment != 0)
5206     alignment = MIN (alignment, misalignment & -misalignment);
5207
5208   /* Cap it to the alignment of OFFSET.  */
5209   if (!integer_zerop (drb->offset))
5210     alignment = MIN (alignment, drb->offset_alignment);
5211
5212   /* Cap it to the alignment of STEP.  */
5213   if (!integer_zerop (drb->step))
5214     alignment = MIN (alignment, drb->step_alignment);
5215
5216   return alignment;
5217 }
5218
5219 /* If BASE is a pointer-typed SSA name, try to find the object that it
5220    is based on.  Return this object X on success and store the alignment
5221    in bytes of BASE - &X in *ALIGNMENT_OUT.  */
5222
5223 static tree
5224 get_base_for_alignment_1 (tree base, unsigned int *alignment_out)
5225 {
5226   if (TREE_CODE (base) != SSA_NAME || !POINTER_TYPE_P (TREE_TYPE (base)))
5227     return NULL_TREE;
5228
5229   gimple *def = SSA_NAME_DEF_STMT (base);
5230   base = analyze_scalar_evolution (loop_containing_stmt (def), base);
5231
5232   /* Peel chrecs and record the minimum alignment preserved by
5233      all steps.  */
5234   unsigned int alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
5235   while (TREE_CODE (base) == POLYNOMIAL_CHREC)
5236     {
5237       unsigned int step_alignment = highest_pow2_factor (CHREC_RIGHT (base));
5238       alignment = MIN (alignment, step_alignment);
5239       base = CHREC_LEFT (base);
5240     }
5241
5242   /* Punt if the expression is too complicated to handle.  */
5243   if (tree_contains_chrecs (base, NULL) || !POINTER_TYPE_P (TREE_TYPE (base)))
5244     return NULL_TREE;
5245
5246   /* The only useful cases are those for which a dereference folds to something
5247      other than an INDIRECT_REF.  */
5248   tree ref_type = TREE_TYPE (TREE_TYPE (base));
5249   tree ref = fold_indirect_ref_1 (UNKNOWN_LOCATION, ref_type, base);
5250   if (!ref)
5251     return NULL_TREE;
5252
5253   /* Analyze the base to which the steps we peeled were applied.  */
5254   poly_int64 bitsize, bitpos, bytepos;
5255   machine_mode mode;
5256   int unsignedp, reversep, volatilep;
5257   tree offset;
5258   base = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
5259                               &unsignedp, &reversep, &volatilep);
5260   if (!base || !multiple_p (bitpos, BITS_PER_UNIT, &bytepos))
5261     return NULL_TREE;
5262
5263   /* Restrict the alignment to that guaranteed by the offsets.  */
5264   unsigned int bytepos_alignment = known_alignment (bytepos);
5265   if (bytepos_alignment != 0)
5266     alignment = MIN (alignment, bytepos_alignment);
5267   if (offset)
5268     {
5269       unsigned int offset_alignment = highest_pow2_factor (offset);
5270       alignment = MIN (alignment, offset_alignment);
5271     }
5272
5273   *alignment_out = alignment;
5274   return base;
5275 }
5276
5277 /* Return the object whose alignment would need to be changed in order
5278    to increase the alignment of ADDR.  Store the maximum achievable
5279    alignment in *MAX_ALIGNMENT.  */
5280
5281 tree
5282 get_base_for_alignment (tree addr, unsigned int *max_alignment)
5283 {
5284   tree base = get_base_for_alignment_1 (addr, max_alignment);
5285   if (base)
5286     return base;
5287
5288   if (TREE_CODE (addr) == ADDR_EXPR)
5289     addr = TREE_OPERAND (addr, 0);
5290   *max_alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
5291   return addr;
5292 }
5293
5294 /* Recursive helper function.  */
5295
5296 static bool
5297 find_loop_nest_1 (struct loop *loop, vec<loop_p> *loop_nest)
5298 {
5299   /* Inner loops of the nest should not contain siblings.  Example:
5300      when there are two consecutive loops,
5301
5302      | loop_0
5303      |   loop_1
5304      |     A[{0, +, 1}_1]
5305      |   endloop_1
5306      |   loop_2
5307      |     A[{0, +, 1}_2]
5308      |   endloop_2
5309      | endloop_0
5310
5311      the dependence relation cannot be captured by the distance
5312      abstraction.  */
5313   if (loop->next)
5314     return false;
5315
5316   loop_nest->safe_push (loop);
5317   if (loop->inner)
5318     return find_loop_nest_1 (loop->inner, loop_nest);
5319   return true;
5320 }
5321
5322 /* Return false when the LOOP is not well nested.  Otherwise return
5323    true and insert in LOOP_NEST the loops of the nest.  LOOP_NEST will
5324    contain the loops from the outermost to the innermost, as they will
5325    appear in the classic distance vector.  */
5326
5327 bool
5328 find_loop_nest (struct loop *loop, vec<loop_p> *loop_nest)
5329 {
5330   loop_nest->safe_push (loop);
5331   if (loop->inner)
5332     return find_loop_nest_1 (loop->inner, loop_nest);
5333   return true;
5334 }
5335
5336 /* Returns true when the data dependences have been computed, false otherwise.
5337    Given a loop nest LOOP, the following vectors are returned:
5338    DATAREFS is initialized to all the array elements contained in this loop,
5339    DEPENDENCE_RELATIONS contains the relations between the data references.
5340    Compute read-read and self relations if
5341    COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE.  */
5342
5343 bool
5344 compute_data_dependences_for_loop (struct loop *loop,
5345                                    bool compute_self_and_read_read_dependences,
5346                                    vec<loop_p> *loop_nest,
5347                                    vec<data_reference_p> *datarefs,
5348                                    vec<ddr_p> *dependence_relations)
5349 {
5350   bool res = true;
5351
5352   memset (&dependence_stats, 0, sizeof (dependence_stats));
5353
5354   /* If the loop nest is not well formed, or one of the data references
5355      is not computable, give up without spending time to compute other
5356      dependences.  */
5357   if (!loop
5358       || !find_loop_nest (loop, loop_nest)
5359       || find_data_references_in_loop (loop, datarefs) == chrec_dont_know
5360       || !compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
5361                                    compute_self_and_read_read_dependences))
5362     res = false;
5363
5364   if (dump_file && (dump_flags & TDF_STATS))
5365     {
5366       fprintf (dump_file, "Dependence tester statistics:\n");
5367
5368       fprintf (dump_file, "Number of dependence tests: %d\n",
5369                dependence_stats.num_dependence_tests);
5370       fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
5371                dependence_stats.num_dependence_dependent);
5372       fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
5373                dependence_stats.num_dependence_independent);
5374       fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
5375                dependence_stats.num_dependence_undetermined);
5376
5377       fprintf (dump_file, "Number of subscript tests: %d\n",
5378                dependence_stats.num_subscript_tests);
5379       fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
5380                dependence_stats.num_subscript_undetermined);
5381       fprintf (dump_file, "Number of same subscript function: %d\n",
5382                dependence_stats.num_same_subscript_function);
5383
5384       fprintf (dump_file, "Number of ziv tests: %d\n",
5385                dependence_stats.num_ziv);
5386       fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
5387                dependence_stats.num_ziv_dependent);
5388       fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
5389                dependence_stats.num_ziv_independent);
5390       fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
5391                dependence_stats.num_ziv_unimplemented);
5392
5393       fprintf (dump_file, "Number of siv tests: %d\n",
5394                dependence_stats.num_siv);
5395       fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
5396                dependence_stats.num_siv_dependent);
5397       fprintf (dump_file, "Number of siv tests returning independent: %d\n",
5398                dependence_stats.num_siv_independent);
5399       fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
5400                dependence_stats.num_siv_unimplemented);
5401
5402       fprintf (dump_file, "Number of miv tests: %d\n",
5403                dependence_stats.num_miv);
5404       fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
5405                dependence_stats.num_miv_dependent);
5406       fprintf (dump_file, "Number of miv tests returning independent: %d\n",
5407                dependence_stats.num_miv_independent);
5408       fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
5409                dependence_stats.num_miv_unimplemented);
5410     }
5411
5412   return res;
5413 }
5414
5415 /* Free the memory used by a data dependence relation DDR.  */
5416
5417 void
5418 free_dependence_relation (struct data_dependence_relation *ddr)
5419 {
5420   if (ddr == NULL)
5421     return;
5422
5423   if (DDR_SUBSCRIPTS (ddr).exists ())
5424     free_subscripts (DDR_SUBSCRIPTS (ddr));
5425   DDR_DIST_VECTS (ddr).release ();
5426   DDR_DIR_VECTS (ddr).release ();
5427
5428   free (ddr);
5429 }
5430
5431 /* Free the memory used by the data dependence relations from
5432    DEPENDENCE_RELATIONS.  */
5433
5434 void
5435 free_dependence_relations (vec<ddr_p> dependence_relations)
5436 {
5437   unsigned int i;
5438   struct data_dependence_relation *ddr;
5439
5440   FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
5441     if (ddr)
5442       free_dependence_relation (ddr);
5443
5444   dependence_relations.release ();
5445 }
5446
5447 /* Free the memory used by the data references from DATAREFS.  */
5448
5449 void
5450 free_data_refs (vec<data_reference_p> datarefs)
5451 {
5452   unsigned int i;
5453   struct data_reference *dr;
5454
5455   FOR_EACH_VEC_ELT (datarefs, i, dr)
5456     free_data_ref (dr);
5457   datarefs.release ();
5458 }
5459
5460 /* Common routine implementing both dr_direction_indicator and
5461    dr_zero_step_indicator.  Return USEFUL_MIN if the indicator is known
5462    to be >= USEFUL_MIN and -1 if the indicator is known to be negative.
5463    Return the step as the indicator otherwise.  */
5464
5465 static tree
5466 dr_step_indicator (struct data_reference *dr, int useful_min)
5467 {
5468   tree step = DR_STEP (dr);
5469   if (!step)
5470     return NULL_TREE;
5471   STRIP_NOPS (step);
5472   /* Look for cases where the step is scaled by a positive constant
5473      integer, which will often be the access size.  If the multiplication
5474      doesn't change the sign (due to overflow effects) then we can
5475      test the unscaled value instead.  */
5476   if (TREE_CODE (step) == MULT_EXPR
5477       && TREE_CODE (TREE_OPERAND (step, 1)) == INTEGER_CST
5478       && tree_int_cst_sgn (TREE_OPERAND (step, 1)) > 0)
5479     {
5480       tree factor = TREE_OPERAND (step, 1);
5481       step = TREE_OPERAND (step, 0);
5482
5483       /* Strip widening and truncating conversions as well as nops.  */
5484       if (CONVERT_EXPR_P (step)
5485           && INTEGRAL_TYPE_P (TREE_TYPE (TREE_OPERAND (step, 0))))
5486         step = TREE_OPERAND (step, 0);
5487       tree type = TREE_TYPE (step);
5488
5489       /* Get the range of step values that would not cause overflow.  */
5490       widest_int minv = (wi::to_widest (TYPE_MIN_VALUE (ssizetype))
5491                          / wi::to_widest (factor));
5492       widest_int maxv = (wi::to_widest (TYPE_MAX_VALUE (ssizetype))
5493                          / wi::to_widest (factor));
5494
5495       /* Get the range of values that the unconverted step actually has.  */
5496       wide_int step_min, step_max;
5497       if (TREE_CODE (step) != SSA_NAME
5498           || get_range_info (step, &step_min, &step_max) != VR_RANGE)
5499         {
5500           step_min = wi::to_wide (TYPE_MIN_VALUE (type));
5501           step_max = wi::to_wide (TYPE_MAX_VALUE (type));
5502         }
5503
5504       /* Check whether the unconverted step has an acceptable range.  */
5505       signop sgn = TYPE_SIGN (type);
5506       if (wi::les_p (minv, widest_int::from (step_min, sgn))
5507           && wi::ges_p (maxv, widest_int::from (step_max, sgn)))
5508         {
5509           if (wi::ge_p (step_min, useful_min, sgn))
5510             return ssize_int (useful_min);
5511           else if (wi::lt_p (step_max, 0, sgn))
5512             return ssize_int (-1);
5513           else
5514             return fold_convert (ssizetype, step);
5515         }
5516     }
5517   return DR_STEP (dr);
5518 }
5519
5520 /* Return a value that is negative iff DR has a negative step.  */
5521
5522 tree
5523 dr_direction_indicator (struct data_reference *dr)
5524 {
5525   return dr_step_indicator (dr, 0);
5526 }
5527
5528 /* Return a value that is zero iff DR has a zero step.  */
5529
5530 tree
5531 dr_zero_step_indicator (struct data_reference *dr)
5532 {
5533   return dr_step_indicator (dr, 1);
5534 }
5535
5536 /* Return true if DR is known to have a nonnegative (but possibly zero)
5537    step.  */
5538
5539 bool
5540 dr_known_forward_stride_p (struct data_reference *dr)
5541 {
5542   tree indicator = dr_direction_indicator (dr);
5543   tree neg_step_val = fold_binary (LT_EXPR, boolean_type_node,
5544                                    fold_convert (ssizetype, indicator),
5545                                    ssize_int (0));
5546   return neg_step_val && integer_zerop (neg_step_val);
5547 }