gcc/tree-data-ref.cc

   1 /* Data references and dependences detectors.
   2    Copyright (C) 2003-2022 Free Software Foundation, Inc.
   3    Contributed by Sebastian Pop <pop@cri.ensmp.fr>
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 /* This pass walks a given loop structure searching for array
  22    references.  The information about the array accesses is recorded
  23    in DATA_REFERENCE structures.
  24
  25    The basic test for determining the dependences is:
  26    given two access functions chrec1 and chrec2 to a same array, and
  27    x and y two vectors from the iteration domain, the same element of
  28    the array is accessed twice at iterations x and y if and only if:
  29    |             chrec1 (x) == chrec2 (y).
  30
  31    The goals of this analysis are:
  32
  33    - to determine the independence: the relation between two
  34      independent accesses is qualified with the chrec_known (this
  35      information allows a loop parallelization),
  36
  37    - when two data references access the same data, to qualify the
  38      dependence relation with classic dependence representations:
  39
  40        - distance vectors
  41        - direction vectors
  42        - loop carried level dependence
  43        - polyhedron dependence
  44      or with the chains of recurrences based representation,
  45
  46    - to define a knowledge base for storing the data dependence
  47      information,
  48
  49    - to define an interface to access this data.
  50
  51
  52    Definitions:
  53
  54    - subscript: given two array accesses a subscript is the tuple
  55    composed of the access functions for a given dimension.  Example:
  56    Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
  57    (f1, g1), (f2, g2), (f3, g3).
  58
  59    - Diophantine equation: an equation whose coefficients and
  60    solutions are integer constants, for example the equation
  61    |   3*x + 2*y = 1
  62    has an integer solution x = 1 and y = -1.
  63
  64    References:
  65
  66    - "Advanced Compilation for High Performance Computing" by Randy
  67    Allen and Ken Kennedy.
  68    http://citeseer.ist.psu.edu/goff91practical.html
  69
  70    - "Loop Transformations for Restructuring Compilers - The Foundations"
  71    by Utpal Banerjee.
  72
  73
  74 */
  75
  76 #include "config.h"
  77 #include "system.h"
  78 #include "coretypes.h"
  79 #include "backend.h"
  80 #include "rtl.h"
  81 #include "tree.h"
  82 #include "gimple.h"
  83 #include "gimple-pretty-print.h"
  84 #include "alias.h"
  85 #include "fold-const.h"
  86 #include "expr.h"
  87 #include "gimple-iterator.h"
  88 #include "tree-ssa-loop-niter.h"
  89 #include "tree-ssa-loop.h"
  90 #include "tree-ssa.h"
  91 #include "cfgloop.h"
  92 #include "tree-data-ref.h"
  93 #include "tree-scalar-evolution.h"
  94 #include "dumpfile.h"
  95 #include "tree-affine.h"
  96 #include "builtins.h"
  97 #include "tree-eh.h"
  98 #include "ssa.h"
  99 #include "internal-fn.h"
 100 #include "vr-values.h"
 101 #include "range-op.h"
 102 #include "tree-ssa-loop-ivopts.h"
 103
 104 static struct datadep_stats
 105 {
 106   int num_dependence_tests;
 107   int num_dependence_dependent;
 108   int num_dependence_independent;
 109   int num_dependence_undetermined;
 110
 111   int num_subscript_tests;
 112   int num_subscript_undetermined;
 113   int num_same_subscript_function;
 114
 115   int num_ziv;
 116   int num_ziv_independent;
 117   int num_ziv_dependent;
 118   int num_ziv_unimplemented;
 119
 120   int num_siv;
 121   int num_siv_independent;
 122   int num_siv_dependent;
 123   int num_siv_unimplemented;
 124
 125   int num_miv;
 126   int num_miv_independent;
 127   int num_miv_dependent;
 128   int num_miv_unimplemented;
 129 } dependence_stats;
 130
 131 static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
 132                                            unsigned int, unsigned int,
 133                                            class loop *);
 134 /* Returns true iff A divides B.  */
 135
 136 static inline bool
 137 tree_fold_divides_p (const_tree a, const_tree b)
 138 {
 139   gcc_assert (TREE_CODE (a) == INTEGER_CST);
 140   gcc_assert (TREE_CODE (b) == INTEGER_CST);
 141   return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
 142 }
 143
 144 /* Returns true iff A divides B.  */
 145
 146 static inline bool
 147 int_divides_p (lambda_int a, lambda_int b)
 148 {
 149   return ((b % a) == 0);
 150 }
 151
 152 /* Return true if reference REF contains a union access.  */
 153
 154 static bool
 155 ref_contains_union_access_p (tree ref)
 156 {
 157   while (handled_component_p (ref))
 158     {
 159       ref = TREE_OPERAND (ref, 0);
 160       if (TREE_CODE (TREE_TYPE (ref)) == UNION_TYPE
 161           || TREE_CODE (TREE_TYPE (ref)) == QUAL_UNION_TYPE)
 162         return true;
 163     }
 164   return false;
 165 }
 166
 167 \f
 168
 169 /* Dump into FILE all the data references from DATAREFS.  */
 170
 171 static void
 172 dump_data_references (FILE *file, vec<data_reference_p> datarefs)
 173 {
 174   for (data_reference *dr : datarefs)
 175     dump_data_reference (file, dr);
 176 }
 177
 178 /* Unified dump into FILE all the data references from DATAREFS.  */
 179
 180 DEBUG_FUNCTION void
 181 debug (vec<data_reference_p> &ref)
 182 {
 183   dump_data_references (stderr, ref);
 184 }
 185
 186 DEBUG_FUNCTION void
 187 debug (vec<data_reference_p> *ptr)
 188 {
 189   if (ptr)
 190     debug (*ptr);
 191   else
 192     fprintf (stderr, "<nil>\n");
 193 }
 194
 195
 196 /* Dump into STDERR all the data references from DATAREFS.  */
 197
 198 DEBUG_FUNCTION void
 199 debug_data_references (vec<data_reference_p> datarefs)
 200 {
 201   dump_data_references (stderr, datarefs);
 202 }
 203
 204 /* Print to STDERR the data_reference DR.  */
 205
 206 DEBUG_FUNCTION void
 207 debug_data_reference (struct data_reference *dr)
 208 {
 209   dump_data_reference (stderr, dr);
 210 }
 211
 212 /* Dump function for a DATA_REFERENCE structure.  */
 213
 214 void
 215 dump_data_reference (FILE *outf,
 216                      struct data_reference *dr)
 217 {
 218   unsigned int i;
 219
 220   fprintf (outf, "#(Data Ref: \n");
 221   fprintf (outf, "#  bb: %d \n", gimple_bb (DR_STMT (dr))->index);
 222   fprintf (outf, "#  stmt: ");
 223   print_gimple_stmt (outf, DR_STMT (dr), 0);
 224   fprintf (outf, "#  ref: ");
 225   print_generic_stmt (outf, DR_REF (dr));
 226   fprintf (outf, "#  base_object: ");
 227   print_generic_stmt (outf, DR_BASE_OBJECT (dr));
 228
 229   for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
 230     {
 231       fprintf (outf, "#  Access function %d: ", i);
 232       print_generic_stmt (outf, DR_ACCESS_FN (dr, i));
 233     }
 234   fprintf (outf, "#)\n");
 235 }
 236
 237 /* Unified dump function for a DATA_REFERENCE structure.  */
 238
 239 DEBUG_FUNCTION void
 240 debug (data_reference &ref)
 241 {
 242   dump_data_reference (stderr, &ref);
 243 }
 244
 245 DEBUG_FUNCTION void
 246 debug (data_reference *ptr)
 247 {
 248   if (ptr)
 249     debug (*ptr);
 250   else
 251     fprintf (stderr, "<nil>\n");
 252 }
 253
 254
 255 /* Dumps the affine function described by FN to the file OUTF.  */
 256
 257 DEBUG_FUNCTION void
 258 dump_affine_function (FILE *outf, affine_fn fn)
 259 {
 260   unsigned i;
 261   tree coef;
 262
 263   print_generic_expr (outf, fn[0], TDF_SLIM);
 264   for (i = 1; fn.iterate (i, &coef); i++)
 265     {
 266       fprintf (outf, " + ");
 267       print_generic_expr (outf, coef, TDF_SLIM);
 268       fprintf (outf, " * x_%u", i);
 269     }
 270 }
 271
 272 /* Dumps the conflict function CF to the file OUTF.  */
 273
 274 DEBUG_FUNCTION void
 275 dump_conflict_function (FILE *outf, conflict_function *cf)
 276 {
 277   unsigned i;
 278
 279   if (cf->n == NO_DEPENDENCE)
 280     fprintf (outf, "no dependence");
 281   else if (cf->n == NOT_KNOWN)
 282     fprintf (outf, "not known");
 283   else
 284     {
 285       for (i = 0; i < cf->n; i++)
 286         {
 287           if (i != 0)
 288             fprintf (outf, " ");
 289           fprintf (outf, "[");
 290           dump_affine_function (outf, cf->fns[i]);
 291           fprintf (outf, "]");
 292         }
 293     }
 294 }
 295
 296 /* Dump function for a SUBSCRIPT structure.  */
 297
 298 DEBUG_FUNCTION void
 299 dump_subscript (FILE *outf, struct subscript *subscript)
 300 {
 301   conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
 302
 303   fprintf (outf, "\n (subscript \n");
 304   fprintf (outf, "  iterations_that_access_an_element_twice_in_A: ");
 305   dump_conflict_function (outf, cf);
 306   if (CF_NONTRIVIAL_P (cf))
 307     {
 308       tree last_iteration = SUB_LAST_CONFLICT (subscript);
 309       fprintf (outf, "\n  last_conflict: ");
 310       print_generic_expr (outf, last_iteration);
 311     }
 312
 313   cf = SUB_CONFLICTS_IN_B (subscript);
 314   fprintf (outf, "\n  iterations_that_access_an_element_twice_in_B: ");
 315   dump_conflict_function (outf, cf);
 316   if (CF_NONTRIVIAL_P (cf))
 317     {
 318       tree last_iteration = SUB_LAST_CONFLICT (subscript);
 319       fprintf (outf, "\n  last_conflict: ");
 320       print_generic_expr (outf, last_iteration);
 321     }
 322
 323   fprintf (outf, "\n  (Subscript distance: ");
 324   print_generic_expr (outf, SUB_DISTANCE (subscript));
 325   fprintf (outf, " ))\n");
 326 }
 327
 328 /* Print the classic direction vector DIRV to OUTF.  */
 329
 330 DEBUG_FUNCTION void
 331 print_direction_vector (FILE *outf,
 332                         lambda_vector dirv,
 333                         int length)
 334 {
 335   int eq;
 336
 337   for (eq = 0; eq < length; eq++)
 338     {
 339       enum data_dependence_direction dir = ((enum data_dependence_direction)
 340                                             dirv[eq]);
 341
 342       switch (dir)
 343         {
 344         case dir_positive:
 345           fprintf (outf, "    +");
 346           break;
 347         case dir_negative:
 348           fprintf (outf, "    -");
 349           break;
 350         case dir_equal:
 351           fprintf (outf, "    =");
 352           break;
 353         case dir_positive_or_equal:
 354           fprintf (outf, "   +=");
 355           break;
 356         case dir_positive_or_negative:
 357           fprintf (outf, "   +-");
 358           break;
 359         case dir_negative_or_equal:
 360           fprintf (outf, "   -=");
 361           break;
 362         case dir_star:
 363           fprintf (outf, "    *");
 364           break;
 365         default:
 366           fprintf (outf, "indep");
 367           break;
 368         }
 369     }
 370   fprintf (outf, "\n");
 371 }
 372
 373 /* Print a vector of direction vectors.  */
 374
 375 DEBUG_FUNCTION void
 376 print_dir_vectors (FILE *outf, vec<lambda_vector> dir_vects,
 377                    int length)
 378 {
 379   for (lambda_vector v : dir_vects)
 380     print_direction_vector (outf, v, length);
 381 }
 382
 383 /* Print out a vector VEC of length N to OUTFILE.  */
 384
 385 DEBUG_FUNCTION void
 386 print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
 387 {
 388   int i;
 389
 390   for (i = 0; i < n; i++)
 391     fprintf (outfile, HOST_WIDE_INT_PRINT_DEC " ", vector[i]);
 392   fprintf (outfile, "\n");
 393 }
 394
 395 /* Print a vector of distance vectors.  */
 396
 397 DEBUG_FUNCTION void
 398 print_dist_vectors (FILE *outf, vec<lambda_vector> dist_vects,
 399                     int length)
 400 {
 401   for (lambda_vector v : dist_vects)
 402     print_lambda_vector (outf, v, length);
 403 }
 404
 405 /* Dump function for a DATA_DEPENDENCE_RELATION structure.  */
 406
 407 DEBUG_FUNCTION void
 408 dump_data_dependence_relation (FILE *outf, const data_dependence_relation *ddr)
 409 {
 410   struct data_reference *dra, *drb;
 411
 412   fprintf (outf, "(Data Dep: \n");
 413
 414   if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
 415     {
 416       if (ddr)
 417         {
 418           dra = DDR_A (ddr);
 419           drb = DDR_B (ddr);
 420           if (dra)
 421             dump_data_reference (outf, dra);
 422           else
 423             fprintf (outf, "    (nil)\n");
 424           if (drb)
 425             dump_data_reference (outf, drb);
 426           else
 427             fprintf (outf, "    (nil)\n");
 428         }
 429       fprintf (outf, "    (don't know)\n)\n");
 430       return;
 431     }
 432
 433   dra = DDR_A (ddr);
 434   drb = DDR_B (ddr);
 435   dump_data_reference (outf, dra);
 436   dump_data_reference (outf, drb);
 437
 438   if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
 439     fprintf (outf, "    (no dependence)\n");
 440
 441   else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
 442     {
 443       unsigned int i;
 444       class loop *loopi;
 445
 446       subscript *sub;
 447       FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
 448         {
 449           fprintf (outf, "  access_fn_A: ");
 450           print_generic_stmt (outf, SUB_ACCESS_FN (sub, 0));
 451           fprintf (outf, "  access_fn_B: ");
 452           print_generic_stmt (outf, SUB_ACCESS_FN (sub, 1));
 453           dump_subscript (outf, sub);
 454         }
 455
 456       fprintf (outf, "  loop nest: (");
 457       FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr), i, loopi)
 458         fprintf (outf, "%d ", loopi->num);
 459       fprintf (outf, ")\n");
 460
 461       for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
 462         {
 463           fprintf (outf, "  distance_vector: ");
 464           print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
 465                                DDR_NB_LOOPS (ddr));
 466         }
 467
 468       for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
 469         {
 470           fprintf (outf, "  direction_vector: ");
 471           print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
 472                                   DDR_NB_LOOPS (ddr));
 473         }
 474     }
 475
 476   fprintf (outf, ")\n");
 477 }
 478
 479 /* Debug version.  */
 480
 481 DEBUG_FUNCTION void
 482 debug_data_dependence_relation (const struct data_dependence_relation *ddr)
 483 {
 484   dump_data_dependence_relation (stderr, ddr);
 485 }
 486
 487 /* Dump into FILE all the dependence relations from DDRS.  */
 488
 489 DEBUG_FUNCTION void
 490 dump_data_dependence_relations (FILE *file, const vec<ddr_p> &ddrs)
 491 {
 492   for (auto ddr : ddrs)
 493     dump_data_dependence_relation (file, ddr);
 494 }
 495
 496 DEBUG_FUNCTION void
 497 debug (vec<ddr_p> &ref)
 498 {
 499   dump_data_dependence_relations (stderr, ref);
 500 }
 501
 502 DEBUG_FUNCTION void
 503 debug (vec<ddr_p> *ptr)
 504 {
 505   if (ptr)
 506     debug (*ptr);
 507   else
 508     fprintf (stderr, "<nil>\n");
 509 }
 510
 511
 512 /* Dump to STDERR all the dependence relations from DDRS.  */
 513
 514 DEBUG_FUNCTION void
 515 debug_data_dependence_relations (vec<ddr_p> ddrs)
 516 {
 517   dump_data_dependence_relations (stderr, ddrs);
 518 }
 519
 520 /* Dumps the distance and direction vectors in FILE.  DDRS contains
 521    the dependence relations, and VECT_SIZE is the size of the
 522    dependence vectors, or in other words the number of loops in the
 523    considered nest.  */
 524
 525 DEBUG_FUNCTION void
 526 dump_dist_dir_vectors (FILE *file, vec<ddr_p> ddrs)
 527 {
 528   for (data_dependence_relation *ddr : ddrs)
 529     if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
 530       {
 531         for (lambda_vector v : DDR_DIST_VECTS (ddr))
 532           {
 533             fprintf (file, "DISTANCE_V (");
 534             print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
 535             fprintf (file, ")\n");
 536           }
 537
 538         for (lambda_vector v : DDR_DIR_VECTS (ddr))
 539           {
 540             fprintf (file, "DIRECTION_V (");
 541             print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
 542             fprintf (file, ")\n");
 543           }
 544       }
 545
 546   fprintf (file, "\n\n");
 547 }
 548
 549 /* Dumps the data dependence relations DDRS in FILE.  */
 550
 551 DEBUG_FUNCTION void
 552 dump_ddrs (FILE *file, vec<ddr_p> ddrs)
 553 {
 554   for (data_dependence_relation *ddr : ddrs)
 555     dump_data_dependence_relation (file, ddr);
 556
 557   fprintf (file, "\n\n");
 558 }
 559
 560 DEBUG_FUNCTION void
 561 debug_ddrs (vec<ddr_p> ddrs)
 562 {
 563   dump_ddrs (stderr, ddrs);
 564 }
 565
 566 /* If RESULT_RANGE is nonnull, set *RESULT_RANGE to the range of
 567    OP0 CODE OP1, where:
 568
 569    - OP0 CODE OP1 has integral type TYPE
 570    - the range of OP0 is given by OP0_RANGE and
 571    - the range of OP1 is given by OP1_RANGE.
 572
 573    Independently of RESULT_RANGE, try to compute:
 574
 575      DELTA = ((sizetype) OP0 CODE (sizetype) OP1)
 576              - (sizetype) (OP0 CODE OP1)
 577
 578    as a constant and subtract DELTA from the ssizetype constant in *OFF.
 579    Return true on success, or false if DELTA is not known at compile time.
 580
 581    Truncation and sign changes are known to distribute over CODE, i.e.
 582
 583      (itype) (A CODE B) == (itype) A CODE (itype) B
 584
 585    for any integral type ITYPE whose precision is no greater than the
 586    precision of A and B.  */
 587
 588 static bool
 589 compute_distributive_range (tree type, value_range &op0_range,
 590                             tree_code code, value_range &op1_range,
 591                             tree *off, value_range *result_range)
 592 {
 593   gcc_assert (INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type));
 594   if (result_range)
 595     {
 596       range_operator *op = range_op_handler (code, type);
 597       op->fold_range (*result_range, type, op0_range, op1_range);
 598     }
 599
 600   /* The distributive property guarantees that if TYPE is no narrower
 601      than SIZETYPE,
 602
 603        (sizetype) (OP0 CODE OP1) == (sizetype) OP0 CODE (sizetype) OP1
 604
 605      and so we can treat DELTA as zero.  */
 606   if (TYPE_PRECISION (type) >= TYPE_PRECISION (sizetype))
 607     return true;
 608
 609   /* If overflow is undefined, we can assume that:
 610
 611        X == (ssizetype) OP0 CODE (ssizetype) OP1
 612
 613      is within the range of TYPE, i.e.:
 614
 615        X == (ssizetype) (TYPE) X
 616
 617      Distributing the (TYPE) truncation over X gives:
 618
 619        X == (ssizetype) (OP0 CODE OP1)
 620
 621      Casting both sides to sizetype and distributing the sizetype cast
 622      over X gives:
 623
 624        (sizetype) OP0 CODE (sizetype) OP1 == (sizetype) (OP0 CODE OP1)
 625
 626      and so we can treat DELTA as zero.  */
 627   if (TYPE_OVERFLOW_UNDEFINED (type))
 628     return true;
 629
 630   /* Compute the range of:
 631
 632        (ssizetype) OP0 CODE (ssizetype) OP1
 633
 634      The distributive property guarantees that this has the same bitpattern as:
 635
 636        (sizetype) OP0 CODE (sizetype) OP1
 637
 638      but its range is more conducive to analysis.  */
 639   range_cast (op0_range, ssizetype);
 640   range_cast (op1_range, ssizetype);
 641   value_range wide_range;
 642   range_operator *op = range_op_handler (code, ssizetype);
 643   bool saved_flag_wrapv = flag_wrapv;
 644   flag_wrapv = 1;
 645   op->fold_range (wide_range, ssizetype, op0_range, op1_range);
 646   flag_wrapv = saved_flag_wrapv;
 647   if (wide_range.num_pairs () != 1 || !range_int_cst_p (&wide_range))
 648     return false;
 649
 650   wide_int lb = wide_range.lower_bound ();
 651   wide_int ub = wide_range.upper_bound ();
 652
 653   /* Calculate the number of times that each end of the range overflows or
 654      underflows TYPE.  We can only calculate DELTA if the numbers match.  */
 655   unsigned int precision = TYPE_PRECISION (type);
 656   if (!TYPE_UNSIGNED (type))
 657     {
 658       wide_int type_min = wi::mask (precision - 1, true, lb.get_precision ());
 659       lb -= type_min;
 660       ub -= type_min;
 661     }
 662   wide_int upper_bits = wi::mask (precision, true, lb.get_precision ());
 663   lb &= upper_bits;
 664   ub &= upper_bits;
 665   if (lb != ub)
 666     return false;
 667
 668   /* OP0 CODE OP1 overflows exactly arshift (LB, PRECISION) times, with
 669      negative values indicating underflow.  The low PRECISION bits of LB
 670      are clear, so DELTA is therefore LB (== UB).  */
 671   *off = wide_int_to_tree (ssizetype, wi::to_wide (*off) - lb);
 672   return true;
 673 }
 674
 675 /* Return true if (sizetype) OP == (sizetype) (TO_TYPE) OP,
 676    given that OP has type FROM_TYPE and range RANGE.  Both TO_TYPE and
 677    FROM_TYPE are integral types.  */
 678
 679 static bool
 680 nop_conversion_for_offset_p (tree to_type, tree from_type, value_range &range)
 681 {
 682   gcc_assert (INTEGRAL_TYPE_P (to_type)
 683               && INTEGRAL_TYPE_P (from_type)
 684               && !TYPE_OVERFLOW_TRAPS (to_type)
 685               && !TYPE_OVERFLOW_TRAPS (from_type));
 686
 687   /* Converting to something no narrower than sizetype and then to sizetype
 688      is equivalent to converting directly to sizetype.  */
 689   if (TYPE_PRECISION (to_type) >= TYPE_PRECISION (sizetype))
 690     return true;
 691
 692   /* Check whether TO_TYPE can represent all values that FROM_TYPE can.  */
 693   if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type)
 694       && (TYPE_UNSIGNED (from_type) || !TYPE_UNSIGNED (to_type)))
 695     return true;
 696
 697   /* For narrowing conversions, we could in principle test whether
 698      the bits in FROM_TYPE but not in TO_TYPE have a fixed value
 699      and apply a constant adjustment.
 700
 701      For other conversions (which involve a sign change) we could
 702      check that the signs are always equal, and apply a constant
 703      adjustment if the signs are negative.
 704
 705      However, both cases should be rare.  */
 706   return range_fits_type_p (&range, TYPE_PRECISION (to_type),
 707                             TYPE_SIGN (to_type));
 708 }
 709
 710 static void
 711 split_constant_offset (tree type, tree *var, tree *off,
 712                        value_range *result_range,
 713                        hash_map<tree, std::pair<tree, tree> > &cache,
 714                        unsigned *limit);
 715
 716 /* Helper function for split_constant_offset.  If TYPE is a pointer type,
 717    try to express OP0 CODE OP1 as:
 718
 719      POINTER_PLUS <*VAR, (sizetype) *OFF>
 720
 721    where:
 722
 723    - *VAR has type TYPE
 724    - *OFF is a constant of type ssizetype.
 725
 726    If TYPE is an integral type, try to express (sizetype) (OP0 CODE OP1) as:
 727
 728      *VAR + (sizetype) *OFF
 729
 730    where:
 731
 732    - *VAR has type sizetype
 733    - *OFF is a constant of type ssizetype.
 734
 735    In both cases, OP0 CODE OP1 has type TYPE.
 736
 737    Return true on success.  A false return value indicates that we can't
 738    do better than set *OFF to zero.
 739
 740    When returning true, set RESULT_RANGE to the range of OP0 CODE OP1,
 741    if RESULT_RANGE is nonnull and if we can do better than assume VR_VARYING.
 742
 743    CACHE caches {*VAR, *OFF} pairs for SSA names that we've previously
 744    visited.  LIMIT counts down the number of SSA names that we are
 745    allowed to process before giving up.  */
 746
 747 static bool
 748 split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
 749                          tree *var, tree *off, value_range *result_range,
 750                          hash_map<tree, std::pair<tree, tree> > &cache,
 751                          unsigned *limit)
 752 {
 753   tree var0, var1;
 754   tree off0, off1;
 755   value_range op0_range, op1_range;
 756
 757   *var = NULL_TREE;
 758   *off = NULL_TREE;
 759
 760   if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type))
 761     return false;
 762
 763   switch (code)
 764     {
 765     case INTEGER_CST:
 766       *var = size_int (0);
 767       *off = fold_convert (ssizetype, op0);
 768       if (result_range)
 769         result_range->set (op0, op0);
 770       return true;
 771
 772     case POINTER_PLUS_EXPR:
 773       split_constant_offset (op0, &var0, &off0, nullptr, cache, limit);
 774       split_constant_offset (op1, &var1, &off1, nullptr, cache, limit);
 775       *var = fold_build2 (POINTER_PLUS_EXPR, type, var0, var1);
 776       *off = size_binop (PLUS_EXPR, off0, off1);
 777       return true;
 778
 779     case PLUS_EXPR:
 780     case MINUS_EXPR:
 781       split_constant_offset (op0, &var0, &off0, &op0_range, cache, limit);
 782       split_constant_offset (op1, &var1, &off1, &op1_range, cache, limit);
 783       *off = size_binop (code, off0, off1);
 784       if (!compute_distributive_range (type, op0_range, code, op1_range,
 785                                        off, result_range))
 786         return false;
 787       *var = fold_build2 (code, sizetype, var0, var1);
 788       return true;
 789
 790     case MULT_EXPR:
 791       if (TREE_CODE (op1) != INTEGER_CST)
 792         return false;
 793
 794       split_constant_offset (op0, &var0, &off0, &op0_range, cache, limit);
 795       op1_range.set (op1, op1);
 796       *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
 797       if (!compute_distributive_range (type, op0_range, code, op1_range,
 798                                        off, result_range))
 799         return false;
 800       *var = fold_build2 (MULT_EXPR, sizetype, var0,
 801                           fold_convert (sizetype, op1));
 802       return true;
 803
 804     case ADDR_EXPR:
 805       {
 806         tree base, poffset;
 807         poly_int64 pbitsize, pbitpos, pbytepos;
 808         machine_mode pmode;
 809         int punsignedp, preversep, pvolatilep;
 810
 811         op0 = TREE_OPERAND (op0, 0);
 812         base
 813           = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset, &pmode,
 814                                  &punsignedp, &preversep, &pvolatilep);
 815
 816         if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
 817           return false;
 818         base = build_fold_addr_expr (base);
 819         off0 = ssize_int (pbytepos);
 820
 821         if (poffset)
 822           {
 823             split_constant_offset (poffset, &poffset, &off1, nullptr,
 824                                    cache, limit);
 825             off0 = size_binop (PLUS_EXPR, off0, off1);
 826             base = fold_build_pointer_plus (base, poffset);
 827           }
 828
 829         var0 = fold_convert (type, base);
 830
 831         /* If variable length types are involved, punt, otherwise casts
 832            might be converted into ARRAY_REFs in gimplify_conversion.
 833            To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
 834            possibly no longer appears in current GIMPLE, might resurface.
 835            This perhaps could run
 836            if (CONVERT_EXPR_P (var0))
 837              {
 838                gimplify_conversion (&var0);
 839                // Attempt to fill in any within var0 found ARRAY_REF's
 840                // element size from corresponding op embedded ARRAY_REF,
 841                // if unsuccessful, just punt.
 842              }  */
 843         while (POINTER_TYPE_P (type))
 844           type = TREE_TYPE (type);
 845         if (int_size_in_bytes (type) < 0)
 846           return false;
 847
 848         *var = var0;
 849         *off = off0;
 850         return true;
 851       }
 852
 853     case SSA_NAME:
 854       {
 855         if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op0))
 856           return false;
 857
 858         gimple *def_stmt = SSA_NAME_DEF_STMT (op0);
 859         enum tree_code subcode;
 860
 861         if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
 862           return false;
 863
 864         subcode = gimple_assign_rhs_code (def_stmt);
 865
 866         /* We are using a cache to avoid un-CSEing large amounts of code.  */
 867         bool use_cache = false;
 868         if (!has_single_use (op0)
 869             && (subcode == POINTER_PLUS_EXPR
 870                 || subcode == PLUS_EXPR
 871                 || subcode == MINUS_EXPR
 872                 || subcode == MULT_EXPR
 873                 || subcode == ADDR_EXPR
 874                 || CONVERT_EXPR_CODE_P (subcode)))
 875           {
 876             use_cache = true;
 877             bool existed;
 878             std::pair<tree, tree> &e = cache.get_or_insert (op0, &existed);
 879             if (existed)
 880               {
 881                 if (integer_zerop (e.second))
 882                   return false;
 883                 *var = e.first;
 884                 *off = e.second;
 885                 /* The caller sets the range in this case.  */
 886                 return true;
 887               }
 888             e = std::make_pair (op0, ssize_int (0));
 889           }
 890
 891         if (*limit == 0)
 892           return false;
 893         --*limit;
 894
 895         var0 = gimple_assign_rhs1 (def_stmt);
 896         var1 = gimple_assign_rhs2 (def_stmt);
 897
 898         bool res = split_constant_offset_1 (type, var0, subcode, var1,
 899                                             var, off, nullptr, cache, limit);
 900         if (res && use_cache)
 901           *cache.get (op0) = std::make_pair (*var, *off);
 902         /* The caller sets the range in this case.  */
 903         return res;
 904       }
 905     CASE_CONVERT:
 906       {
 907         /* We can only handle the following conversions:
 908
 909            - Conversions from one pointer type to another pointer type.
 910
 911            - Conversions from one non-trapping integral type to another
 912              non-trapping integral type.  In this case, the recursive
 913              call makes sure that:
 914
 915                (sizetype) OP0
 916
 917              can be expressed as a sizetype operation involving VAR and OFF,
 918              and all we need to do is check whether:
 919
 920                (sizetype) OP0 == (sizetype) (TYPE) OP0
 921
 922            - Conversions from a non-trapping sizetype-size integral type to
 923              a like-sized pointer type.  In this case, the recursive call
 924              makes sure that:
 925
 926                (sizetype) OP0 == *VAR + (sizetype) *OFF
 927
 928              and we can convert that to:
 929
 930                POINTER_PLUS <(TYPE) *VAR, (sizetype) *OFF>
 931
 932            - Conversions from a sizetype-sized pointer type to a like-sized
 933              non-trapping integral type.  In this case, the recursive call
 934              makes sure that:
 935
 936                OP0 == POINTER_PLUS <*VAR, (sizetype) *OFF>
 937
 938              where the POINTER_PLUS and *VAR have the same precision as
 939              TYPE (and the same precision as sizetype).  Then:
 940
 941                (sizetype) (TYPE) OP0 == (sizetype) *VAR + (sizetype) *OFF.  */
 942         tree itype = TREE_TYPE (op0);
 943         if ((POINTER_TYPE_P (itype)
 944              || (INTEGRAL_TYPE_P (itype) && !TYPE_OVERFLOW_TRAPS (itype)))
 945             && (POINTER_TYPE_P (type)
 946                 || (INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type)))
 947             && (POINTER_TYPE_P (type) == POINTER_TYPE_P (itype)
 948                 || (TYPE_PRECISION (type) == TYPE_PRECISION (sizetype)
 949                     && TYPE_PRECISION (itype) == TYPE_PRECISION (sizetype))))
 950           {
 951             if (POINTER_TYPE_P (type))
 952               {
 953                 split_constant_offset (op0, var, off, nullptr, cache, limit);
 954                 *var = fold_convert (type, *var);
 955               }
 956             else if (POINTER_TYPE_P (itype))
 957               {
 958                 split_constant_offset (op0, var, off, nullptr, cache, limit);
 959                 *var = fold_convert (sizetype, *var);
 960               }
 961             else
 962               {
 963                 split_constant_offset (op0, var, off, &op0_range,
 964                                        cache, limit);
 965                 if (!nop_conversion_for_offset_p (type, itype, op0_range))
 966                   return false;
 967                 if (result_range)
 968                   {
 969                     *result_range = op0_range;
 970                     range_cast (*result_range, type);
 971                   }
 972               }
 973             return true;
 974           }
 975         return false;
 976       }
 977
 978     default:
 979       return false;
 980     }
 981 }
 982
 983 /* If EXP has pointer type, try to express it as:
 984
 985      POINTER_PLUS <*VAR, (sizetype) *OFF>
 986
 987    where:
 988
 989    - *VAR has the same type as EXP
 990    - *OFF is a constant of type ssizetype.
 991
 992    If EXP has an integral type, try to express (sizetype) EXP as:
 993
 994      *VAR + (sizetype) *OFF
 995
 996    where:
 997
 998    - *VAR has type sizetype
 999    - *OFF is a constant of type ssizetype.
1000
1001    If EXP_RANGE is nonnull, set it to the range of EXP.
1002
1003    CACHE caches {*VAR, *OFF} pairs for SSA names that we've previously
1004    visited.  LIMIT counts down the number of SSA names that we are
1005    allowed to process before giving up.  */
1006
1007 static void
1008 split_constant_offset (tree exp, tree *var, tree *off, value_range *exp_range,
1009                        hash_map<tree, std::pair<tree, tree> > &cache,
1010                        unsigned *limit)
1011 {
1012   tree type = TREE_TYPE (exp), op0, op1;
1013   enum tree_code code;
1014
1015   code = TREE_CODE (exp);
1016   if (exp_range)
1017     {
1018       *exp_range = type;
1019       if (code == SSA_NAME)
1020         {
1021           value_range vr;
1022           get_range_query (cfun)->range_of_expr (vr, exp);
1023           if (vr.undefined_p ())
1024             vr.set_varying (TREE_TYPE (exp));
1025           wide_int var_min = wi::to_wide (vr.min ());
1026           wide_int var_max = wi::to_wide (vr.max ());
1027           value_range_kind vr_kind = vr.kind ();
1028           wide_int var_nonzero = get_nonzero_bits (exp);
1029           vr_kind = intersect_range_with_nonzero_bits (vr_kind,
1030                                                        &var_min, &var_max,
1031                                                        var_nonzero,
1032                                                        TYPE_SIGN (type));
1033           /* This check for VR_VARYING is here because the old code
1034              using get_range_info would return VR_RANGE for the entire
1035              domain, instead of VR_VARYING.  The new code normalizes
1036              full-domain ranges to VR_VARYING.  */
1037           if (vr_kind == VR_RANGE || vr_kind == VR_VARYING)
1038             *exp_range = value_range (type, var_min, var_max);
1039         }
1040     }
1041
1042   if (!tree_is_chrec (exp)
1043       && get_gimple_rhs_class (TREE_CODE (exp)) != GIMPLE_TERNARY_RHS)
1044     {
1045       extract_ops_from_tree (exp, &code, &op0, &op1);
1046       if (split_constant_offset_1 (type, op0, code, op1, var, off,
1047                                    exp_range, cache, limit))
1048         return;
1049     }
1050
1051   *var = exp;
1052   if (INTEGRAL_TYPE_P (type))
1053     *var = fold_convert (sizetype, *var);
1054   *off = ssize_int (0);
1055
1056   value_range r;
1057   if (exp_range && code != SSA_NAME
1058       && get_range_query (cfun)->range_of_expr (r, exp)
1059       && !r.undefined_p ())
1060     *exp_range = r;
1061 }
1062
1063 /* Expresses EXP as VAR + OFF, where OFF is a constant.  VAR has the same
1064    type as EXP while OFF has type ssizetype.  */
1065
1066 void
1067 split_constant_offset (tree exp, tree *var, tree *off)
1068 {
1069   unsigned limit = param_ssa_name_def_chain_limit;
1070   static hash_map<tree, std::pair<tree, tree> > *cache;
1071   if (!cache)
1072     cache = new hash_map<tree, std::pair<tree, tree> > (37);
1073   split_constant_offset (exp, var, off, nullptr, *cache, &limit);
1074   *var = fold_convert (TREE_TYPE (exp), *var);
1075   cache->empty ();
1076 }
1077
1078 /* Returns the address ADDR of an object in a canonical shape (without nop
1079    casts, and with type of pointer to the object).  */
1080
1081 static tree
1082 canonicalize_base_object_address (tree addr)
1083 {
1084   tree orig = addr;
1085
1086   STRIP_NOPS (addr);
1087
1088   /* The base address may be obtained by casting from integer, in that case
1089      keep the cast.  */
1090   if (!POINTER_TYPE_P (TREE_TYPE (addr)))
1091     return orig;
1092
1093   if (TREE_CODE (addr) != ADDR_EXPR)
1094     return addr;
1095
1096   return build_fold_addr_expr (TREE_OPERAND (addr, 0));
1097 }
1098
1099 /* Analyze the behavior of memory reference REF within STMT.
1100    There are two modes:
1101
1102    - BB analysis.  In this case we simply split the address into base,
1103      init and offset components, without reference to any containing loop.
1104      The resulting base and offset are general expressions and they can
1105      vary arbitrarily from one iteration of the containing loop to the next.
1106      The step is always zero.
1107
1108    - loop analysis.  In this case we analyze the reference both wrt LOOP
1109      and on the basis that the reference occurs (is "used") in LOOP;
1110      see the comment above analyze_scalar_evolution_in_loop for more
1111      information about this distinction.  The base, init, offset and
1112      step fields are all invariant in LOOP.
1113
1114    Perform BB analysis if LOOP is null, or if LOOP is the function's
1115    dummy outermost loop.  In other cases perform loop analysis.
1116
1117    Return true if the analysis succeeded and store the results in DRB if so.
1118    BB analysis can only fail for bitfield or reversed-storage accesses.  */
1119
1120 opt_result
1121 dr_analyze_innermost (innermost_loop_behavior *drb, tree ref,
1122                       class loop *loop, const gimple *stmt)
1123 {
1124   poly_int64 pbitsize, pbitpos;
1125   tree base, poffset;
1126   machine_mode pmode;
1127   int punsignedp, preversep, pvolatilep;
1128   affine_iv base_iv, offset_iv;
1129   tree init, dinit, step;
1130   bool in_loop = (loop && loop->num);
1131
1132   if (dump_file && (dump_flags & TDF_DETAILS))
1133     fprintf (dump_file, "analyze_innermost: ");
1134
1135   base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset, &pmode,
1136                               &punsignedp, &preversep, &pvolatilep);
1137   gcc_assert (base != NULL_TREE);
1138
1139   poly_int64 pbytepos;
1140   if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
1141     return opt_result::failure_at (stmt,
1142                                    "failed: bit offset alignment.\n");
1143
1144   if (preversep)
1145     return opt_result::failure_at (stmt,
1146                                    "failed: reverse storage order.\n");
1147
1148   /* Calculate the alignment and misalignment for the inner reference.  */
1149   unsigned int HOST_WIDE_INT bit_base_misalignment;
1150   unsigned int bit_base_alignment;
1151   get_object_alignment_1 (base, &bit_base_alignment, &bit_base_misalignment);
1152
1153   /* There are no bitfield references remaining in BASE, so the values
1154      we got back must be whole bytes.  */
1155   gcc_assert (bit_base_alignment % BITS_PER_UNIT == 0
1156               && bit_base_misalignment % BITS_PER_UNIT == 0);
1157   unsigned int base_alignment = bit_base_alignment / BITS_PER_UNIT;
1158   poly_int64 base_misalignment = bit_base_misalignment / BITS_PER_UNIT;
1159
1160   if (TREE_CODE (base) == MEM_REF)
1161     {
1162       if (!integer_zerop (TREE_OPERAND (base, 1)))
1163         {
1164           /* Subtract MOFF from the base and add it to POFFSET instead.
1165              Adjust the misalignment to reflect the amount we subtracted.  */
1166           poly_offset_int moff = mem_ref_offset (base);
1167           base_misalignment -= moff.force_shwi ();
1168           tree mofft = wide_int_to_tree (sizetype, moff);
1169           if (!poffset)
1170             poffset = mofft;
1171           else
1172             poffset = size_binop (PLUS_EXPR, poffset, mofft);
1173         }
1174       base = TREE_OPERAND (base, 0);
1175     }
1176   else
1177     base = build_fold_addr_expr (base);
1178
1179   if (in_loop)
1180     {
1181       if (!simple_iv (loop, loop, base, &base_iv, true))
1182         return opt_result::failure_at
1183           (stmt, "failed: evolution of base is not affine.\n");
1184     }
1185   else
1186     {
1187       base_iv.base = base;
1188       base_iv.step = ssize_int (0);
1189       base_iv.no_overflow = true;
1190     }
1191
1192   if (!poffset)
1193     {
1194       offset_iv.base = ssize_int (0);
1195       offset_iv.step = ssize_int (0);
1196     }
1197   else
1198     {
1199       if (!in_loop)
1200         {
1201           offset_iv.base = poffset;
1202           offset_iv.step = ssize_int (0);
1203         }
1204       else if (!simple_iv (loop, loop, poffset, &offset_iv, true))
1205         return opt_result::failure_at
1206           (stmt, "failed: evolution of offset is not affine.\n");
1207     }
1208
1209   init = ssize_int (pbytepos);
1210
1211   /* Subtract any constant component from the base and add it to INIT instead.
1212      Adjust the misalignment to reflect the amount we subtracted.  */
1213   split_constant_offset (base_iv.base, &base_iv.base, &dinit);
1214   init = size_binop (PLUS_EXPR, init, dinit);
1215   base_misalignment -= TREE_INT_CST_LOW (dinit);
1216
1217   split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
1218   init = size_binop (PLUS_EXPR, init, dinit);
1219
1220   step = size_binop (PLUS_EXPR,
1221                      fold_convert (ssizetype, base_iv.step),
1222                      fold_convert (ssizetype, offset_iv.step));
1223
1224   base = canonicalize_base_object_address (base_iv.base);
1225
1226   /* See if get_pointer_alignment can guarantee a higher alignment than
1227      the one we calculated above.  */
1228   unsigned int HOST_WIDE_INT alt_misalignment;
1229   unsigned int alt_alignment;
1230   get_pointer_alignment_1 (base, &alt_alignment, &alt_misalignment);
1231
1232   /* As above, these values must be whole bytes.  */
1233   gcc_assert (alt_alignment % BITS_PER_UNIT == 0
1234               && alt_misalignment % BITS_PER_UNIT == 0);
1235   alt_alignment /= BITS_PER_UNIT;
1236   alt_misalignment /= BITS_PER_UNIT;
1237
1238   if (base_alignment < alt_alignment)
1239     {
1240       base_alignment = alt_alignment;
1241       base_misalignment = alt_misalignment;
1242     }
1243
1244   drb->base_address = base;
1245   drb->offset = fold_convert (ssizetype, offset_iv.base);
1246   drb->init = init;
1247   drb->step = step;
1248   if (known_misalignment (base_misalignment, base_alignment,
1249                           &drb->base_misalignment))
1250     drb->base_alignment = base_alignment;
1251   else
1252     {
1253       drb->base_alignment = known_alignment (base_misalignment);
1254       drb->base_misalignment = 0;
1255     }
1256   drb->offset_alignment = highest_pow2_factor (offset_iv.base);
1257   drb->step_alignment = highest_pow2_factor (step);
1258
1259   if (dump_file && (dump_flags & TDF_DETAILS))
1260     fprintf (dump_file, "success.\n");
1261
1262   return opt_result::success ();
1263 }
1264
1265 /* Return true if OP is a valid component reference for a DR access
1266    function.  This accepts a subset of what handled_component_p accepts.  */
1267
1268 static bool
1269 access_fn_component_p (tree op)
1270 {
1271   switch (TREE_CODE (op))
1272     {
1273     case REALPART_EXPR:
1274     case IMAGPART_EXPR:
1275     case ARRAY_REF:
1276       return true;
1277
1278     case COMPONENT_REF:
1279       return TREE_CODE (TREE_TYPE (TREE_OPERAND (op, 0))) == RECORD_TYPE;
1280
1281     default:
1282       return false;
1283     }
1284 }
1285
1286 /* Returns whether BASE can have a access_fn_component_p with BASE
1287    as base.  */
1288
1289 static bool
1290 base_supports_access_fn_components_p (tree base)
1291 {
1292   switch (TREE_CODE (TREE_TYPE (base)))
1293     {
1294     case COMPLEX_TYPE:
1295     case ARRAY_TYPE:
1296     case RECORD_TYPE:
1297       return true;
1298     default:
1299       return false;
1300     }
1301 }
1302
1303 /* Determines the base object and the list of indices of memory reference
1304    DR, analyzed in LOOP and instantiated before NEST.  */
1305
1306 static void
1307 dr_analyze_indices (struct indices *dri, tree ref, edge nest, loop_p loop)
1308 {
1309   /* If analyzing a basic-block there are no indices to analyze
1310      and thus no access functions.  */
1311   if (!nest)
1312     {
1313       dri->base_object = ref;
1314       dri->access_fns.create (0);
1315       return;
1316     }
1317
1318   vec<tree> access_fns = vNULL;
1319
1320   /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
1321      into a two element array with a constant index.  The base is
1322      then just the immediate underlying object.  */
1323   if (TREE_CODE (ref) == REALPART_EXPR)
1324     {
1325       ref = TREE_OPERAND (ref, 0);
1326       access_fns.safe_push (integer_zero_node);
1327     }
1328   else if (TREE_CODE (ref) == IMAGPART_EXPR)
1329     {
1330       ref = TREE_OPERAND (ref, 0);
1331       access_fns.safe_push (integer_one_node);
1332     }
1333
1334   /* Analyze access functions of dimensions we know to be independent.
1335      The list of component references handled here should be kept in
1336      sync with access_fn_component_p.  */
1337   while (handled_component_p (ref))
1338     {
1339       if (TREE_CODE (ref) == ARRAY_REF)
1340         {
1341           tree op = TREE_OPERAND (ref, 1);
1342           tree access_fn = analyze_scalar_evolution (loop, op);
1343           access_fn = instantiate_scev (nest, loop, access_fn);
1344           access_fns.safe_push (access_fn);
1345         }
1346       else if (TREE_CODE (ref) == COMPONENT_REF
1347                && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) == RECORD_TYPE)
1348         {
1349           /* For COMPONENT_REFs of records (but not unions!) use the
1350              FIELD_DECL offset as constant access function so we can
1351              disambiguate a[i].f1 and a[i].f2.  */
1352           tree off = component_ref_field_offset (ref);
1353           off = size_binop (PLUS_EXPR,
1354                             size_binop (MULT_EXPR,
1355                                         fold_convert (bitsizetype, off),
1356                                         bitsize_int (BITS_PER_UNIT)),
1357                             DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref, 1)));
1358           access_fns.safe_push (off);
1359         }
1360       else
1361         /* If we have an unhandled component we could not translate
1362            to an access function stop analyzing.  We have determined
1363            our base object in this case.  */
1364         break;
1365
1366       ref = TREE_OPERAND (ref, 0);
1367     }
1368
1369   /* If the address operand of a MEM_REF base has an evolution in the
1370      analyzed nest, add it as an additional independent access-function.  */
1371   if (TREE_CODE (ref) == MEM_REF)
1372     {
1373       tree op = TREE_OPERAND (ref, 0);
1374       tree access_fn = analyze_scalar_evolution (loop, op);
1375       access_fn = instantiate_scev (nest, loop, access_fn);
1376       STRIP_NOPS (access_fn);
1377       if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
1378         {
1379           tree memoff = TREE_OPERAND (ref, 1);
1380           tree base = initial_condition (access_fn);
1381           tree orig_type = TREE_TYPE (base);
1382           STRIP_USELESS_TYPE_CONVERSION (base);
1383           tree off;
1384           split_constant_offset (base, &base, &off);
1385           STRIP_USELESS_TYPE_CONVERSION (base);
1386           /* Fold the MEM_REF offset into the evolutions initial
1387              value to make more bases comparable.  */
1388           if (!integer_zerop (memoff))
1389             {
1390               off = size_binop (PLUS_EXPR, off,
1391                                 fold_convert (ssizetype, memoff));
1392               memoff = build_int_cst (TREE_TYPE (memoff), 0);
1393             }
1394           /* Adjust the offset so it is a multiple of the access type
1395              size and thus we separate bases that can possibly be used
1396              to produce partial overlaps (which the access_fn machinery
1397              cannot handle).  */
1398           wide_int rem;
1399           if (TYPE_SIZE_UNIT (TREE_TYPE (ref))
1400               && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (ref))) == INTEGER_CST
1401               && !integer_zerop (TYPE_SIZE_UNIT (TREE_TYPE (ref))))
1402             rem = wi::mod_trunc
1403               (wi::to_wide (off),
1404                wi::to_wide (TYPE_SIZE_UNIT (TREE_TYPE (ref))),
1405                SIGNED);
1406           else
1407             /* If we can't compute the remainder simply force the initial
1408                condition to zero.  */
1409             rem = wi::to_wide (off);
1410           off = wide_int_to_tree (ssizetype, wi::to_wide (off) - rem);
1411           memoff = wide_int_to_tree (TREE_TYPE (memoff), rem);
1412           /* And finally replace the initial condition.  */
1413           access_fn = chrec_replace_initial_condition
1414               (access_fn, fold_convert (orig_type, off));
1415           /* ???  This is still not a suitable base object for
1416              dr_may_alias_p - the base object needs to be an
1417              access that covers the object as whole.  With
1418              an evolution in the pointer this cannot be
1419              guaranteed.
1420              As a band-aid, mark the access so we can special-case
1421              it in dr_may_alias_p.  */
1422           tree old = ref;
1423           ref = fold_build2_loc (EXPR_LOCATION (ref),
1424                                  MEM_REF, TREE_TYPE (ref),
1425                                  base, memoff);
1426           MR_DEPENDENCE_CLIQUE (ref) = MR_DEPENDENCE_CLIQUE (old);
1427           MR_DEPENDENCE_BASE (ref) = MR_DEPENDENCE_BASE (old);
1428           dri->unconstrained_base = true;
1429           access_fns.safe_push (access_fn);
1430         }
1431     }
1432   else if (DECL_P (ref))
1433     {
1434       /* Canonicalize DR_BASE_OBJECT to MEM_REF form.  */
1435       ref = build2 (MEM_REF, TREE_TYPE (ref),
1436                     build_fold_addr_expr (ref),
1437                     build_int_cst (reference_alias_ptr_type (ref), 0));
1438     }
1439
1440   dri->base_object = ref;
1441   dri->access_fns = access_fns;
1442 }
1443
1444 /* Extracts the alias analysis information from the memory reference DR.  */
1445
1446 static void
1447 dr_analyze_alias (struct data_reference *dr)
1448 {
1449   tree ref = DR_REF (dr);
1450   tree base = get_base_address (ref), addr;
1451
1452   if (INDIRECT_REF_P (base)
1453       || TREE_CODE (base) == MEM_REF)
1454     {
1455       addr = TREE_OPERAND (base, 0);
1456       if (TREE_CODE (addr) == SSA_NAME)
1457         DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
1458     }
1459 }
1460
1461 /* Frees data reference DR.  */
1462
1463 void
1464 free_data_ref (data_reference_p dr)
1465 {
1466   DR_ACCESS_FNS (dr).release ();
1467   if (dr->alt_indices.base_object)
1468     dr->alt_indices.access_fns.release ();
1469   free (dr);
1470 }
1471
1472 /* Analyze memory reference MEMREF, which is accessed in STMT.
1473    The reference is a read if IS_READ is true, otherwise it is a write.
1474    IS_CONDITIONAL_IN_STMT indicates that the reference is conditional
1475    within STMT, i.e. that it might not occur even if STMT is executed
1476    and runs to completion.
1477
1478    Return the data_reference description of MEMREF.  NEST is the outermost
1479    loop in which the reference should be instantiated, LOOP is the loop
1480    in which the data reference should be analyzed.  */
1481
1482 struct data_reference *
1483 create_data_ref (edge nest, loop_p loop, tree memref, gimple *stmt,
1484                  bool is_read, bool is_conditional_in_stmt)
1485 {
1486   struct data_reference *dr;
1487
1488   if (dump_file && (dump_flags & TDF_DETAILS))
1489     {
1490       fprintf (dump_file, "Creating dr for ");
1491       print_generic_expr (dump_file, memref, TDF_SLIM);
1492       fprintf (dump_file, "\n");
1493     }
1494
1495   dr = XCNEW (struct data_reference);
1496   DR_STMT (dr) = stmt;
1497   DR_REF (dr) = memref;
1498   DR_IS_READ (dr) = is_read;
1499   DR_IS_CONDITIONAL_IN_STMT (dr) = is_conditional_in_stmt;
1500
1501   dr_analyze_innermost (&DR_INNERMOST (dr), memref,
1502                         nest != NULL ? loop : NULL, stmt);
1503   dr_analyze_indices (&dr->indices, DR_REF (dr), nest, loop);
1504   dr_analyze_alias (dr);
1505
1506   if (dump_file && (dump_flags & TDF_DETAILS))
1507     {
1508       unsigned i;
1509       fprintf (dump_file, "\tbase_address: ");
1510       print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
1511       fprintf (dump_file, "\n\toffset from base address: ");
1512       print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
1513       fprintf (dump_file, "\n\tconstant offset from base address: ");
1514       print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
1515       fprintf (dump_file, "\n\tstep: ");
1516       print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
1517       fprintf (dump_file, "\n\tbase alignment: %d", DR_BASE_ALIGNMENT (dr));
1518       fprintf (dump_file, "\n\tbase misalignment: %d",
1519                DR_BASE_MISALIGNMENT (dr));
1520       fprintf (dump_file, "\n\toffset alignment: %d",
1521                DR_OFFSET_ALIGNMENT (dr));
1522       fprintf (dump_file, "\n\tstep alignment: %d", DR_STEP_ALIGNMENT (dr));
1523       fprintf (dump_file, "\n\tbase_object: ");
1524       print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
1525       fprintf (dump_file, "\n");
1526       for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
1527         {
1528           fprintf (dump_file, "\tAccess function %d: ", i);
1529           print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
1530         }
1531     }
1532
1533   return dr;
1534 }
1535
1536 /*  A helper function computes order between two tree expressions T1 and T2.
1537     This is used in comparator functions sorting objects based on the order
1538     of tree expressions.  The function returns -1, 0, or 1.  */
1539
1540 int
1541 data_ref_compare_tree (tree t1, tree t2)
1542 {
1543   int i, cmp;
1544   enum tree_code code;
1545   char tclass;
1546
1547   if (t1 == t2)
1548     return 0;
1549   if (t1 == NULL)
1550     return -1;
1551   if (t2 == NULL)
1552     return 1;
1553
1554   STRIP_USELESS_TYPE_CONVERSION (t1);
1555   STRIP_USELESS_TYPE_CONVERSION (t2);
1556   if (t1 == t2)
1557     return 0;
1558
1559   if (TREE_CODE (t1) != TREE_CODE (t2)
1560       && ! (CONVERT_EXPR_P (t1) && CONVERT_EXPR_P (t2)))
1561     return TREE_CODE (t1) < TREE_CODE (t2) ? -1 : 1;
1562
1563   code = TREE_CODE (t1);
1564   switch (code)
1565     {
1566     case INTEGER_CST:
1567       return tree_int_cst_compare (t1, t2);
1568
1569     case STRING_CST:
1570       if (TREE_STRING_LENGTH (t1) != TREE_STRING_LENGTH (t2))
1571         return TREE_STRING_LENGTH (t1) < TREE_STRING_LENGTH (t2) ? -1 : 1;
1572       return memcmp (TREE_STRING_POINTER (t1), TREE_STRING_POINTER (t2),
1573                      TREE_STRING_LENGTH (t1));
1574
1575     case SSA_NAME:
1576       if (SSA_NAME_VERSION (t1) != SSA_NAME_VERSION (t2))
1577         return SSA_NAME_VERSION (t1) < SSA_NAME_VERSION (t2) ? -1 : 1;
1578       break;
1579
1580     default:
1581       if (POLY_INT_CST_P (t1))
1582         return compare_sizes_for_sort (wi::to_poly_widest (t1),
1583                                        wi::to_poly_widest (t2));
1584
1585       tclass = TREE_CODE_CLASS (code);
1586
1587       /* For decls, compare their UIDs.  */
1588       if (tclass == tcc_declaration)
1589         {
1590           if (DECL_UID (t1) != DECL_UID (t2))
1591             return DECL_UID (t1) < DECL_UID (t2) ? -1 : 1;
1592           break;
1593         }
1594       /* For expressions, compare their operands recursively.  */
1595       else if (IS_EXPR_CODE_CLASS (tclass))
1596         {
1597           for (i = TREE_OPERAND_LENGTH (t1) - 1; i >= 0; --i)
1598             {
1599               cmp = data_ref_compare_tree (TREE_OPERAND (t1, i),
1600                                            TREE_OPERAND (t2, i));
1601               if (cmp != 0)
1602                 return cmp;
1603             }
1604         }
1605       else
1606         gcc_unreachable ();
1607     }
1608
1609   return 0;
1610 }
1611
1612 /* Return TRUE it's possible to resolve data dependence DDR by runtime alias
1613    check.  */
1614
1615 opt_result
1616 runtime_alias_check_p (ddr_p ddr, class loop *loop, bool speed_p)
1617 {
1618   if (dump_enabled_p ())
1619     dump_printf (MSG_NOTE,
1620                  "consider run-time aliasing test between %T and %T\n",
1621                  DR_REF (DDR_A (ddr)), DR_REF (DDR_B (ddr)));
1622
1623   if (!speed_p)
1624     return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
1625                                    "runtime alias check not supported when"
1626                                    " optimizing for size.\n");
1627
1628   /* FORNOW: We don't support versioning with outer-loop in either
1629      vectorization or loop distribution.  */
1630   if (loop != NULL && loop->inner != NULL)
1631     return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
1632                                    "runtime alias check not supported for"
1633                                    " outer loop.\n");
1634
1635   return opt_result::success ();
1636 }
1637
1638 /* Operator == between two dr_with_seg_len objects.
1639
1640    This equality operator is used to make sure two data refs
1641    are the same one so that we will consider to combine the
1642    aliasing checks of those two pairs of data dependent data
1643    refs.  */
1644
1645 static bool
1646 operator == (const dr_with_seg_len& d1,
1647              const dr_with_seg_len& d2)
1648 {
1649   return (operand_equal_p (DR_BASE_ADDRESS (d1.dr),
1650                            DR_BASE_ADDRESS (d2.dr), 0)
1651           && data_ref_compare_tree (DR_OFFSET (d1.dr), DR_OFFSET (d2.dr)) == 0
1652           && data_ref_compare_tree (DR_INIT (d1.dr), DR_INIT (d2.dr)) == 0
1653           && data_ref_compare_tree (d1.seg_len, d2.seg_len) == 0
1654           && known_eq (d1.access_size, d2.access_size)
1655           && d1.align == d2.align);
1656 }
1657
1658 /* Comparison function for sorting objects of dr_with_seg_len_pair_t
1659    so that we can combine aliasing checks in one scan.  */
1660
1661 static int
1662 comp_dr_with_seg_len_pair (const void *pa_, const void *pb_)
1663 {
1664   const dr_with_seg_len_pair_t* pa = (const dr_with_seg_len_pair_t *) pa_;
1665   const dr_with_seg_len_pair_t* pb = (const dr_with_seg_len_pair_t *) pb_;
1666   const dr_with_seg_len &a1 = pa->first, &a2 = pa->second;
1667   const dr_with_seg_len &b1 = pb->first, &b2 = pb->second;
1668
1669   /* For DR pairs (a, b) and (c, d), we only consider to merge the alias checks
1670      if a and c have the same basic address snd step, and b and d have the same
1671      address and step.  Therefore, if any a&c or b&d don't have the same address
1672      and step, we don't care the order of those two pairs after sorting.  */
1673   int comp_res;
1674
1675   if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a1.dr),
1676                                          DR_BASE_ADDRESS (b1.dr))) != 0)
1677     return comp_res;
1678   if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a2.dr),
1679                                          DR_BASE_ADDRESS (b2.dr))) != 0)
1680     return comp_res;
1681   if ((comp_res = data_ref_compare_tree (DR_STEP (a1.dr),
1682                                          DR_STEP (b1.dr))) != 0)
1683     return comp_res;
1684   if ((comp_res = data_ref_compare_tree (DR_STEP (a2.dr),
1685                                          DR_STEP (b2.dr))) != 0)
1686     return comp_res;
1687   if ((comp_res = data_ref_compare_tree (DR_OFFSET (a1.dr),
1688                                          DR_OFFSET (b1.dr))) != 0)
1689     return comp_res;
1690   if ((comp_res = data_ref_compare_tree (DR_INIT (a1.dr),
1691                                          DR_INIT (b1.dr))) != 0)
1692     return comp_res;
1693   if ((comp_res = data_ref_compare_tree (DR_OFFSET (a2.dr),
1694                                          DR_OFFSET (b2.dr))) != 0)
1695     return comp_res;
1696   if ((comp_res = data_ref_compare_tree (DR_INIT (a2.dr),
1697                                          DR_INIT (b2.dr))) != 0)
1698     return comp_res;
1699
1700   return 0;
1701 }
1702
1703 /* Dump information about ALIAS_PAIR, indenting each line by INDENT.  */
1704
1705 static void
1706 dump_alias_pair (dr_with_seg_len_pair_t *alias_pair, const char *indent)
1707 {
1708   dump_printf (MSG_NOTE, "%sreference:      %T vs. %T\n", indent,
1709                DR_REF (alias_pair->first.dr),
1710                DR_REF (alias_pair->second.dr));
1711
1712   dump_printf (MSG_NOTE, "%ssegment length: %T", indent,
1713                alias_pair->first.seg_len);
1714   if (!operand_equal_p (alias_pair->first.seg_len,
1715                         alias_pair->second.seg_len, 0))
1716     dump_printf (MSG_NOTE, " vs. %T", alias_pair->second.seg_len);
1717
1718   dump_printf (MSG_NOTE, "\n%saccess size:    ", indent);
1719   dump_dec (MSG_NOTE, alias_pair->first.access_size);
1720   if (maybe_ne (alias_pair->first.access_size, alias_pair->second.access_size))
1721     {
1722       dump_printf (MSG_NOTE, " vs. ");
1723       dump_dec (MSG_NOTE, alias_pair->second.access_size);
1724     }
1725
1726   dump_printf (MSG_NOTE, "\n%salignment:      %d", indent,
1727                alias_pair->first.align);
1728   if (alias_pair->first.align != alias_pair->second.align)
1729     dump_printf (MSG_NOTE, " vs. %d", alias_pair->second.align);
1730
1731   dump_printf (MSG_NOTE, "\n%sflags:         ", indent);
1732   if (alias_pair->flags & DR_ALIAS_RAW)
1733     dump_printf (MSG_NOTE, " RAW");
1734   if (alias_pair->flags & DR_ALIAS_WAR)
1735     dump_printf (MSG_NOTE, " WAR");
1736   if (alias_pair->flags & DR_ALIAS_WAW)
1737     dump_printf (MSG_NOTE, " WAW");
1738   if (alias_pair->flags & DR_ALIAS_ARBITRARY)
1739     dump_printf (MSG_NOTE, " ARBITRARY");
1740   if (alias_pair->flags & DR_ALIAS_SWAPPED)
1741     dump_printf (MSG_NOTE, " SWAPPED");
1742   if (alias_pair->flags & DR_ALIAS_UNSWAPPED)
1743     dump_printf (MSG_NOTE, " UNSWAPPED");
1744   if (alias_pair->flags & DR_ALIAS_MIXED_STEPS)
1745     dump_printf (MSG_NOTE, " MIXED_STEPS");
1746   if (alias_pair->flags == 0)
1747     dump_printf (MSG_NOTE, " <none>");
1748   dump_printf (MSG_NOTE, "\n");
1749 }
1750
1751 /* Merge alias checks recorded in ALIAS_PAIRS and remove redundant ones.
1752    FACTOR is number of iterations that each data reference is accessed.
1753
1754    Basically, for each pair of dependent data refs store_ptr_0 & load_ptr_0,
1755    we create an expression:
1756
1757    ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
1758    || (load_ptr_0 + load_segment_length_0) <= store_ptr_0))
1759
1760    for aliasing checks.  However, in some cases we can decrease the number
1761    of checks by combining two checks into one.  For example, suppose we have
1762    another pair of data refs store_ptr_0 & load_ptr_1, and if the following
1763    condition is satisfied:
1764
1765    load_ptr_0 < load_ptr_1  &&
1766    load_ptr_1 - load_ptr_0 - load_segment_length_0 < store_segment_length_0
1767
1768    (this condition means, in each iteration of vectorized loop, the accessed
1769    memory of store_ptr_0 cannot be between the memory of load_ptr_0 and
1770    load_ptr_1.)
1771
1772    we then can use only the following expression to finish the alising checks
1773    between store_ptr_0 & load_ptr_0 and store_ptr_0 & load_ptr_1:
1774
1775    ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
1776    || (load_ptr_1 + load_segment_length_1 <= store_ptr_0))
1777
1778    Note that we only consider that load_ptr_0 and load_ptr_1 have the same
1779    basic address.  */
1780
1781 void
1782 prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs,
1783                                poly_uint64)
1784 {
1785   if (alias_pairs->is_empty ())
1786     return;
1787
1788   /* Canonicalize each pair so that the base components are ordered wrt
1789      data_ref_compare_tree.  This allows the loop below to merge more
1790      cases.  */
1791   unsigned int i;
1792   dr_with_seg_len_pair_t *alias_pair;
1793   FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
1794     {
1795       data_reference_p dr_a = alias_pair->first.dr;
1796       data_reference_p dr_b = alias_pair->second.dr;
1797       int comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (dr_a),
1798                                             DR_BASE_ADDRESS (dr_b));
1799       if (comp_res == 0)
1800         comp_res = data_ref_compare_tree (DR_OFFSET (dr_a), DR_OFFSET (dr_b));
1801       if (comp_res == 0)
1802         comp_res = data_ref_compare_tree (DR_INIT (dr_a), DR_INIT (dr_b));
1803       if (comp_res > 0)
1804         {
1805           std::swap (alias_pair->first, alias_pair->second);
1806           alias_pair->flags |= DR_ALIAS_SWAPPED;
1807         }
1808       else
1809         alias_pair->flags |= DR_ALIAS_UNSWAPPED;
1810     }
1811
1812   /* Sort the collected data ref pairs so that we can scan them once to
1813      combine all possible aliasing checks.  */
1814   alias_pairs->qsort (comp_dr_with_seg_len_pair);
1815
1816   /* Scan the sorted dr pairs and check if we can combine alias checks
1817      of two neighboring dr pairs.  */
1818   unsigned int last = 0;
1819   for (i = 1; i < alias_pairs->length (); ++i)
1820     {
1821       /* Deal with two ddrs (dr_a1, dr_b1) and (dr_a2, dr_b2).  */
1822       dr_with_seg_len_pair_t *alias_pair1 = &(*alias_pairs)[last];
1823       dr_with_seg_len_pair_t *alias_pair2 = &(*alias_pairs)[i];
1824
1825       dr_with_seg_len *dr_a1 = &alias_pair1->first;
1826       dr_with_seg_len *dr_b1 = &alias_pair1->second;
1827       dr_with_seg_len *dr_a2 = &alias_pair2->first;
1828       dr_with_seg_len *dr_b2 = &alias_pair2->second;
1829
1830       /* Remove duplicate data ref pairs.  */
1831       if (*dr_a1 == *dr_a2 && *dr_b1 == *dr_b2)
1832         {
1833           if (dump_enabled_p ())
1834             dump_printf (MSG_NOTE, "found equal ranges %T, %T and %T, %T\n",
1835                          DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
1836                          DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
1837           alias_pair1->flags |= alias_pair2->flags;
1838           continue;
1839         }
1840
1841       /* Assume that we won't be able to merge the pairs, then correct
1842          if we do.  */
1843       last += 1;
1844       if (last != i)
1845         (*alias_pairs)[last] = (*alias_pairs)[i];
1846
1847       if (*dr_a1 == *dr_a2 || *dr_b1 == *dr_b2)
1848         {
1849           /* We consider the case that DR_B1 and DR_B2 are same memrefs,
1850              and DR_A1 and DR_A2 are two consecutive memrefs.  */
1851           if (*dr_a1 == *dr_a2)
1852             {
1853               std::swap (dr_a1, dr_b1);
1854               std::swap (dr_a2, dr_b2);
1855             }
1856
1857           poly_int64 init_a1, init_a2;
1858           /* Only consider cases in which the distance between the initial
1859              DR_A1 and the initial DR_A2 is known at compile time.  */
1860           if (!operand_equal_p (DR_BASE_ADDRESS (dr_a1->dr),
1861                                 DR_BASE_ADDRESS (dr_a2->dr), 0)
1862               || !operand_equal_p (DR_OFFSET (dr_a1->dr),
1863                                    DR_OFFSET (dr_a2->dr), 0)
1864               || !poly_int_tree_p (DR_INIT (dr_a1->dr), &init_a1)
1865               || !poly_int_tree_p (DR_INIT (dr_a2->dr), &init_a2))
1866             continue;
1867
1868           /* Don't combine if we can't tell which one comes first.  */
1869           if (!ordered_p (init_a1, init_a2))
1870             continue;
1871
1872           /* Work out what the segment length would be if we did combine
1873              DR_A1 and DR_A2:
1874
1875              - If DR_A1 and DR_A2 have equal lengths, that length is
1876                also the combined length.
1877
1878              - If DR_A1 and DR_A2 both have negative "lengths", the combined
1879                length is the lower bound on those lengths.
1880
1881              - If DR_A1 and DR_A2 both have positive lengths, the combined
1882                length is the upper bound on those lengths.
1883
1884              Other cases are unlikely to give a useful combination.
1885
1886              The lengths both have sizetype, so the sign is taken from
1887              the step instead.  */
1888           poly_uint64 new_seg_len = 0;
1889           bool new_seg_len_p = !operand_equal_p (dr_a1->seg_len,
1890                                                  dr_a2->seg_len, 0);
1891           if (new_seg_len_p)
1892             {
1893               poly_uint64 seg_len_a1, seg_len_a2;
1894               if (!poly_int_tree_p (dr_a1->seg_len, &seg_len_a1)
1895                   || !poly_int_tree_p (dr_a2->seg_len, &seg_len_a2))
1896                 continue;
1897
1898               tree indicator_a = dr_direction_indicator (dr_a1->dr);
1899               if (TREE_CODE (indicator_a) != INTEGER_CST)
1900                 continue;
1901
1902               tree indicator_b = dr_direction_indicator (dr_a2->dr);
1903               if (TREE_CODE (indicator_b) != INTEGER_CST)
1904                 continue;
1905
1906               int sign_a = tree_int_cst_sgn (indicator_a);
1907               int sign_b = tree_int_cst_sgn (indicator_b);
1908
1909               if (sign_a <= 0 && sign_b <= 0)
1910                 new_seg_len = lower_bound (seg_len_a1, seg_len_a2);
1911               else if (sign_a >= 0 && sign_b >= 0)
1912                 new_seg_len = upper_bound (seg_len_a1, seg_len_a2);
1913               else
1914                 continue;
1915             }
1916           /* At this point we're committed to merging the refs.  */
1917
1918           /* Make sure dr_a1 starts left of dr_a2.  */
1919           if (maybe_gt (init_a1, init_a2))
1920             {
1921               std::swap (*dr_a1, *dr_a2);
1922               std::swap (init_a1, init_a2);
1923             }
1924
1925           /* The DR_Bs are equal, so only the DR_As can introduce
1926              mixed steps.  */
1927           if (!operand_equal_p (DR_STEP (dr_a1->dr), DR_STEP (dr_a2->dr), 0))
1928             alias_pair1->flags |= DR_ALIAS_MIXED_STEPS;
1929
1930           if (new_seg_len_p)
1931             {
1932               dr_a1->seg_len = build_int_cst (TREE_TYPE (dr_a1->seg_len),
1933                                               new_seg_len);
1934               dr_a1->align = MIN (dr_a1->align, known_alignment (new_seg_len));
1935             }
1936
1937           /* This is always positive due to the swap above.  */
1938           poly_uint64 diff = init_a2 - init_a1;
1939
1940           /* The new check will start at DR_A1.  Make sure that its access
1941              size encompasses the initial DR_A2.  */
1942           if (maybe_lt (dr_a1->access_size, diff + dr_a2->access_size))
1943             {
1944               dr_a1->access_size = upper_bound (dr_a1->access_size,
1945                                                 diff + dr_a2->access_size);
1946               unsigned int new_align = known_alignment (dr_a1->access_size);
1947               dr_a1->align = MIN (dr_a1->align, new_align);
1948             }
1949           if (dump_enabled_p ())
1950             dump_printf (MSG_NOTE, "merging ranges for %T, %T and %T, %T\n",
1951                          DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
1952                          DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
1953           alias_pair1->flags |= alias_pair2->flags;
1954           last -= 1;
1955         }
1956     }
1957   alias_pairs->truncate (last + 1);
1958
1959   /* Try to restore the original dr_with_seg_len order within each
1960      dr_with_seg_len_pair_t.  If we ended up combining swapped and
1961      unswapped pairs into the same check, we have to invalidate any
1962      RAW, WAR and WAW information for it.  */
1963   if (dump_enabled_p ())
1964     dump_printf (MSG_NOTE, "merged alias checks:\n");
1965   FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
1966     {
1967       unsigned int swap_mask = (DR_ALIAS_SWAPPED | DR_ALIAS_UNSWAPPED);
1968       unsigned int swapped = (alias_pair->flags & swap_mask);
1969       if (swapped == DR_ALIAS_SWAPPED)
1970         std::swap (alias_pair->first, alias_pair->second);
1971       else if (swapped != DR_ALIAS_UNSWAPPED)
1972         alias_pair->flags |= DR_ALIAS_ARBITRARY;
1973       alias_pair->flags &= ~swap_mask;
1974       if (dump_enabled_p ())
1975         dump_alias_pair (alias_pair, "  ");
1976     }
1977 }
1978
1979 /* A subroutine of create_intersect_range_checks, with a subset of the
1980    same arguments.  Try to use IFN_CHECK_RAW_PTRS and IFN_CHECK_WAR_PTRS
1981    to optimize cases in which the references form a simple RAW, WAR or
1982    WAR dependence.  */
1983
1984 static bool
1985 create_ifn_alias_checks (tree *cond_expr,
1986                          const dr_with_seg_len_pair_t &alias_pair)
1987 {
1988   const dr_with_seg_len& dr_a = alias_pair.first;
1989   const dr_with_seg_len& dr_b = alias_pair.second;
1990
1991   /* Check for cases in which:
1992
1993      (a) we have a known RAW, WAR or WAR dependence
1994      (b) the accesses are well-ordered in both the original and new code
1995          (see the comment above the DR_ALIAS_* flags for details); and
1996      (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR.  */
1997   if (alias_pair.flags & ~(DR_ALIAS_RAW | DR_ALIAS_WAR | DR_ALIAS_WAW))
1998     return false;
1999
2000   /* Make sure that both DRs access the same pattern of bytes,
2001      with a constant length and step.  */
2002   poly_uint64 seg_len;
2003   if (!operand_equal_p (dr_a.seg_len, dr_b.seg_len, 0)
2004       || !poly_int_tree_p (dr_a.seg_len, &seg_len)
2005       || maybe_ne (dr_a.access_size, dr_b.access_size)
2006       || !operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0)
2007       || !tree_fits_uhwi_p (DR_STEP (dr_a.dr)))
2008     return false;
2009
2010   unsigned HOST_WIDE_INT bytes = tree_to_uhwi (DR_STEP (dr_a.dr));
2011   tree addr_a = DR_BASE_ADDRESS (dr_a.dr);
2012   tree addr_b = DR_BASE_ADDRESS (dr_b.dr);
2013
2014   /* See whether the target suports what we want to do.  WAW checks are
2015      equivalent to WAR checks here.  */
2016   internal_fn ifn = (alias_pair.flags & DR_ALIAS_RAW
2017                      ? IFN_CHECK_RAW_PTRS
2018                      : IFN_CHECK_WAR_PTRS);
2019   unsigned int align = MIN (dr_a.align, dr_b.align);
2020   poly_uint64 full_length = seg_len + bytes;
2021   if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a),
2022                                            full_length, align))
2023     {
2024       full_length = seg_len + dr_a.access_size;
2025       if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a),
2026                                                full_length, align))
2027         return false;
2028     }
2029
2030   /* Commit to using this form of test.  */
2031   addr_a = fold_build_pointer_plus (addr_a, DR_OFFSET (dr_a.dr));
2032   addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr));
2033
2034   addr_b = fold_build_pointer_plus (addr_b, DR_OFFSET (dr_b.dr));
2035   addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr));
2036
2037   *cond_expr = build_call_expr_internal_loc (UNKNOWN_LOCATION,
2038                                              ifn, boolean_type_node,
2039                                              4, addr_a, addr_b,
2040                                              size_int (full_length),
2041                                              size_int (align));
2042
2043   if (dump_enabled_p ())
2044     {
2045       if (ifn == IFN_CHECK_RAW_PTRS)
2046         dump_printf (MSG_NOTE, "using an IFN_CHECK_RAW_PTRS test\n");
2047       else
2048         dump_printf (MSG_NOTE, "using an IFN_CHECK_WAR_PTRS test\n");
2049     }
2050   return true;
2051 }
2052
2053 /* Try to generate a runtime condition that is true if ALIAS_PAIR is
2054    free of aliases, using a condition based on index values instead
2055    of a condition based on addresses.  Return true on success,
2056    storing the condition in *COND_EXPR.
2057
2058    This can only be done if the two data references in ALIAS_PAIR access
2059    the same array object and the index is the only difference.  For example,
2060    if the two data references are DR_A and DR_B:
2061
2062                        DR_A                           DR_B
2063       data-ref         arr[i]                         arr[j]
2064       base_object      arr                            arr
2065       index            {i_0, +, 1}_loop               {j_0, +, 1}_loop
2066
2067    The addresses and their index are like:
2068
2069         |<- ADDR_A    ->|          |<- ADDR_B    ->|
2070      ------------------------------------------------------->
2071         |   |   |   |   |          |   |   |   |   |
2072      ------------------------------------------------------->
2073         i_0 ...         i_0+4      j_0 ...         j_0+4
2074
2075    We can create expression based on index rather than address:
2076
2077      (unsigned) (i_0 - j_0 + 3) <= 6
2078
2079    i.e. the indices are less than 4 apart.
2080
2081    Note evolution step of index needs to be considered in comparison.  */
2082
2083 static bool
2084 create_intersect_range_checks_index (class loop *loop, tree *cond_expr,
2085                                      const dr_with_seg_len_pair_t &alias_pair)
2086 {
2087   const dr_with_seg_len &dr_a = alias_pair.first;
2088   const dr_with_seg_len &dr_b = alias_pair.second;
2089   if ((alias_pair.flags & DR_ALIAS_MIXED_STEPS)
2090       || integer_zerop (DR_STEP (dr_a.dr))
2091       || integer_zerop (DR_STEP (dr_b.dr))
2092       || DR_NUM_DIMENSIONS (dr_a.dr) != DR_NUM_DIMENSIONS (dr_b.dr))
2093     return false;
2094
2095   poly_uint64 seg_len1, seg_len2;
2096   if (!poly_int_tree_p (dr_a.seg_len, &seg_len1)
2097       || !poly_int_tree_p (dr_b.seg_len, &seg_len2))
2098     return false;
2099
2100   if (!tree_fits_shwi_p (DR_STEP (dr_a.dr)))
2101     return false;
2102
2103   if (!operand_equal_p (DR_BASE_OBJECT (dr_a.dr), DR_BASE_OBJECT (dr_b.dr), 0))
2104     return false;
2105
2106   if (!operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0))
2107     return false;
2108
2109   gcc_assert (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST);
2110
2111   bool neg_step = tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0;
2112   unsigned HOST_WIDE_INT abs_step = tree_to_shwi (DR_STEP (dr_a.dr));
2113   if (neg_step)
2114     {
2115       abs_step = -abs_step;
2116       seg_len1 = (-wi::to_poly_wide (dr_a.seg_len)).force_uhwi ();
2117       seg_len2 = (-wi::to_poly_wide (dr_b.seg_len)).force_uhwi ();
2118     }
2119
2120   /* Infer the number of iterations with which the memory segment is accessed
2121      by DR.  In other words, alias is checked if memory segment accessed by
2122      DR_A in some iterations intersect with memory segment accessed by DR_B
2123      in the same amount iterations.
2124      Note segnment length is a linear function of number of iterations with
2125      DR_STEP as the coefficient.  */
2126   poly_uint64 niter_len1, niter_len2;
2127   if (!can_div_trunc_p (seg_len1 + abs_step - 1, abs_step, &niter_len1)
2128       || !can_div_trunc_p (seg_len2 + abs_step - 1, abs_step, &niter_len2))
2129     return false;
2130
2131   /* Divide each access size by the byte step, rounding up.  */
2132   poly_uint64 niter_access1, niter_access2;
2133   if (!can_div_trunc_p (dr_a.access_size + abs_step - 1,
2134                         abs_step, &niter_access1)
2135       || !can_div_trunc_p (dr_b.access_size + abs_step - 1,
2136                            abs_step, &niter_access2))
2137     return false;
2138
2139   bool waw_or_war_p = (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW)) == 0;
2140
2141   int found = -1;
2142   for (unsigned int i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
2143     {
2144       tree access1 = DR_ACCESS_FN (dr_a.dr, i);
2145       tree access2 = DR_ACCESS_FN (dr_b.dr, i);
2146       /* Two indices must be the same if they are not scev, or not scev wrto
2147          current loop being vecorized.  */
2148       if (TREE_CODE (access1) != POLYNOMIAL_CHREC
2149           || TREE_CODE (access2) != POLYNOMIAL_CHREC
2150           || CHREC_VARIABLE (access1) != (unsigned)loop->num
2151           || CHREC_VARIABLE (access2) != (unsigned)loop->num)
2152         {
2153           if (operand_equal_p (access1, access2, 0))
2154             continue;
2155
2156           return false;
2157         }
2158       if (found >= 0)
2159         return false;
2160       found = i;
2161     }
2162
2163   /* Ought not to happen in practice, since if all accesses are equal then the
2164      alias should be decidable at compile time.  */
2165   if (found < 0)
2166     return false;
2167
2168   /* The two indices must have the same step.  */
2169   tree access1 = DR_ACCESS_FN (dr_a.dr, found);
2170   tree access2 = DR_ACCESS_FN (dr_b.dr, found);
2171   if (!operand_equal_p (CHREC_RIGHT (access1), CHREC_RIGHT (access2), 0))
2172     return false;
2173
2174   tree idx_step = CHREC_RIGHT (access1);
2175   /* Index must have const step, otherwise DR_STEP won't be constant.  */
2176   gcc_assert (TREE_CODE (idx_step) == INTEGER_CST);
2177   /* Index must evaluate in the same direction as DR.  */
2178   gcc_assert (!neg_step || tree_int_cst_sign_bit (idx_step) == 1);
2179
2180   tree min1 = CHREC_LEFT (access1);
2181   tree min2 = CHREC_LEFT (access2);
2182   if (!types_compatible_p (TREE_TYPE (min1), TREE_TYPE (min2)))
2183     return false;
2184
2185   /* Ideally, alias can be checked against loop's control IV, but we
2186      need to prove linear mapping between control IV and reference
2187      index.  Although that should be true, we check against (array)
2188      index of data reference.  Like segment length, index length is
2189      linear function of the number of iterations with index_step as
2190      the coefficient, i.e, niter_len * idx_step.  */
2191   offset_int abs_idx_step = offset_int::from (wi::to_wide (idx_step),
2192                                               SIGNED);
2193   if (neg_step)
2194     abs_idx_step = -abs_idx_step;
2195   poly_offset_int idx_len1 = abs_idx_step * niter_len1;
2196   poly_offset_int idx_len2 = abs_idx_step * niter_len2;
2197   poly_offset_int idx_access1 = abs_idx_step * niter_access1;
2198   poly_offset_int idx_access2 = abs_idx_step * niter_access2;
2199
2200   gcc_assert (known_ge (idx_len1, 0)
2201               && known_ge (idx_len2, 0)
2202               && known_ge (idx_access1, 0)
2203               && known_ge (idx_access2, 0));
2204
2205   /* Each access has the following pattern, with lengths measured
2206      in units of INDEX:
2207
2208           <-- idx_len -->
2209           <--- A: -ve step --->
2210           +-----+-------+-----+-------+-----+
2211           | n-1 | ..... |  0  | ..... | n-1 |
2212           +-----+-------+-----+-------+-----+
2213                         <--- B: +ve step --->
2214                         <-- idx_len -->
2215                         |
2216                        min
2217
2218      where "n" is the number of scalar iterations covered by the segment
2219      and where each access spans idx_access units.
2220
2221      A is the range of bytes accessed when the step is negative,
2222      B is the range when the step is positive.
2223
2224      When checking for general overlap, we need to test whether
2225      the range:
2226
2227        [min1 + low_offset1, min1 + high_offset1 + idx_access1 - 1]
2228
2229      overlaps:
2230
2231        [min2 + low_offset2, min2 + high_offset2 + idx_access2 - 1]
2232
2233      where:
2234
2235         low_offsetN = +ve step ? 0 : -idx_lenN;
2236        high_offsetN = +ve step ? idx_lenN : 0;
2237
2238      This is equivalent to testing whether:
2239
2240        min1 + low_offset1 <= min2 + high_offset2 + idx_access2 - 1
2241        && min2 + low_offset2 <= min1 + high_offset1 + idx_access1 - 1
2242
2243      Converting this into a single test, there is an overlap if:
2244
2245        0 <= min2 - min1 + bias <= limit
2246
2247      where  bias = high_offset2 + idx_access2 - 1 - low_offset1
2248            limit = (high_offset1 - low_offset1 + idx_access1 - 1)
2249                  + (high_offset2 - low_offset2 + idx_access2 - 1)
2250       i.e. limit = idx_len1 + idx_access1 - 1 + idx_len2 + idx_access2 - 1
2251
2252      Combining the tests requires limit to be computable in an unsigned
2253      form of the index type; if it isn't, we fall back to the usual
2254      pointer-based checks.
2255
2256      We can do better if DR_B is a write and if DR_A and DR_B are
2257      well-ordered in both the original and the new code (see the
2258      comment above the DR_ALIAS_* flags for details).  In this case
2259      we know that for each i in [0, n-1], the write performed by
2260      access i of DR_B occurs after access numbers j<=i of DR_A in
2261      both the original and the new code.  Any write or anti
2262      dependencies wrt those DR_A accesses are therefore maintained.
2263
2264      We just need to make sure that each individual write in DR_B does not
2265      overlap any higher-indexed access in DR_A; such DR_A accesses happen
2266      after the DR_B access in the original code but happen before it in
2267      the new code.
2268
2269      We know the steps for both accesses are equal, so by induction, we
2270      just need to test whether the first write of DR_B overlaps a later
2271      access of DR_A.  In other words, we need to move min1 along by
2272      one iteration:
2273
2274        min1' = min1 + idx_step
2275
2276      and use the ranges:
2277
2278        [min1' + low_offset1', min1' + high_offset1' + idx_access1 - 1]
2279
2280      and:
2281
2282        [min2, min2 + idx_access2 - 1]
2283
2284      where:
2285
2286         low_offset1' = +ve step ? 0 : -(idx_len1 - |idx_step|)
2287        high_offset1' = +ve_step ? idx_len1 - |idx_step| : 0.  */
2288   if (waw_or_war_p)
2289     idx_len1 -= abs_idx_step;
2290
2291   poly_offset_int limit = idx_len1 + idx_access1 - 1 + idx_access2 - 1;
2292   if (!waw_or_war_p)
2293     limit += idx_len2;
2294
2295   tree utype = unsigned_type_for (TREE_TYPE (min1));
2296   if (!wi::fits_to_tree_p (limit, utype))
2297     return false;
2298
2299   poly_offset_int low_offset1 = neg_step ? -idx_len1 : 0;
2300   poly_offset_int high_offset2 = neg_step || waw_or_war_p ? 0 : idx_len2;
2301   poly_offset_int bias = high_offset2 + idx_access2 - 1 - low_offset1;
2302   /* Equivalent to adding IDX_STEP to MIN1.  */
2303   if (waw_or_war_p)
2304     bias -= wi::to_offset (idx_step);
2305
2306   tree subject = fold_build2 (MINUS_EXPR, utype,
2307                               fold_convert (utype, min2),
2308                               fold_convert (utype, min1));
2309   subject = fold_build2 (PLUS_EXPR, utype, subject,
2310                          wide_int_to_tree (utype, bias));
2311   tree part_cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject,
2312                                      wide_int_to_tree (utype, limit));
2313   if (*cond_expr)
2314     *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2315                               *cond_expr, part_cond_expr);
2316   else
2317     *cond_expr = part_cond_expr;
2318   if (dump_enabled_p ())
2319     {
2320       if (waw_or_war_p)
2321         dump_printf (MSG_NOTE, "using an index-based WAR/WAW test\n");
2322       else
2323         dump_printf (MSG_NOTE, "using an index-based overlap test\n");
2324     }
2325   return true;
2326 }
2327
2328 /* A subroutine of create_intersect_range_checks, with a subset of the
2329    same arguments.  Try to optimize cases in which the second access
2330    is a write and in which some overlap is valid.  */
2331
2332 static bool
2333 create_waw_or_war_checks (tree *cond_expr,
2334                           const dr_with_seg_len_pair_t &alias_pair)
2335 {
2336   const dr_with_seg_len& dr_a = alias_pair.first;
2337   const dr_with_seg_len& dr_b = alias_pair.second;
2338
2339   /* Check for cases in which:
2340
2341      (a) DR_B is always a write;
2342      (b) the accesses are well-ordered in both the original and new code
2343          (see the comment above the DR_ALIAS_* flags for details); and
2344      (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR.  */
2345   if (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW))
2346     return false;
2347
2348   /* Check for equal (but possibly variable) steps.  */
2349   tree step = DR_STEP (dr_a.dr);
2350   if (!operand_equal_p (step, DR_STEP (dr_b.dr)))
2351     return false;
2352
2353   /* Make sure that we can operate on sizetype without loss of precision.  */
2354   tree addr_type = TREE_TYPE (DR_BASE_ADDRESS (dr_a.dr));
2355   if (TYPE_PRECISION (addr_type) != TYPE_PRECISION (sizetype))
2356     return false;
2357
2358   /* All addresses involved are known to have a common alignment ALIGN.
2359      We can therefore subtract ALIGN from an exclusive endpoint to get
2360      an inclusive endpoint.  In the best (and common) case, ALIGN is the
2361      same as the access sizes of both DRs, and so subtracting ALIGN
2362      cancels out the addition of an access size.  */
2363   unsigned int align = MIN (dr_a.align, dr_b.align);
2364   poly_uint64 last_chunk_a = dr_a.access_size - align;
2365   poly_uint64 last_chunk_b = dr_b.access_size - align;
2366
2367   /* Get a boolean expression that is true when the step is negative.  */
2368   tree indicator = dr_direction_indicator (dr_a.dr);
2369   tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
2370                                fold_convert (ssizetype, indicator),
2371                                ssize_int (0));
2372
2373   /* Get lengths in sizetype.  */
2374   tree seg_len_a
2375     = fold_convert (sizetype, rewrite_to_non_trapping_overflow (dr_a.seg_len));
2376   step = fold_convert (sizetype, rewrite_to_non_trapping_overflow (step));
2377
2378   /* Each access has the following pattern:
2379
2380           <- |seg_len| ->
2381           <--- A: -ve step --->
2382           +-----+-------+-----+-------+-----+
2383           | n-1 | ..... |  0  | ..... | n-1 |
2384           +-----+-------+-----+-------+-----+
2385                         <--- B: +ve step --->
2386                         <- |seg_len| ->
2387                         |
2388                    base address
2389
2390      where "n" is the number of scalar iterations covered by the segment.
2391
2392      A is the range of bytes accessed when the step is negative,
2393      B is the range when the step is positive.
2394
2395      We know that DR_B is a write.  We also know (from checking that
2396      DR_A and DR_B are well-ordered) that for each i in [0, n-1],
2397      the write performed by access i of DR_B occurs after access numbers
2398      j<=i of DR_A in both the original and the new code.  Any write or
2399      anti dependencies wrt those DR_A accesses are therefore maintained.
2400
2401      We just need to make sure that each individual write in DR_B does not
2402      overlap any higher-indexed access in DR_A; such DR_A accesses happen
2403      after the DR_B access in the original code but happen before it in
2404      the new code.
2405
2406      We know the steps for both accesses are equal, so by induction, we
2407      just need to test whether the first write of DR_B overlaps a later
2408      access of DR_A.  In other words, we need to move addr_a along by
2409      one iteration:
2410
2411        addr_a' = addr_a + step
2412
2413      and check whether:
2414
2415        [addr_b, addr_b + last_chunk_b]
2416
2417      overlaps:
2418
2419        [addr_a' + low_offset_a, addr_a' + high_offset_a + last_chunk_a]
2420
2421      where [low_offset_a, high_offset_a] spans accesses [1, n-1].  I.e.:
2422
2423         low_offset_a = +ve step ? 0 : seg_len_a - step
2424        high_offset_a = +ve step ? seg_len_a - step : 0
2425
2426      This is equivalent to testing whether:
2427
2428        addr_a' + low_offset_a <= addr_b + last_chunk_b
2429        && addr_b <= addr_a' + high_offset_a + last_chunk_a
2430
2431      Converting this into a single test, there is an overlap if:
2432
2433        0 <= addr_b + last_chunk_b - addr_a' - low_offset_a <= limit
2434
2435      where limit = high_offset_a - low_offset_a + last_chunk_a + last_chunk_b
2436
2437      If DR_A is performed, limit + |step| - last_chunk_b is known to be
2438      less than the size of the object underlying DR_A.  We also know
2439      that last_chunk_b <= |step|; this is checked elsewhere if it isn't
2440      guaranteed at compile time.  There can therefore be no overflow if
2441      "limit" is calculated in an unsigned type with pointer precision.  */
2442   tree addr_a = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_a.dr),
2443                                          DR_OFFSET (dr_a.dr));
2444   addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr));
2445
2446   tree addr_b = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_b.dr),
2447                                          DR_OFFSET (dr_b.dr));
2448   addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr));
2449
2450   /* Advance ADDR_A by one iteration and adjust the length to compensate.  */
2451   addr_a = fold_build_pointer_plus (addr_a, step);
2452   tree seg_len_a_minus_step = fold_build2 (MINUS_EXPR, sizetype,
2453                                            seg_len_a, step);
2454   if (!CONSTANT_CLASS_P (seg_len_a_minus_step))
2455     seg_len_a_minus_step = build1 (SAVE_EXPR, sizetype, seg_len_a_minus_step);
2456
2457   tree low_offset_a = fold_build3 (COND_EXPR, sizetype, neg_step,
2458                                    seg_len_a_minus_step, size_zero_node);
2459   if (!CONSTANT_CLASS_P (low_offset_a))
2460     low_offset_a = build1 (SAVE_EXPR, sizetype, low_offset_a);
2461
2462   /* We could use COND_EXPR <neg_step, size_zero_node, seg_len_a_minus_step>,
2463      but it's usually more efficient to reuse the LOW_OFFSET_A result.  */
2464   tree high_offset_a = fold_build2 (MINUS_EXPR, sizetype, seg_len_a_minus_step,
2465                                     low_offset_a);
2466
2467   /* The amount added to addr_b - addr_a'.  */
2468   tree bias = fold_build2 (MINUS_EXPR, sizetype,
2469                            size_int (last_chunk_b), low_offset_a);
2470
2471   tree limit = fold_build2 (MINUS_EXPR, sizetype, high_offset_a, low_offset_a);
2472   limit = fold_build2 (PLUS_EXPR, sizetype, limit,
2473                        size_int (last_chunk_a + last_chunk_b));
2474
2475   tree subject = fold_build2 (POINTER_DIFF_EXPR, ssizetype, addr_b, addr_a);
2476   subject = fold_build2 (PLUS_EXPR, sizetype,
2477                          fold_convert (sizetype, subject), bias);
2478
2479   *cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject, limit);
2480   if (dump_enabled_p ())
2481     dump_printf (MSG_NOTE, "using an address-based WAR/WAW test\n");
2482   return true;
2483 }
2484
2485 /* If ALIGN is nonzero, set up *SEQ_MIN_OUT and *SEQ_MAX_OUT so that for
2486    every address ADDR accessed by D:
2487
2488      *SEQ_MIN_OUT <= ADDR (== ADDR & -ALIGN) <= *SEQ_MAX_OUT
2489
2490    In this case, every element accessed by D is aligned to at least
2491    ALIGN bytes.
2492
2493    If ALIGN is zero then instead set *SEG_MAX_OUT so that:
2494
2495      *SEQ_MIN_OUT <= ADDR < *SEQ_MAX_OUT.  */
2496
2497 static void
2498 get_segment_min_max (const dr_with_seg_len &d, tree *seg_min_out,
2499                      tree *seg_max_out, HOST_WIDE_INT align)
2500 {
2501   /* Each access has the following pattern:
2502
2503           <- |seg_len| ->
2504           <--- A: -ve step --->
2505           +-----+-------+-----+-------+-----+
2506           | n-1 | ,.... |  0  | ..... | n-1 |
2507           +-----+-------+-----+-------+-----+
2508                         <--- B: +ve step --->
2509                         <- |seg_len| ->
2510                         |
2511                    base address
2512
2513      where "n" is the number of scalar iterations covered by the segment.
2514      (This should be VF for a particular pair if we know that both steps
2515      are the same, otherwise it will be the full number of scalar loop
2516      iterations.)
2517
2518      A is the range of bytes accessed when the step is negative,
2519      B is the range when the step is positive.
2520
2521      If the access size is "access_size" bytes, the lowest addressed byte is:
2522
2523          base + (step < 0 ? seg_len : 0)   [LB]
2524
2525      and the highest addressed byte is always below:
2526
2527          base + (step < 0 ? 0 : seg_len) + access_size   [UB]
2528
2529      Thus:
2530
2531          LB <= ADDR < UB
2532
2533      If ALIGN is nonzero, all three values are aligned to at least ALIGN
2534      bytes, so:
2535
2536          LB <= ADDR <= UB - ALIGN
2537
2538      where "- ALIGN" folds naturally with the "+ access_size" and often
2539      cancels it out.
2540
2541      We don't try to simplify LB and UB beyond this (e.g. by using
2542      MIN and MAX based on whether seg_len rather than the stride is
2543      negative) because it is possible for the absolute size of the
2544      segment to overflow the range of a ssize_t.
2545
2546      Keeping the pointer_plus outside of the cond_expr should allow
2547      the cond_exprs to be shared with other alias checks.  */
2548   tree indicator = dr_direction_indicator (d.dr);
2549   tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
2550                                fold_convert (ssizetype, indicator),
2551                                ssize_int (0));
2552   tree addr_base = fold_build_pointer_plus (DR_BASE_ADDRESS (d.dr),
2553                                             DR_OFFSET (d.dr));
2554   addr_base = fold_build_pointer_plus (addr_base, DR_INIT (d.dr));
2555   tree seg_len
2556     = fold_convert (sizetype, rewrite_to_non_trapping_overflow (d.seg_len));
2557
2558   tree min_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
2559                                 seg_len, size_zero_node);
2560   tree max_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
2561                                 size_zero_node, seg_len);
2562   max_reach = fold_build2 (PLUS_EXPR, sizetype, max_reach,
2563                            size_int (d.access_size - align));
2564
2565   *seg_min_out = fold_build_pointer_plus (addr_base, min_reach);
2566   *seg_max_out = fold_build_pointer_plus (addr_base, max_reach);
2567 }
2568
2569 /* Generate a runtime condition that is true if ALIAS_PAIR is free of aliases,
2570    storing the condition in *COND_EXPR.  The fallback is to generate a
2571    a test that the two accesses do not overlap:
2572
2573      end_a <= start_b || end_b <= start_a.  */
2574
2575 static void
2576 create_intersect_range_checks (class loop *loop, tree *cond_expr,
2577                                const dr_with_seg_len_pair_t &alias_pair)
2578 {
2579   const dr_with_seg_len& dr_a = alias_pair.first;
2580   const dr_with_seg_len& dr_b = alias_pair.second;
2581   *cond_expr = NULL_TREE;
2582   if (create_intersect_range_checks_index (loop, cond_expr, alias_pair))
2583     return;
2584
2585   if (create_ifn_alias_checks (cond_expr, alias_pair))
2586     return;
2587
2588   if (create_waw_or_war_checks (cond_expr, alias_pair))
2589     return;
2590
2591   unsigned HOST_WIDE_INT min_align;
2592   tree_code cmp_code;
2593   /* We don't have to check DR_ALIAS_MIXED_STEPS here, since both versions
2594      are equivalent.  This is just an optimization heuristic.  */
2595   if (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST
2596       && TREE_CODE (DR_STEP (dr_b.dr)) == INTEGER_CST)
2597     {
2598       /* In this case adding access_size to seg_len is likely to give
2599          a simple X * step, where X is either the number of scalar
2600          iterations or the vectorization factor.  We're better off
2601          keeping that, rather than subtracting an alignment from it.
2602
2603          In this case the maximum values are exclusive and so there is
2604          no alias if the maximum of one segment equals the minimum
2605          of another.  */
2606       min_align = 0;
2607       cmp_code = LE_EXPR;
2608     }
2609   else
2610     {
2611       /* Calculate the minimum alignment shared by all four pointers,
2612          then arrange for this alignment to be subtracted from the
2613          exclusive maximum values to get inclusive maximum values.
2614          This "- min_align" is cumulative with a "+ access_size"
2615          in the calculation of the maximum values.  In the best
2616          (and common) case, the two cancel each other out, leaving
2617          us with an inclusive bound based only on seg_len.  In the
2618          worst case we're simply adding a smaller number than before.
2619
2620          Because the maximum values are inclusive, there is an alias
2621          if the maximum value of one segment is equal to the minimum
2622          value of the other.  */
2623       min_align = MIN (dr_a.align, dr_b.align);
2624       cmp_code = LT_EXPR;
2625     }
2626
2627   tree seg_a_min, seg_a_max, seg_b_min, seg_b_max;
2628   get_segment_min_max (dr_a, &seg_a_min, &seg_a_max, min_align);
2629   get_segment_min_max (dr_b, &seg_b_min, &seg_b_max, min_align);
2630
2631   *cond_expr
2632     = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
2633         fold_build2 (cmp_code, boolean_type_node, seg_a_max, seg_b_min),
2634         fold_build2 (cmp_code, boolean_type_node, seg_b_max, seg_a_min));
2635   if (dump_enabled_p ())
2636     dump_printf (MSG_NOTE, "using an address-based overlap test\n");
2637 }
2638
2639 /* Create a conditional expression that represents the run-time checks for
2640    overlapping of address ranges represented by a list of data references
2641    pairs passed in ALIAS_PAIRS.  Data references are in LOOP.  The returned
2642    COND_EXPR is the conditional expression to be used in the if statement
2643    that controls which version of the loop gets executed at runtime.  */
2644
2645 void
2646 create_runtime_alias_checks (class loop *loop,
2647                              const vec<dr_with_seg_len_pair_t> *alias_pairs,
2648                              tree * cond_expr)
2649 {
2650   tree part_cond_expr;
2651
2652   fold_defer_overflow_warnings ();
2653   for (const dr_with_seg_len_pair_t &alias_pair : alias_pairs)
2654     {
2655       gcc_assert (alias_pair.flags);
2656       if (dump_enabled_p ())
2657         dump_printf (MSG_NOTE,
2658                      "create runtime check for data references %T and %T\n",
2659                      DR_REF (alias_pair.first.dr),
2660                      DR_REF (alias_pair.second.dr));
2661
2662       /* Create condition expression for each pair data references.  */
2663       create_intersect_range_checks (loop, &part_cond_expr, alias_pair);
2664       if (*cond_expr)
2665         *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2666                                   *cond_expr, part_cond_expr);
2667       else
2668         *cond_expr = part_cond_expr;
2669     }
2670   fold_undefer_and_ignore_overflow_warnings ();
2671 }
2672
2673 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
2674    expressions.  */
2675 static bool
2676 dr_equal_offsets_p1 (tree offset1, tree offset2)
2677 {
2678   bool res;
2679
2680   STRIP_NOPS (offset1);
2681   STRIP_NOPS (offset2);
2682
2683   if (offset1 == offset2)
2684     return true;
2685
2686   if (TREE_CODE (offset1) != TREE_CODE (offset2)
2687       || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
2688     return false;
2689
2690   res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
2691                              TREE_OPERAND (offset2, 0));
2692
2693   if (!res || !BINARY_CLASS_P (offset1))
2694     return res;
2695
2696   res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
2697                              TREE_OPERAND (offset2, 1));
2698
2699   return res;
2700 }
2701
2702 /* Check if DRA and DRB have equal offsets.  */
2703 bool
2704 dr_equal_offsets_p (struct data_reference *dra,
2705                     struct data_reference *drb)
2706 {
2707   tree offset1, offset2;
2708
2709   offset1 = DR_OFFSET (dra);
2710   offset2 = DR_OFFSET (drb);
2711
2712   return dr_equal_offsets_p1 (offset1, offset2);
2713 }
2714
2715 /* Returns true if FNA == FNB.  */
2716
2717 static bool
2718 affine_function_equal_p (affine_fn fna, affine_fn fnb)
2719 {
2720   unsigned i, n = fna.length ();
2721
2722   if (n != fnb.length ())
2723     return false;
2724
2725   for (i = 0; i < n; i++)
2726     if (!operand_equal_p (fna[i], fnb[i], 0))
2727       return false;
2728
2729   return true;
2730 }
2731
2732 /* If all the functions in CF are the same, returns one of them,
2733    otherwise returns NULL.  */
2734
2735 static affine_fn
2736 common_affine_function (conflict_function *cf)
2737 {
2738   unsigned i;
2739   affine_fn comm;
2740
2741   if (!CF_NONTRIVIAL_P (cf))
2742     return affine_fn ();
2743
2744   comm = cf->fns[0];
2745
2746   for (i = 1; i < cf->n; i++)
2747     if (!affine_function_equal_p (comm, cf->fns[i]))
2748       return affine_fn ();
2749
2750   return comm;
2751 }
2752
2753 /* Returns the base of the affine function FN.  */
2754
2755 static tree
2756 affine_function_base (affine_fn fn)
2757 {
2758   return fn[0];
2759 }
2760
2761 /* Returns true if FN is a constant.  */
2762
2763 static bool
2764 affine_function_constant_p (affine_fn fn)
2765 {
2766   unsigned i;
2767   tree coef;
2768
2769   for (i = 1; fn.iterate (i, &coef); i++)
2770     if (!integer_zerop (coef))
2771       return false;
2772
2773   return true;
2774 }
2775
2776 /* Returns true if FN is the zero constant function.  */
2777
2778 static bool
2779 affine_function_zero_p (affine_fn fn)
2780 {
2781   return (integer_zerop (affine_function_base (fn))
2782           && affine_function_constant_p (fn));
2783 }
2784
2785 /* Returns a signed integer type with the largest precision from TA
2786    and TB.  */
2787
2788 static tree
2789 signed_type_for_types (tree ta, tree tb)
2790 {
2791   if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
2792     return signed_type_for (ta);
2793   else
2794     return signed_type_for (tb);
2795 }
2796
2797 /* Applies operation OP on affine functions FNA and FNB, and returns the
2798    result.  */
2799
2800 static affine_fn
2801 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
2802 {
2803   unsigned i, n, m;
2804   affine_fn ret;
2805   tree coef;
2806
2807   if (fnb.length () > fna.length ())
2808     {
2809       n = fna.length ();
2810       m = fnb.length ();
2811     }
2812   else
2813     {
2814       n = fnb.length ();
2815       m = fna.length ();
2816     }
2817
2818   ret.create (m);
2819   for (i = 0; i < n; i++)
2820     {
2821       tree type = signed_type_for_types (TREE_TYPE (fna[i]),
2822                                          TREE_TYPE (fnb[i]));
2823       ret.quick_push (fold_build2 (op, type, fna[i], fnb[i]));
2824     }
2825
2826   for (; fna.iterate (i, &coef); i++)
2827     ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2828                                  coef, integer_zero_node));
2829   for (; fnb.iterate (i, &coef); i++)
2830     ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2831                                  integer_zero_node, coef));
2832
2833   return ret;
2834 }
2835
2836 /* Returns the sum of affine functions FNA and FNB.  */
2837
2838 static affine_fn
2839 affine_fn_plus (affine_fn fna, affine_fn fnb)
2840 {
2841   return affine_fn_op (PLUS_EXPR, fna, fnb);
2842 }
2843
2844 /* Returns the difference of affine functions FNA and FNB.  */
2845
2846 static affine_fn
2847 affine_fn_minus (affine_fn fna, affine_fn fnb)
2848 {
2849   return affine_fn_op (MINUS_EXPR, fna, fnb);
2850 }
2851
2852 /* Frees affine function FN.  */
2853
2854 static void
2855 affine_fn_free (affine_fn fn)
2856 {
2857   fn.release ();
2858 }
2859
2860 /* Determine for each subscript in the data dependence relation DDR
2861    the distance.  */
2862
2863 static void
2864 compute_subscript_distance (struct data_dependence_relation *ddr)
2865 {
2866   conflict_function *cf_a, *cf_b;
2867   affine_fn fn_a, fn_b, diff;
2868
2869   if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
2870     {
2871       unsigned int i;
2872
2873       for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2874         {
2875           struct subscript *subscript;
2876
2877           subscript = DDR_SUBSCRIPT (ddr, i);
2878           cf_a = SUB_CONFLICTS_IN_A (subscript);
2879           cf_b = SUB_CONFLICTS_IN_B (subscript);
2880
2881           fn_a = common_affine_function (cf_a);
2882           fn_b = common_affine_function (cf_b);
2883           if (!fn_a.exists () || !fn_b.exists ())
2884             {
2885               SUB_DISTANCE (subscript) = chrec_dont_know;
2886               return;
2887             }
2888           diff = affine_fn_minus (fn_a, fn_b);
2889
2890           if (affine_function_constant_p (diff))
2891             SUB_DISTANCE (subscript) = affine_function_base (diff);
2892           else
2893             SUB_DISTANCE (subscript) = chrec_dont_know;
2894
2895           affine_fn_free (diff);
2896         }
2897     }
2898 }
2899
2900 /* Returns the conflict function for "unknown".  */
2901
2902 static conflict_function *
2903 conflict_fn_not_known (void)
2904 {
2905   conflict_function *fn = XCNEW (conflict_function);
2906   fn->n = NOT_KNOWN;
2907
2908   return fn;
2909 }
2910
2911 /* Returns the conflict function for "independent".  */
2912
2913 static conflict_function *
2914 conflict_fn_no_dependence (void)
2915 {
2916   conflict_function *fn = XCNEW (conflict_function);
2917   fn->n = NO_DEPENDENCE;
2918
2919   return fn;
2920 }
2921
2922 /* Returns true if the address of OBJ is invariant in LOOP.  */
2923
2924 static bool
2925 object_address_invariant_in_loop_p (const class loop *loop, const_tree obj)
2926 {
2927   while (handled_component_p (obj))
2928     {
2929       if (TREE_CODE (obj) == ARRAY_REF)
2930         {
2931           for (int i = 1; i < 4; ++i)
2932             if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, i),
2933                                                         loop->num))
2934               return false;
2935         }
2936       else if (TREE_CODE (obj) == COMPONENT_REF)
2937         {
2938           if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
2939                                                       loop->num))
2940             return false;
2941         }
2942       obj = TREE_OPERAND (obj, 0);
2943     }
2944
2945   if (!INDIRECT_REF_P (obj)
2946       && TREE_CODE (obj) != MEM_REF)
2947     return true;
2948
2949   return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
2950                                                   loop->num);
2951 }
2952
2953 /* Returns false if we can prove that data references A and B do not alias,
2954    true otherwise.  If LOOP_NEST is false no cross-iteration aliases are
2955    considered.  */
2956
2957 bool
2958 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
2959                 class loop *loop_nest)
2960 {
2961   tree addr_a = DR_BASE_OBJECT (a);
2962   tree addr_b = DR_BASE_OBJECT (b);
2963
2964   /* If we are not processing a loop nest but scalar code we
2965      do not need to care about possible cross-iteration dependences
2966      and thus can process the full original reference.  Do so,
2967      similar to how loop invariant motion applies extra offset-based
2968      disambiguation.  */
2969   if (!loop_nest)
2970     {
2971       aff_tree off1, off2;
2972       poly_widest_int size1, size2;
2973       get_inner_reference_aff (DR_REF (a), &off1, &size1);
2974       get_inner_reference_aff (DR_REF (b), &off2, &size2);
2975       aff_combination_scale (&off1, -1);
2976       aff_combination_add (&off2, &off1);
2977       if (aff_comb_cannot_overlap_p (&off2, size1, size2))
2978         return false;
2979     }
2980
2981   if ((TREE_CODE (addr_a) == MEM_REF || TREE_CODE (addr_a) == TARGET_MEM_REF)
2982       && (TREE_CODE (addr_b) == MEM_REF || TREE_CODE (addr_b) == TARGET_MEM_REF)
2983       /* For cross-iteration dependences the cliques must be valid for the
2984          whole loop, not just individual iterations.  */
2985       && (!loop_nest
2986           || MR_DEPENDENCE_CLIQUE (addr_a) == 1
2987           || MR_DEPENDENCE_CLIQUE (addr_a) == loop_nest->owned_clique)
2988       && MR_DEPENDENCE_CLIQUE (addr_a) == MR_DEPENDENCE_CLIQUE (addr_b)
2989       && MR_DEPENDENCE_BASE (addr_a) != MR_DEPENDENCE_BASE (addr_b))
2990     return false;
2991
2992   /* If we had an evolution in a pointer-based MEM_REF BASE_OBJECT we
2993      do not know the size of the base-object.  So we cannot do any
2994      offset/overlap based analysis but have to rely on points-to
2995      information only.  */
2996   if (TREE_CODE (addr_a) == MEM_REF
2997       && (DR_UNCONSTRAINED_BASE (a)
2998           || TREE_CODE (TREE_OPERAND (addr_a, 0)) == SSA_NAME))
2999     {
3000       /* For true dependences we can apply TBAA.  */
3001       if (flag_strict_aliasing
3002           && DR_IS_WRITE (a) && DR_IS_READ (b)
3003           && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
3004                                      get_alias_set (DR_REF (b))))
3005         return false;
3006       if (TREE_CODE (addr_b) == MEM_REF)
3007         return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
3008                                        TREE_OPERAND (addr_b, 0));
3009       else
3010         return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
3011                                        build_fold_addr_expr (addr_b));
3012     }
3013   else if (TREE_CODE (addr_b) == MEM_REF
3014            && (DR_UNCONSTRAINED_BASE (b)
3015                || TREE_CODE (TREE_OPERAND (addr_b, 0)) == SSA_NAME))
3016     {
3017       /* For true dependences we can apply TBAA.  */
3018       if (flag_strict_aliasing
3019           && DR_IS_WRITE (a) && DR_IS_READ (b)
3020           && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
3021                                      get_alias_set (DR_REF (b))))
3022         return false;
3023       if (TREE_CODE (addr_a) == MEM_REF)
3024         return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
3025                                        TREE_OPERAND (addr_b, 0));
3026       else
3027         return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
3028                                        TREE_OPERAND (addr_b, 0));
3029     }
3030
3031   /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
3032      that is being subsetted in the loop nest.  */
3033   if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
3034     return refs_output_dependent_p (addr_a, addr_b);
3035   else if (DR_IS_READ (a) && DR_IS_WRITE (b))
3036     return refs_anti_dependent_p (addr_a, addr_b);
3037   return refs_may_alias_p (addr_a, addr_b);
3038 }
3039
3040 /* REF_A and REF_B both satisfy access_fn_component_p.  Return true
3041    if it is meaningful to compare their associated access functions
3042    when checking for dependencies.  */
3043
3044 static bool
3045 access_fn_components_comparable_p (tree ref_a, tree ref_b)
3046 {
3047   /* Allow pairs of component refs from the following sets:
3048
3049        { REALPART_EXPR, IMAGPART_EXPR }
3050        { COMPONENT_REF }
3051        { ARRAY_REF }.  */
3052   tree_code code_a = TREE_CODE (ref_a);
3053   tree_code code_b = TREE_CODE (ref_b);
3054   if (code_a == IMAGPART_EXPR)
3055     code_a = REALPART_EXPR;
3056   if (code_b == IMAGPART_EXPR)
3057     code_b = REALPART_EXPR;
3058   if (code_a != code_b)
3059     return false;
3060
3061   if (TREE_CODE (ref_a) == COMPONENT_REF)
3062     /* ??? We cannot simply use the type of operand #0 of the refs here as
3063        the Fortran compiler smuggles type punning into COMPONENT_REFs.
3064        Use the DECL_CONTEXT of the FIELD_DECLs instead.  */
3065     return (DECL_CONTEXT (TREE_OPERAND (ref_a, 1))
3066             == DECL_CONTEXT (TREE_OPERAND (ref_b, 1)));
3067
3068   return types_compatible_p (TREE_TYPE (TREE_OPERAND (ref_a, 0)),
3069                              TREE_TYPE (TREE_OPERAND (ref_b, 0)));
3070 }
3071
3072 /* Initialize a data dependence relation RES in LOOP_NEST.  USE_ALT_INDICES
3073    is true when the main indices of A and B were not comparable so we try again
3074    with alternate indices computed on an indirect reference.  */
3075
3076 struct data_dependence_relation *
3077 initialize_data_dependence_relation (struct data_dependence_relation *res,
3078                                      vec<loop_p> loop_nest,
3079                                      bool use_alt_indices)
3080 {
3081   struct data_reference *a = DDR_A (res);
3082   struct data_reference *b = DDR_B (res);
3083   unsigned int i;
3084
3085   struct indices *indices_a = &a->indices;
3086   struct indices *indices_b = &b->indices;
3087   if (use_alt_indices)
3088     {
3089       if (TREE_CODE (DR_REF (a)) != MEM_REF)
3090         indices_a = &a->alt_indices;
3091       if (TREE_CODE (DR_REF (b)) != MEM_REF)
3092         indices_b = &b->alt_indices;
3093     }
3094   unsigned int num_dimensions_a = indices_a->access_fns.length ();
3095   unsigned int num_dimensions_b = indices_b->access_fns.length ();
3096   if (num_dimensions_a == 0 || num_dimensions_b == 0)
3097     {
3098       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3099       return res;
3100     }
3101
3102   /* For unconstrained bases, the root (highest-indexed) subscript
3103      describes a variation in the base of the original DR_REF rather
3104      than a component access.  We have no type that accurately describes
3105      the new DR_BASE_OBJECT (whose TREE_TYPE describes the type *after*
3106      applying this subscript) so limit the search to the last real
3107      component access.
3108
3109      E.g. for:
3110
3111         void
3112         f (int a[][8], int b[][8])
3113         {
3114           for (int i = 0; i < 8; ++i)
3115             a[i * 2][0] = b[i][0];
3116         }
3117
3118      the a and b accesses have a single ARRAY_REF component reference [0]
3119      but have two subscripts.  */
3120   if (indices_a->unconstrained_base)
3121     num_dimensions_a -= 1;
3122   if (indices_b->unconstrained_base)
3123     num_dimensions_b -= 1;
3124
3125   /* These structures describe sequences of component references in
3126      DR_REF (A) and DR_REF (B).  Each component reference is tied to a
3127      specific access function.  */
3128   struct {
3129     /* The sequence starts at DR_ACCESS_FN (A, START_A) of A and
3130        DR_ACCESS_FN (B, START_B) of B (inclusive) and extends to higher
3131        indices.  In C notation, these are the indices of the rightmost
3132        component references; e.g. for a sequence .b.c.d, the start
3133        index is for .d.  */
3134     unsigned int start_a;
3135     unsigned int start_b;
3136
3137     /* The sequence contains LENGTH consecutive access functions from
3138        each DR.  */
3139     unsigned int length;
3140
3141     /* The enclosing objects for the A and B sequences respectively,
3142        i.e. the objects to which DR_ACCESS_FN (A, START_A + LENGTH - 1)
3143        and DR_ACCESS_FN (B, START_B + LENGTH - 1) are applied.  */
3144     tree object_a;
3145     tree object_b;
3146   } full_seq = {}, struct_seq = {};
3147
3148   /* Before each iteration of the loop:
3149
3150      - REF_A is what you get after applying DR_ACCESS_FN (A, INDEX_A) and
3151      - REF_B is what you get after applying DR_ACCESS_FN (B, INDEX_B).  */
3152   unsigned int index_a = 0;
3153   unsigned int index_b = 0;
3154   tree ref_a = DR_REF (a);
3155   tree ref_b = DR_REF (b);
3156
3157   /* Now walk the component references from the final DR_REFs back up to
3158      the enclosing base objects.  Each component reference corresponds
3159      to one access function in the DR, with access function 0 being for
3160      the final DR_REF and the highest-indexed access function being the
3161      one that is applied to the base of the DR.
3162
3163      Look for a sequence of component references whose access functions
3164      are comparable (see access_fn_components_comparable_p).  If more
3165      than one such sequence exists, pick the one nearest the base
3166      (which is the leftmost sequence in C notation).  Store this sequence
3167      in FULL_SEQ.
3168
3169      For example, if we have:
3170
3171         struct foo { struct bar s; ... } (*a)[10], (*b)[10];
3172
3173         A: a[0][i].s.c.d
3174         B: __real b[0][i].s.e[i].f
3175
3176      (where d is the same type as the real component of f) then the access
3177      functions would be:
3178
3179                          0   1   2   3
3180         A:              .d  .c  .s [i]
3181
3182                  0   1   2   3   4   5
3183         B:  __real  .f [i]  .e  .s [i]
3184
3185      The A0/B2 column isn't comparable, since .d is a COMPONENT_REF
3186      and [i] is an ARRAY_REF.  However, the A1/B3 column contains two
3187      COMPONENT_REF accesses for struct bar, so is comparable.  Likewise
3188      the A2/B4 column contains two COMPONENT_REF accesses for struct foo,
3189      so is comparable.  The A3/B5 column contains two ARRAY_REFs that
3190      index foo[10] arrays, so is again comparable.  The sequence is
3191      therefore:
3192
3193         A: [1, 3]  (i.e. [i].s.c)
3194         B: [3, 5]  (i.e. [i].s.e)
3195
3196      Also look for sequences of component references whose access
3197      functions are comparable and whose enclosing objects have the same
3198      RECORD_TYPE.  Store this sequence in STRUCT_SEQ.  In the above
3199      example, STRUCT_SEQ would be:
3200
3201         A: [1, 2]  (i.e. s.c)
3202         B: [3, 4]  (i.e. s.e)  */
3203   while (index_a < num_dimensions_a && index_b < num_dimensions_b)
3204     {
3205       /* The alternate indices form always has a single dimension
3206          with unconstrained base.  */
3207       gcc_assert (!use_alt_indices);
3208
3209       /* REF_A and REF_B must be one of the component access types
3210          allowed by dr_analyze_indices.  */
3211       gcc_checking_assert (access_fn_component_p (ref_a));
3212       gcc_checking_assert (access_fn_component_p (ref_b));
3213
3214       /* Get the immediately-enclosing objects for REF_A and REF_B,
3215          i.e. the references *before* applying DR_ACCESS_FN (A, INDEX_A)
3216          and DR_ACCESS_FN (B, INDEX_B).  */
3217       tree object_a = TREE_OPERAND (ref_a, 0);
3218       tree object_b = TREE_OPERAND (ref_b, 0);
3219
3220       tree type_a = TREE_TYPE (object_a);
3221       tree type_b = TREE_TYPE (object_b);
3222       if (access_fn_components_comparable_p (ref_a, ref_b))
3223         {
3224           /* This pair of component accesses is comparable for dependence
3225              analysis, so we can include DR_ACCESS_FN (A, INDEX_A) and
3226              DR_ACCESS_FN (B, INDEX_B) in the sequence.  */
3227           if (full_seq.start_a + full_seq.length != index_a
3228               || full_seq.start_b + full_seq.length != index_b)
3229             {
3230               /* The accesses don't extend the current sequence,
3231                  so start a new one here.  */
3232               full_seq.start_a = index_a;
3233               full_seq.start_b = index_b;
3234               full_seq.length = 0;
3235             }
3236
3237           /* Add this pair of references to the sequence.  */
3238           full_seq.length += 1;
3239           full_seq.object_a = object_a;
3240           full_seq.object_b = object_b;
3241
3242           /* If the enclosing objects are structures (and thus have the
3243              same RECORD_TYPE), record the new sequence in STRUCT_SEQ.  */
3244           if (TREE_CODE (type_a) == RECORD_TYPE)
3245             struct_seq = full_seq;
3246
3247           /* Move to the next containing reference for both A and B.  */
3248           ref_a = object_a;
3249           ref_b = object_b;
3250           index_a += 1;
3251           index_b += 1;
3252           continue;
3253         }
3254
3255       /* Try to approach equal type sizes.  */
3256       if (!COMPLETE_TYPE_P (type_a)
3257           || !COMPLETE_TYPE_P (type_b)
3258           || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_a))
3259           || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_b)))
3260         break;
3261
3262       unsigned HOST_WIDE_INT size_a = tree_to_uhwi (TYPE_SIZE_UNIT (type_a));
3263       unsigned HOST_WIDE_INT size_b = tree_to_uhwi (TYPE_SIZE_UNIT (type_b));
3264       if (size_a <= size_b)
3265         {
3266           index_a += 1;
3267           ref_a = object_a;
3268         }
3269       if (size_b <= size_a)
3270         {
3271           index_b += 1;
3272           ref_b = object_b;
3273         }
3274     }
3275
3276   /* See whether FULL_SEQ ends at the base and whether the two bases
3277      are equal.  We do not care about TBAA or alignment info so we can
3278      use OEP_ADDRESS_OF to avoid false negatives.  */
3279   tree base_a = indices_a->base_object;
3280   tree base_b = indices_b->base_object;
3281   bool same_base_p = (full_seq.start_a + full_seq.length == num_dimensions_a
3282                       && full_seq.start_b + full_seq.length == num_dimensions_b
3283                       && (indices_a->unconstrained_base
3284                           == indices_b->unconstrained_base)
3285                       && operand_equal_p (base_a, base_b, OEP_ADDRESS_OF)
3286                       && (types_compatible_p (TREE_TYPE (base_a),
3287                                               TREE_TYPE (base_b))
3288                           || (!base_supports_access_fn_components_p (base_a)
3289                               && !base_supports_access_fn_components_p (base_b)
3290                               && operand_equal_p
3291                                    (TYPE_SIZE (TREE_TYPE (base_a)),
3292                                     TYPE_SIZE (TREE_TYPE (base_b)), 0)))
3293                       && (!loop_nest.exists ()
3294                           || (object_address_invariant_in_loop_p
3295                               (loop_nest[0], base_a))));
3296
3297   /* If the bases are the same, we can include the base variation too.
3298      E.g. the b accesses in:
3299
3300        for (int i = 0; i < n; ++i)
3301          b[i + 4][0] = b[i][0];
3302
3303      have a definite dependence distance of 4, while for:
3304
3305        for (int i = 0; i < n; ++i)
3306          a[i + 4][0] = b[i][0];
3307
3308      the dependence distance depends on the gap between a and b.
3309
3310      If the bases are different then we can only rely on the sequence
3311      rooted at a structure access, since arrays are allowed to overlap
3312      arbitrarily and change shape arbitrarily.  E.g. we treat this as
3313      valid code:
3314
3315        int a[256];
3316        ...
3317        ((int (*)[4][3]) &a[1])[i][0] += ((int (*)[4][3]) &a[2])[i][0];
3318
3319      where two lvalues with the same int[4][3] type overlap, and where
3320      both lvalues are distinct from the object's declared type.  */
3321   if (same_base_p)
3322     {
3323       if (indices_a->unconstrained_base)
3324         full_seq.length += 1;
3325     }
3326   else
3327     full_seq = struct_seq;
3328
3329   /* Punt if we didn't find a suitable sequence.  */
3330   if (full_seq.length == 0)
3331     {
3332       if (use_alt_indices
3333           || (TREE_CODE (DR_REF (a)) == MEM_REF
3334               && TREE_CODE (DR_REF (b)) == MEM_REF)
3335           || may_be_nonaddressable_p (DR_REF (a))
3336           || may_be_nonaddressable_p (DR_REF (b)))
3337         {
3338           /* Fully exhausted possibilities.  */
3339           DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3340           return res;
3341         }
3342
3343       /* Try evaluating both DRs as dereferences of pointers.  */
3344       if (!a->alt_indices.base_object
3345           && TREE_CODE (DR_REF (a)) != MEM_REF)
3346         {
3347           tree alt_ref = build2 (MEM_REF, TREE_TYPE (DR_REF (a)),
3348                                  build1 (ADDR_EXPR, ptr_type_node, DR_REF (a)),
3349                                  build_int_cst
3350                                    (reference_alias_ptr_type (DR_REF (a)), 0));
3351           dr_analyze_indices (&a->alt_indices, alt_ref,
3352                               loop_preheader_edge (loop_nest[0]),
3353                               loop_containing_stmt (DR_STMT (a)));
3354         }
3355       if (!b->alt_indices.base_object
3356           && TREE_CODE (DR_REF (b)) != MEM_REF)
3357         {
3358           tree alt_ref = build2 (MEM_REF, TREE_TYPE (DR_REF (b)),
3359                                  build1 (ADDR_EXPR, ptr_type_node, DR_REF (b)),
3360                                  build_int_cst
3361                                    (reference_alias_ptr_type (DR_REF (b)), 0));
3362           dr_analyze_indices (&b->alt_indices, alt_ref,
3363                               loop_preheader_edge (loop_nest[0]),
3364                               loop_containing_stmt (DR_STMT (b)));
3365         }
3366       return initialize_data_dependence_relation (res, loop_nest, true);
3367     }
3368
3369   if (!same_base_p)
3370     {
3371       /* Partial overlap is possible for different bases when strict aliasing
3372          is not in effect.  It's also possible if either base involves a union
3373          access; e.g. for:
3374
3375            struct s1 { int a[2]; };
3376            struct s2 { struct s1 b; int c; };
3377            struct s3 { int d; struct s1 e; };
3378            union u { struct s2 f; struct s3 g; } *p, *q;
3379
3380          the s1 at "p->f.b" (base "p->f") partially overlaps the s1 at
3381          "p->g.e" (base "p->g") and might partially overlap the s1 at
3382          "q->g.e" (base "q->g").  */
3383       if (!flag_strict_aliasing
3384           || ref_contains_union_access_p (full_seq.object_a)
3385           || ref_contains_union_access_p (full_seq.object_b))
3386         {
3387           DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3388           return res;
3389         }
3390
3391       DDR_COULD_BE_INDEPENDENT_P (res) = true;
3392       if (!loop_nest.exists ()
3393           || (object_address_invariant_in_loop_p (loop_nest[0],
3394                                                   full_seq.object_a)
3395               && object_address_invariant_in_loop_p (loop_nest[0],
3396                                                      full_seq.object_b)))
3397         {
3398           DDR_OBJECT_A (res) = full_seq.object_a;
3399           DDR_OBJECT_B (res) = full_seq.object_b;
3400         }
3401     }
3402
3403   DDR_AFFINE_P (res) = true;
3404   DDR_ARE_DEPENDENT (res) = NULL_TREE;
3405   DDR_SUBSCRIPTS (res).create (full_seq.length);
3406   DDR_LOOP_NEST (res) = loop_nest;
3407   DDR_SELF_REFERENCE (res) = false;
3408
3409   for (i = 0; i < full_seq.length; ++i)
3410     {
3411       struct subscript *subscript;
3412
3413       subscript = XNEW (struct subscript);
3414       SUB_ACCESS_FN (subscript, 0) = indices_a->access_fns[full_seq.start_a + i];
3415       SUB_ACCESS_FN (subscript, 1) = indices_b->access_fns[full_seq.start_b + i];
3416       SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
3417       SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
3418       SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
3419       SUB_DISTANCE (subscript) = chrec_dont_know;
3420       DDR_SUBSCRIPTS (res).safe_push (subscript);
3421     }
3422
3423   return res;
3424 }
3425
3426 /* Initialize a data dependence relation between data accesses A and
3427    B.  NB_LOOPS is the number of loops surrounding the references: the
3428    size of the classic distance/direction vectors.  */
3429
3430 struct data_dependence_relation *
3431 initialize_data_dependence_relation (struct data_reference *a,
3432                                      struct data_reference *b,
3433                                      vec<loop_p> loop_nest)
3434 {
3435   data_dependence_relation *res = XCNEW (struct data_dependence_relation);
3436   DDR_A (res) = a;
3437   DDR_B (res) = b;
3438   DDR_LOOP_NEST (res).create (0);
3439   DDR_SUBSCRIPTS (res).create (0);
3440   DDR_DIR_VECTS (res).create (0);
3441   DDR_DIST_VECTS (res).create (0);
3442
3443   if (a == NULL || b == NULL)
3444     {
3445       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3446       return res;
3447     }
3448
3449   /* If the data references do not alias, then they are independent.  */
3450   if (!dr_may_alias_p (a, b, loop_nest.exists () ? loop_nest[0] : NULL))
3451     {
3452       DDR_ARE_DEPENDENT (res) = chrec_known;
3453       return res;
3454     }
3455
3456   return initialize_data_dependence_relation (res, loop_nest, false);
3457 }
3458
3459
3460 /* Frees memory used by the conflict function F.  */
3461
3462 static void
3463 free_conflict_function (conflict_function *f)
3464 {
3465   unsigned i;
3466
3467   if (CF_NONTRIVIAL_P (f))
3468     {
3469       for (i = 0; i < f->n; i++)
3470         affine_fn_free (f->fns[i]);
3471     }
3472   free (f);
3473 }
3474
3475 /* Frees memory used by SUBSCRIPTS.  */
3476
3477 static void
3478 free_subscripts (vec<subscript_p> subscripts)
3479 {
3480   for (subscript_p s : subscripts)
3481     {
3482       free_conflict_function (s->conflicting_iterations_in_a);
3483       free_conflict_function (s->conflicting_iterations_in_b);
3484       free (s);
3485     }
3486   subscripts.release ();
3487 }
3488
3489 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
3490    description.  */
3491
3492 static inline void
3493 finalize_ddr_dependent (struct data_dependence_relation *ddr,
3494                         tree chrec)
3495 {
3496   DDR_ARE_DEPENDENT (ddr) = chrec;
3497   free_subscripts (DDR_SUBSCRIPTS (ddr));
3498   DDR_SUBSCRIPTS (ddr).create (0);
3499 }
3500
3501 /* The dependence relation DDR cannot be represented by a distance
3502    vector.  */
3503
3504 static inline void
3505 non_affine_dependence_relation (struct data_dependence_relation *ddr)
3506 {
3507   if (dump_file && (dump_flags & TDF_DETAILS))
3508     fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
3509
3510   DDR_AFFINE_P (ddr) = false;
3511 }
3512
3513 \f
3514
3515 /* This section contains the classic Banerjee tests.  */
3516
3517 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
3518    variables, i.e., if the ZIV (Zero Index Variable) test is true.  */
3519
3520 static inline bool
3521 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
3522 {
3523   return (evolution_function_is_constant_p (chrec_a)
3524           && evolution_function_is_constant_p (chrec_b));
3525 }
3526
3527 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
3528    variable, i.e., if the SIV (Single Index Variable) test is true.  */
3529
3530 static bool
3531 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
3532 {
3533   if ((evolution_function_is_constant_p (chrec_a)
3534        && evolution_function_is_univariate_p (chrec_b))
3535       || (evolution_function_is_constant_p (chrec_b)
3536           && evolution_function_is_univariate_p (chrec_a)))
3537     return true;
3538
3539   if (evolution_function_is_univariate_p (chrec_a)
3540       && evolution_function_is_univariate_p (chrec_b))
3541     {
3542       switch (TREE_CODE (chrec_a))
3543         {
3544         case POLYNOMIAL_CHREC:
3545           switch (TREE_CODE (chrec_b))
3546             {
3547             case POLYNOMIAL_CHREC:
3548               if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
3549                 return false;
3550               /* FALLTHRU */
3551
3552             default:
3553               return true;
3554             }
3555
3556         default:
3557           return true;
3558         }
3559     }
3560
3561   return false;
3562 }
3563
3564 /* Creates a conflict function with N dimensions.  The affine functions
3565    in each dimension follow.  */
3566
3567 static conflict_function *
3568 conflict_fn (unsigned n, ...)
3569 {
3570   unsigned i;
3571   conflict_function *ret = XCNEW (conflict_function);
3572   va_list ap;
3573
3574   gcc_assert (n > 0 && n <= MAX_DIM);
3575   va_start (ap, n);
3576
3577   ret->n = n;
3578   for (i = 0; i < n; i++)
3579     ret->fns[i] = va_arg (ap, affine_fn);
3580   va_end (ap);
3581
3582   return ret;
3583 }
3584
3585 /* Returns constant affine function with value CST.  */
3586
3587 static affine_fn
3588 affine_fn_cst (tree cst)
3589 {
3590   affine_fn fn;
3591   fn.create (1);
3592   fn.quick_push (cst);
3593   return fn;
3594 }
3595
3596 /* Returns affine function with single variable, CST + COEF * x_DIM.  */
3597
3598 static affine_fn
3599 affine_fn_univar (tree cst, unsigned dim, tree coef)
3600 {
3601   affine_fn fn;
3602   fn.create (dim + 1);
3603   unsigned i;
3604
3605   gcc_assert (dim > 0);
3606   fn.quick_push (cst);
3607   for (i = 1; i < dim; i++)
3608     fn.quick_push (integer_zero_node);
3609   fn.quick_push (coef);
3610   return fn;
3611 }
3612
3613 /* Analyze a ZIV (Zero Index Variable) subscript.  *OVERLAPS_A and
3614    *OVERLAPS_B are initialized to the functions that describe the
3615    relation between the elements accessed twice by CHREC_A and
3616    CHREC_B.  For k >= 0, the following property is verified:
3617
3618    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
3619
3620 static void
3621 analyze_ziv_subscript (tree chrec_a,
3622                        tree chrec_b,
3623                        conflict_function **overlaps_a,
3624                        conflict_function **overlaps_b,
3625                        tree *last_conflicts)
3626 {
3627   tree type, difference;
3628   dependence_stats.num_ziv++;
3629
3630   if (dump_file && (dump_flags & TDF_DETAILS))
3631     fprintf (dump_file, "(analyze_ziv_subscript \n");
3632
3633   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
3634   chrec_a = chrec_convert (type, chrec_a, NULL);
3635   chrec_b = chrec_convert (type, chrec_b, NULL);
3636   difference = chrec_fold_minus (type, chrec_a, chrec_b);
3637
3638   switch (TREE_CODE (difference))
3639     {
3640     case INTEGER_CST:
3641       if (integer_zerop (difference))
3642         {
3643           /* The difference is equal to zero: the accessed index
3644              overlaps for each iteration in the loop.  */
3645           *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3646           *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3647           *last_conflicts = chrec_dont_know;
3648           dependence_stats.num_ziv_dependent++;
3649         }
3650       else
3651         {
3652           /* The accesses do not overlap.  */
3653           *overlaps_a = conflict_fn_no_dependence ();
3654           *overlaps_b = conflict_fn_no_dependence ();
3655           *last_conflicts = integer_zero_node;
3656           dependence_stats.num_ziv_independent++;
3657         }
3658       break;
3659
3660     default:
3661       /* We're not sure whether the indexes overlap.  For the moment,
3662          conservatively answer "don't know".  */
3663       if (dump_file && (dump_flags & TDF_DETAILS))
3664         fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
3665
3666       *overlaps_a = conflict_fn_not_known ();
3667       *overlaps_b = conflict_fn_not_known ();
3668       *last_conflicts = chrec_dont_know;
3669       dependence_stats.num_ziv_unimplemented++;
3670       break;
3671     }
3672
3673   if (dump_file && (dump_flags & TDF_DETAILS))
3674     fprintf (dump_file, ")\n");
3675 }
3676
3677 /* Similar to max_stmt_executions_int, but returns the bound as a tree,
3678    and only if it fits to the int type.  If this is not the case, or the
3679    bound  on the number of iterations of LOOP could not be derived, returns
3680    chrec_dont_know.  */
3681
3682 static tree
3683 max_stmt_executions_tree (class loop *loop)
3684 {
3685   widest_int nit;
3686
3687   if (!max_stmt_executions (loop, &nit))
3688     return chrec_dont_know;
3689
3690   if (!wi::fits_to_tree_p (nit, unsigned_type_node))
3691     return chrec_dont_know;
3692
3693   return wide_int_to_tree (unsigned_type_node, nit);
3694 }
3695
3696 /* Determine whether the CHREC is always positive/negative.  If the expression
3697    cannot be statically analyzed, return false, otherwise set the answer into
3698    VALUE.  */
3699
3700 static bool
3701 chrec_is_positive (tree chrec, bool *value)
3702 {
3703   bool value0, value1, value2;
3704   tree end_value, nb_iter;
3705
3706   switch (TREE_CODE (chrec))
3707     {
3708     case POLYNOMIAL_CHREC:
3709       if (!chrec_is_positive (CHREC_LEFT (chrec), &value0)
3710           || !chrec_is_positive (CHREC_RIGHT (chrec), &value1))
3711         return false;
3712
3713       /* FIXME -- overflows.  */
3714       if (value0 == value1)
3715         {
3716           *value = value0;
3717           return true;
3718         }
3719
3720       /* Otherwise the chrec is under the form: "{-197, +, 2}_1",
3721          and the proof consists in showing that the sign never
3722          changes during the execution of the loop, from 0 to
3723          loop->nb_iterations.  */
3724       if (!evolution_function_is_affine_p (chrec))
3725         return false;
3726
3727       nb_iter = number_of_latch_executions (get_chrec_loop (chrec));
3728       if (chrec_contains_undetermined (nb_iter))
3729         return false;
3730
3731 #if 0
3732       /* TODO -- If the test is after the exit, we may decrease the number of
3733          iterations by one.  */
3734       if (after_exit)
3735         nb_iter = chrec_fold_minus (type, nb_iter, build_int_cst (type, 1));
3736 #endif
3737
3738       end_value = chrec_apply (CHREC_VARIABLE (chrec), chrec, nb_iter);
3739
3740       if (!chrec_is_positive (end_value, &value2))
3741         return false;
3742
3743       *value = value0;
3744       return value0 == value1;
3745
3746     case INTEGER_CST:
3747       switch (tree_int_cst_sgn (chrec))
3748         {
3749         case -1:
3750           *value = false;
3751           break;
3752         case 1:
3753           *value = true;
3754           break;
3755         default:
3756           return false;
3757         }
3758       return true;
3759
3760     default:
3761       return false;
3762     }
3763 }
3764
3765
3766 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
3767    constant, and CHREC_B is an affine function.  *OVERLAPS_A and
3768    *OVERLAPS_B are initialized to the functions that describe the
3769    relation between the elements accessed twice by CHREC_A and
3770    CHREC_B.  For k >= 0, the following property is verified:
3771
3772    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
3773
3774 static void
3775 analyze_siv_subscript_cst_affine (tree chrec_a,
3776                                   tree chrec_b,
3777                                   conflict_function **overlaps_a,
3778                                   conflict_function **overlaps_b,
3779                                   tree *last_conflicts)
3780 {
3781   bool value0, value1, value2;
3782   tree type, difference, tmp;
3783
3784   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
3785   chrec_a = chrec_convert (type, chrec_a, NULL);
3786   chrec_b = chrec_convert (type, chrec_b, NULL);
3787   difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
3788
3789   /* Special case overlap in the first iteration.  */
3790   if (integer_zerop (difference))
3791     {
3792       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3793       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3794       *last_conflicts = integer_one_node;
3795       return;
3796     }
3797
3798   if (!chrec_is_positive (initial_condition (difference), &value0))
3799     {
3800       if (dump_file && (dump_flags & TDF_DETAILS))
3801         fprintf (dump_file, "siv test failed: chrec is not positive.\n");
3802
3803       dependence_stats.num_siv_unimplemented++;
3804       *overlaps_a = conflict_fn_not_known ();
3805       *overlaps_b = conflict_fn_not_known ();
3806       *last_conflicts = chrec_dont_know;
3807       return;
3808     }
3809   else
3810     {
3811       if (value0 == false)
3812         {
3813           if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
3814               || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
3815             {
3816               if (dump_file && (dump_flags & TDF_DETAILS))
3817                 fprintf (dump_file, "siv test failed: chrec not positive.\n");
3818
3819               *overlaps_a = conflict_fn_not_known ();
3820               *overlaps_b = conflict_fn_not_known ();
3821               *last_conflicts = chrec_dont_know;
3822               dependence_stats.num_siv_unimplemented++;
3823               return;
3824             }
3825           else
3826             {
3827               if (value1 == true)
3828                 {
3829                   /* Example:
3830                      chrec_a = 12
3831                      chrec_b = {10, +, 1}
3832                   */
3833
3834                   if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
3835                     {
3836                       HOST_WIDE_INT numiter;
3837                       class loop *loop = get_chrec_loop (chrec_b);
3838
3839                       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3840                       tmp = fold_build2 (EXACT_DIV_EXPR, type,
3841                                          fold_build1 (ABS_EXPR, type, difference),
3842                                          CHREC_RIGHT (chrec_b));
3843                       *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
3844                       *last_conflicts = integer_one_node;
3845
3846
3847                       /* Perform weak-zero siv test to see if overlap is
3848                          outside the loop bounds.  */
3849                       numiter = max_stmt_executions_int (loop);
3850
3851                       if (numiter >= 0
3852                           && compare_tree_int (tmp, numiter) > 0)
3853                         {
3854                           free_conflict_function (*overlaps_a);
3855                           free_conflict_function (*overlaps_b);
3856                           *overlaps_a = conflict_fn_no_dependence ();
3857                           *overlaps_b = conflict_fn_no_dependence ();
3858                           *last_conflicts = integer_zero_node;
3859                           dependence_stats.num_siv_independent++;
3860                           return;
3861                         }
3862                       dependence_stats.num_siv_dependent++;
3863                       return;
3864                     }
3865
3866                   /* When the step does not divide the difference, there are
3867                      no overlaps.  */
3868                   else
3869                     {
3870                       *overlaps_a = conflict_fn_no_dependence ();
3871                       *overlaps_b = conflict_fn_no_dependence ();
3872                       *last_conflicts = integer_zero_node;
3873                       dependence_stats.num_siv_independent++;
3874                       return;
3875                     }
3876                 }
3877
3878               else
3879                 {
3880                   /* Example:
3881                      chrec_a = 12
3882                      chrec_b = {10, +, -1}
3883
3884                      In this case, chrec_a will not overlap with chrec_b.  */
3885                   *overlaps_a = conflict_fn_no_dependence ();
3886                   *overlaps_b = conflict_fn_no_dependence ();
3887                   *last_conflicts = integer_zero_node;
3888                   dependence_stats.num_siv_independent++;
3889                   return;
3890                 }
3891             }
3892         }
3893       else
3894         {
3895           if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
3896               || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
3897             {
3898               if (dump_file && (dump_flags & TDF_DETAILS))
3899                 fprintf (dump_file, "siv test failed: chrec not positive.\n");
3900
3901               *overlaps_a = conflict_fn_not_known ();
3902               *overlaps_b = conflict_fn_not_known ();
3903               *last_conflicts = chrec_dont_know;
3904               dependence_stats.num_siv_unimplemented++;
3905               return;
3906             }
3907           else
3908             {
3909               if (value2 == false)
3910                 {
3911                   /* Example:
3912                      chrec_a = 3
3913                      chrec_b = {10, +, -1}
3914                   */
3915                   if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
3916                     {
3917                       HOST_WIDE_INT numiter;
3918                       class loop *loop = get_chrec_loop (chrec_b);
3919
3920                       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3921                       tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
3922                                          CHREC_RIGHT (chrec_b));
3923                       *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
3924                       *last_conflicts = integer_one_node;
3925
3926                       /* Perform weak-zero siv test to see if overlap is
3927                          outside the loop bounds.  */
3928                       numiter = max_stmt_executions_int (loop);
3929
3930                       if (numiter >= 0
3931                           && compare_tree_int (tmp, numiter) > 0)
3932                         {
3933                           free_conflict_function (*overlaps_a);
3934                           free_conflict_function (*overlaps_b);
3935                           *overlaps_a = conflict_fn_no_dependence ();
3936                           *overlaps_b = conflict_fn_no_dependence ();
3937                           *last_conflicts = integer_zero_node;
3938                           dependence_stats.num_siv_independent++;
3939                           return;
3940                         }
3941                       dependence_stats.num_siv_dependent++;
3942                       return;
3943                     }
3944
3945                   /* When the step does not divide the difference, there
3946                      are no overlaps.  */
3947                   else
3948                     {
3949                       *overlaps_a = conflict_fn_no_dependence ();
3950                       *overlaps_b = conflict_fn_no_dependence ();
3951                       *last_conflicts = integer_zero_node;
3952                       dependence_stats.num_siv_independent++;
3953                       return;
3954                     }
3955                 }
3956               else
3957                 {
3958                   /* Example:
3959                      chrec_a = 3
3960                      chrec_b = {4, +, 1}
3961
3962                      In this case, chrec_a will not overlap with chrec_b.  */
3963                   *overlaps_a = conflict_fn_no_dependence ();
3964                   *overlaps_b = conflict_fn_no_dependence ();
3965                   *last_conflicts = integer_zero_node;
3966                   dependence_stats.num_siv_independent++;
3967                   return;
3968                 }
3969             }
3970         }
3971     }
3972 }
3973
3974 /* Helper recursive function for initializing the matrix A.  Returns
3975    the initial value of CHREC.  */
3976
3977 static tree
3978 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
3979 {
3980   gcc_assert (chrec);
3981
3982   switch (TREE_CODE (chrec))
3983     {
3984     case POLYNOMIAL_CHREC:
3985       HOST_WIDE_INT chrec_right;
3986       if (!cst_and_fits_in_hwi (CHREC_RIGHT (chrec)))
3987         return chrec_dont_know;
3988       chrec_right = int_cst_value (CHREC_RIGHT (chrec));
3989       /* We want to be able to negate without overflow.  */
3990       if (chrec_right == HOST_WIDE_INT_MIN)
3991         return chrec_dont_know;
3992       A[index][0] = mult * chrec_right;
3993       return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
3994
3995     case PLUS_EXPR:
3996     case MULT_EXPR:
3997     case MINUS_EXPR:
3998       {
3999         tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
4000         tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
4001
4002         return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
4003       }
4004
4005     CASE_CONVERT:
4006       {
4007         tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
4008         return chrec_convert (chrec_type (chrec), op, NULL);
4009       }
4010
4011     case BIT_NOT_EXPR:
4012       {
4013         /* Handle ~X as -1 - X.  */
4014         tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
4015         return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
4016                               build_int_cst (TREE_TYPE (chrec), -1), op);
4017       }
4018
4019     case INTEGER_CST:
4020       return chrec;
4021
4022     default:
4023       gcc_unreachable ();
4024       return NULL_TREE;
4025     }
4026 }
4027
4028 #define FLOOR_DIV(x,y) ((x) / (y))
4029
4030 /* Solves the special case of the Diophantine equation:
4031    | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
4032
4033    Computes the descriptions OVERLAPS_A and OVERLAPS_B.  NITER is the
4034    number of iterations that loops X and Y run.  The overlaps will be
4035    constructed as evolutions in dimension DIM.  */
4036
4037 static void
4038 compute_overlap_steps_for_affine_univar (HOST_WIDE_INT niter,
4039                                          HOST_WIDE_INT step_a,
4040                                          HOST_WIDE_INT step_b,
4041                                          affine_fn *overlaps_a,
4042                                          affine_fn *overlaps_b,
4043                                          tree *last_conflicts, int dim)
4044 {
4045   if (((step_a > 0 && step_b > 0)
4046        || (step_a < 0 && step_b < 0)))
4047     {
4048       HOST_WIDE_INT step_overlaps_a, step_overlaps_b;
4049       HOST_WIDE_INT gcd_steps_a_b, last_conflict, tau2;
4050
4051       gcd_steps_a_b = gcd (step_a, step_b);
4052       step_overlaps_a = step_b / gcd_steps_a_b;
4053       step_overlaps_b = step_a / gcd_steps_a_b;
4054
4055       if (niter > 0)
4056         {
4057           tau2 = FLOOR_DIV (niter, step_overlaps_a);
4058           tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
4059           last_conflict = tau2;
4060           *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
4061         }
4062       else
4063         *last_conflicts = chrec_dont_know;
4064
4065       *overlaps_a = affine_fn_univar (integer_zero_node, dim,
4066                                       build_int_cst (NULL_TREE,
4067                                                      step_overlaps_a));
4068       *overlaps_b = affine_fn_univar (integer_zero_node, dim,
4069                                       build_int_cst (NULL_TREE,
4070                                                      step_overlaps_b));
4071     }
4072
4073   else
4074     {
4075       *overlaps_a = affine_fn_cst (integer_zero_node);
4076       *overlaps_b = affine_fn_cst (integer_zero_node);
4077       *last_conflicts = integer_zero_node;
4078     }
4079 }
4080
4081 /* Solves the special case of a Diophantine equation where CHREC_A is
4082    an affine bivariate function, and CHREC_B is an affine univariate
4083    function.  For example,
4084
4085    | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
4086
4087    has the following overlapping functions:
4088
4089    | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
4090    | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
4091    | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
4092
4093    FORNOW: This is a specialized implementation for a case occurring in
4094    a common benchmark.  Implement the general algorithm.  */
4095
4096 static void
4097 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
4098                                       conflict_function **overlaps_a,
4099                                       conflict_function **overlaps_b,
4100                                       tree *last_conflicts)
4101 {
4102   bool xz_p, yz_p, xyz_p;
4103   HOST_WIDE_INT step_x, step_y, step_z;
4104   HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
4105   affine_fn overlaps_a_xz, overlaps_b_xz;
4106   affine_fn overlaps_a_yz, overlaps_b_yz;
4107   affine_fn overlaps_a_xyz, overlaps_b_xyz;
4108   affine_fn ova1, ova2, ovb;
4109   tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
4110
4111   step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
4112   step_y = int_cst_value (CHREC_RIGHT (chrec_a));
4113   step_z = int_cst_value (CHREC_RIGHT (chrec_b));
4114
4115   niter_x = max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)));
4116   niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a));
4117   niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b));
4118
4119   if (niter_x < 0 || niter_y < 0 || niter_z < 0)
4120     {
4121       if (dump_file && (dump_flags & TDF_DETAILS))
4122         fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
4123
4124       *overlaps_a = conflict_fn_not_known ();
4125       *overlaps_b = conflict_fn_not_known ();
4126       *last_conflicts = chrec_dont_know;
4127       return;
4128     }
4129
4130   niter = MIN (niter_x, niter_z);
4131   compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
4132                                            &overlaps_a_xz,
4133                                            &overlaps_b_xz,
4134                                            &last_conflicts_xz, 1);
4135   niter = MIN (niter_y, niter_z);
4136   compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
4137                                            &overlaps_a_yz,
4138                                            &overlaps_b_yz,
4139                                            &last_conflicts_yz, 2);
4140   niter = MIN (niter_x, niter_z);
4141   niter = MIN (niter_y, niter);
4142   compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
4143                                            &overlaps_a_xyz,
4144                                            &overlaps_b_xyz,
4145                                            &last_conflicts_xyz, 3);
4146
4147   xz_p = !integer_zerop (last_conflicts_xz);
4148   yz_p = !integer_zerop (last_conflicts_yz);
4149   xyz_p = !integer_zerop (last_conflicts_xyz);
4150
4151   if (xz_p || yz_p || xyz_p)
4152     {
4153       ova1 = affine_fn_cst (integer_zero_node);
4154       ova2 = affine_fn_cst (integer_zero_node);
4155       ovb = affine_fn_cst (integer_zero_node);
4156       if (xz_p)
4157         {
4158           affine_fn t0 = ova1;
4159           affine_fn t2 = ovb;
4160
4161           ova1 = affine_fn_plus (ova1, overlaps_a_xz);
4162           ovb = affine_fn_plus (ovb, overlaps_b_xz);
4163           affine_fn_free (t0);
4164           affine_fn_free (t2);
4165           *last_conflicts = last_conflicts_xz;
4166         }
4167       if (yz_p)
4168         {
4169           affine_fn t0 = ova2;
4170           affine_fn t2 = ovb;
4171
4172           ova2 = affine_fn_plus (ova2, overlaps_a_yz);
4173           ovb = affine_fn_plus (ovb, overlaps_b_yz);
4174           affine_fn_free (t0);
4175           affine_fn_free (t2);
4176           *last_conflicts = last_conflicts_yz;
4177         }
4178       if (xyz_p)
4179         {
4180           affine_fn t0 = ova1;
4181           affine_fn t2 = ova2;
4182           affine_fn t4 = ovb;
4183
4184           ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
4185           ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
4186           ovb = affine_fn_plus (ovb, overlaps_b_xyz);
4187           affine_fn_free (t0);
4188           affine_fn_free (t2);
4189           affine_fn_free (t4);
4190           *last_conflicts = last_conflicts_xyz;
4191         }
4192       *overlaps_a = conflict_fn (2, ova1, ova2);
4193       *overlaps_b = conflict_fn (1, ovb);
4194     }
4195   else
4196     {
4197       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4198       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4199       *last_conflicts = integer_zero_node;
4200     }
4201
4202   affine_fn_free (overlaps_a_xz);
4203   affine_fn_free (overlaps_b_xz);
4204   affine_fn_free (overlaps_a_yz);
4205   affine_fn_free (overlaps_b_yz);
4206   affine_fn_free (overlaps_a_xyz);
4207   affine_fn_free (overlaps_b_xyz);
4208 }
4209
4210 /* Copy the elements of vector VEC1 with length SIZE to VEC2.  */
4211
4212 static void
4213 lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
4214                     int size)
4215 {
4216   memcpy (vec2, vec1, size * sizeof (*vec1));
4217 }
4218
4219 /* Copy the elements of M x N matrix MAT1 to MAT2.  */
4220
4221 static void
4222 lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
4223                     int m, int n)
4224 {
4225   int i;
4226
4227   for (i = 0; i < m; i++)
4228     lambda_vector_copy (mat1[i], mat2[i], n);
4229 }
4230
4231 /* Store the N x N identity matrix in MAT.  */
4232
4233 static void
4234 lambda_matrix_id (lambda_matrix mat, int size)
4235 {
4236   int i, j;
4237
4238   for (i = 0; i < size; i++)
4239     for (j = 0; j < size; j++)
4240       mat[i][j] = (i == j) ? 1 : 0;
4241 }
4242
4243 /* Return the index of the first nonzero element of vector VEC1 between
4244    START and N.  We must have START <= N.
4245    Returns N if VEC1 is the zero vector.  */
4246
4247 static int
4248 lambda_vector_first_nz (lambda_vector vec1, int n, int start)
4249 {
4250   int j = start;
4251   while (j < n && vec1[j] == 0)
4252     j++;
4253   return j;
4254 }
4255
4256 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
4257    R2 = R2 + CONST1 * R1.  */
4258
4259 static bool
4260 lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2,
4261                        lambda_int const1)
4262 {
4263   int i;
4264
4265   if (const1 == 0)
4266     return true;
4267
4268   for (i = 0; i < n; i++)
4269     {
4270       bool ovf;
4271       lambda_int tem = mul_hwi (mat[r1][i], const1, &ovf);
4272       if (ovf)
4273         return false;
4274       lambda_int tem2 = add_hwi (mat[r2][i], tem, &ovf);
4275       if (ovf || tem2 == HOST_WIDE_INT_MIN)
4276         return false;
4277       mat[r2][i] = tem2;
4278     }
4279
4280   return true;
4281 }
4282
4283 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
4284    and store the result in VEC2.  */
4285
4286 static void
4287 lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
4288                           int size, lambda_int const1)
4289 {
4290   int i;
4291
4292   if (const1 == 0)
4293     lambda_vector_clear (vec2, size);
4294   else
4295     for (i = 0; i < size; i++)
4296       vec2[i] = const1 * vec1[i];
4297 }
4298
4299 /* Negate vector VEC1 with length SIZE and store it in VEC2.  */
4300
4301 static void
4302 lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
4303                       int size)
4304 {
4305   lambda_vector_mult_const (vec1, vec2, size, -1);
4306 }
4307
4308 /* Negate row R1 of matrix MAT which has N columns.  */
4309
4310 static void
4311 lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
4312 {
4313   lambda_vector_negate (mat[r1], mat[r1], n);
4314 }
4315
4316 /* Return true if two vectors are equal.  */
4317
4318 static bool
4319 lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
4320 {
4321   int i;
4322   for (i = 0; i < size; i++)
4323     if (vec1[i] != vec2[i])
4324       return false;
4325   return true;
4326 }
4327
4328 /* Given an M x N integer matrix A, this function determines an M x
4329    M unimodular matrix U, and an M x N echelon matrix S such that
4330    "U.A = S".  This decomposition is also known as "right Hermite".
4331
4332    Ref: Algorithm 2.1 page 33 in "Loop Transformations for
4333    Restructuring Compilers" Utpal Banerjee.  */
4334
4335 static bool
4336 lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
4337                              lambda_matrix S, lambda_matrix U)
4338 {
4339   int i, j, i0 = 0;
4340
4341   lambda_matrix_copy (A, S, m, n);
4342   lambda_matrix_id (U, m);
4343
4344   for (j = 0; j < n; j++)
4345     {
4346       if (lambda_vector_first_nz (S[j], m, i0) < m)
4347         {
4348           ++i0;
4349           for (i = m - 1; i >= i0; i--)
4350             {
4351               while (S[i][j] != 0)
4352                 {
4353                   lambda_int factor, a, b;
4354
4355                   a = S[i-1][j];
4356                   b = S[i][j];
4357                   gcc_assert (a != HOST_WIDE_INT_MIN);
4358                   factor = a / b;
4359
4360                   if (!lambda_matrix_row_add (S, n, i, i-1, -factor))
4361                     return false;
4362                   std::swap (S[i], S[i-1]);
4363
4364                   if (!lambda_matrix_row_add (U, m, i, i-1, -factor))
4365                     return false;
4366                   std::swap (U[i], U[i-1]);
4367                 }
4368             }
4369         }
4370     }
4371
4372   return true;
4373 }
4374
4375 /* Determines the overlapping elements due to accesses CHREC_A and
4376    CHREC_B, that are affine functions.  This function cannot handle
4377    symbolic evolution functions, ie. when initial conditions are
4378    parameters, because it uses lambda matrices of integers.  */
4379
4380 static void
4381 analyze_subscript_affine_affine (tree chrec_a,
4382                                  tree chrec_b,
4383                                  conflict_function **overlaps_a,
4384                                  conflict_function **overlaps_b,
4385                                  tree *last_conflicts)
4386 {
4387   unsigned nb_vars_a, nb_vars_b, dim;
4388   lambda_int gamma, gcd_alpha_beta;
4389   lambda_matrix A, U, S;
4390   struct obstack scratch_obstack;
4391
4392   if (eq_evolutions_p (chrec_a, chrec_b))
4393     {
4394       /* The accessed index overlaps for each iteration in the
4395          loop.  */
4396       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4397       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4398       *last_conflicts = chrec_dont_know;
4399       return;
4400     }
4401   if (dump_file && (dump_flags & TDF_DETAILS))
4402     fprintf (dump_file, "(analyze_subscript_affine_affine \n");
4403
4404   /* For determining the initial intersection, we have to solve a
4405      Diophantine equation.  This is the most time consuming part.
4406
4407      For answering to the question: "Is there a dependence?" we have
4408      to prove that there exists a solution to the Diophantine
4409      equation, and that the solution is in the iteration domain,
4410      i.e. the solution is positive or zero, and that the solution
4411      happens before the upper bound loop.nb_iterations.  Otherwise
4412      there is no dependence.  This function outputs a description of
4413      the iterations that hold the intersections.  */
4414
4415   nb_vars_a = nb_vars_in_chrec (chrec_a);
4416   nb_vars_b = nb_vars_in_chrec (chrec_b);
4417
4418   gcc_obstack_init (&scratch_obstack);
4419
4420   dim = nb_vars_a + nb_vars_b;
4421   U = lambda_matrix_new (dim, dim, &scratch_obstack);
4422   A = lambda_matrix_new (dim, 1, &scratch_obstack);
4423   S = lambda_matrix_new (dim, 1, &scratch_obstack);
4424
4425   tree init_a = initialize_matrix_A (A, chrec_a, 0, 1);
4426   tree init_b = initialize_matrix_A (A, chrec_b, nb_vars_a, -1);
4427   if (init_a == chrec_dont_know
4428       || init_b == chrec_dont_know)
4429     {
4430       if (dump_file && (dump_flags & TDF_DETAILS))
4431         fprintf (dump_file, "affine-affine test failed: "
4432                  "representation issue.\n");
4433       *overlaps_a = conflict_fn_not_known ();
4434       *overlaps_b = conflict_fn_not_known ();
4435       *last_conflicts = chrec_dont_know;
4436       goto end_analyze_subs_aa;
4437     }
4438   gamma = int_cst_value (init_b) - int_cst_value (init_a);
4439
4440   /* Don't do all the hard work of solving the Diophantine equation
4441      when we already know the solution: for example,
4442      | {3, +, 1}_1
4443      | {3, +, 4}_2
4444      | gamma = 3 - 3 = 0.
4445      Then the first overlap occurs during the first iterations:
4446      | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
4447   */
4448   if (gamma == 0)
4449     {
4450       if (nb_vars_a == 1 && nb_vars_b == 1)
4451         {
4452           HOST_WIDE_INT step_a, step_b;
4453           HOST_WIDE_INT niter, niter_a, niter_b;
4454           affine_fn ova, ovb;
4455
4456           niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a));
4457           niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b));
4458           niter = MIN (niter_a, niter_b);
4459           step_a = int_cst_value (CHREC_RIGHT (chrec_a));
4460           step_b = int_cst_value (CHREC_RIGHT (chrec_b));
4461
4462           compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
4463                                                    &ova, &ovb,
4464                                                    last_conflicts, 1);
4465           *overlaps_a = conflict_fn (1, ova);
4466           *overlaps_b = conflict_fn (1, ovb);
4467         }
4468
4469       else if (nb_vars_a == 2 && nb_vars_b == 1)
4470         compute_overlap_steps_for_affine_1_2
4471           (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
4472
4473       else if (nb_vars_a == 1 && nb_vars_b == 2)
4474         compute_overlap_steps_for_affine_1_2
4475           (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
4476
4477       else
4478         {
4479           if (dump_file && (dump_flags & TDF_DETAILS))
4480             fprintf (dump_file, "affine-affine test failed: too many variables.\n");
4481           *overlaps_a = conflict_fn_not_known ();
4482           *overlaps_b = conflict_fn_not_known ();
4483           *last_conflicts = chrec_dont_know;
4484         }
4485       goto end_analyze_subs_aa;
4486     }
4487
4488   /* U.A = S */
4489   if (!lambda_matrix_right_hermite (A, dim, 1, S, U))
4490     {
4491       *overlaps_a = conflict_fn_not_known ();
4492       *overlaps_b = conflict_fn_not_known ();
4493       *last_conflicts = chrec_dont_know;
4494       goto end_analyze_subs_aa;
4495     }
4496
4497   if (S[0][0] < 0)
4498     {
4499       S[0][0] *= -1;
4500       lambda_matrix_row_negate (U, dim, 0);
4501     }
4502   gcd_alpha_beta = S[0][0];
4503
4504   /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
4505      but that is a quite strange case.  Instead of ICEing, answer
4506      don't know.  */
4507   if (gcd_alpha_beta == 0)
4508     {
4509       *overlaps_a = conflict_fn_not_known ();
4510       *overlaps_b = conflict_fn_not_known ();
4511       *last_conflicts = chrec_dont_know;
4512       goto end_analyze_subs_aa;
4513     }
4514
4515   /* The classic "gcd-test".  */
4516   if (!int_divides_p (gcd_alpha_beta, gamma))
4517     {
4518       /* The "gcd-test" has determined that there is no integer
4519          solution, i.e. there is no dependence.  */
4520       *overlaps_a = conflict_fn_no_dependence ();
4521       *overlaps_b = conflict_fn_no_dependence ();
4522       *last_conflicts = integer_zero_node;
4523     }
4524
4525   /* Both access functions are univariate.  This includes SIV and MIV cases.  */
4526   else if (nb_vars_a == 1 && nb_vars_b == 1)
4527     {
4528       /* Both functions should have the same evolution sign.  */
4529       if (((A[0][0] > 0 && -A[1][0] > 0)
4530            || (A[0][0] < 0 && -A[1][0] < 0)))
4531         {
4532           /* The solutions are given by:
4533              |
4534              | [GAMMA/GCD_ALPHA_BETA  t].[u11 u12]  = [x0]
4535              |                           [u21 u22]    [y0]
4536
4537              For a given integer t.  Using the following variables,
4538
4539              | i0 = u11 * gamma / gcd_alpha_beta
4540              | j0 = u12 * gamma / gcd_alpha_beta
4541              | i1 = u21
4542              | j1 = u22
4543
4544              the solutions are:
4545
4546              | x0 = i0 + i1 * t,
4547              | y0 = j0 + j1 * t.  */
4548           HOST_WIDE_INT i0, j0, i1, j1;
4549
4550           i0 = U[0][0] * gamma / gcd_alpha_beta;
4551           j0 = U[0][1] * gamma / gcd_alpha_beta;
4552           i1 = U[1][0];
4553           j1 = U[1][1];
4554
4555           if ((i1 == 0 && i0 < 0)
4556               || (j1 == 0 && j0 < 0))
4557             {
4558               /* There is no solution.
4559                  FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
4560                  falls in here, but for the moment we don't look at the
4561                  upper bound of the iteration domain.  */
4562               *overlaps_a = conflict_fn_no_dependence ();
4563               *overlaps_b = conflict_fn_no_dependence ();
4564               *last_conflicts = integer_zero_node;
4565               goto end_analyze_subs_aa;
4566             }
4567
4568           if (i1 > 0 && j1 > 0)
4569             {
4570               HOST_WIDE_INT niter_a
4571                 = max_stmt_executions_int (get_chrec_loop (chrec_a));
4572               HOST_WIDE_INT niter_b
4573                 = max_stmt_executions_int (get_chrec_loop (chrec_b));
4574               HOST_WIDE_INT niter = MIN (niter_a, niter_b);
4575
4576               /* (X0, Y0) is a solution of the Diophantine equation:
4577                  "chrec_a (X0) = chrec_b (Y0)".  */
4578               HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
4579                                         CEIL (-j0, j1));
4580               HOST_WIDE_INT x0 = i1 * tau1 + i0;
4581               HOST_WIDE_INT y0 = j1 * tau1 + j0;
4582
4583               /* (X1, Y1) is the smallest positive solution of the eq
4584                  "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
4585                  first conflict occurs.  */
4586               HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
4587               HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
4588               HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
4589
4590               if (niter > 0)
4591                 {
4592                   /* If the overlap occurs outside of the bounds of the
4593                      loop, there is no dependence.  */
4594                   if (x1 >= niter_a || y1 >= niter_b)
4595                     {
4596                       *overlaps_a = conflict_fn_no_dependence ();
4597                       *overlaps_b = conflict_fn_no_dependence ();
4598                       *last_conflicts = integer_zero_node;
4599                       goto end_analyze_subs_aa;
4600                     }
4601
4602                   /* max stmt executions can get quite large, avoid
4603                      overflows by using wide ints here.  */
4604                   widest_int tau2
4605                     = wi::smin (wi::sdiv_floor (wi::sub (niter_a, i0), i1),
4606                                 wi::sdiv_floor (wi::sub (niter_b, j0), j1));
4607                   widest_int last_conflict = wi::sub (tau2, (x1 - i0)/i1);
4608                   if (wi::min_precision (last_conflict, SIGNED)
4609                       <= TYPE_PRECISION (integer_type_node))
4610                     *last_conflicts
4611                        = build_int_cst (integer_type_node,
4612                                         last_conflict.to_shwi ());
4613                   else
4614                     *last_conflicts = chrec_dont_know;
4615                 }
4616               else
4617                 *last_conflicts = chrec_dont_know;
4618
4619               *overlaps_a
4620                 = conflict_fn (1,
4621                                affine_fn_univar (build_int_cst (NULL_TREE, x1),
4622                                                  1,
4623                                                  build_int_cst (NULL_TREE, i1)));
4624               *overlaps_b
4625                 = conflict_fn (1,
4626                                affine_fn_univar (build_int_cst (NULL_TREE, y1),
4627                                                  1,
4628                                                  build_int_cst (NULL_TREE, j1)));
4629             }
4630           else
4631             {
4632               /* FIXME: For the moment, the upper bound of the
4633                  iteration domain for i and j is not checked.  */
4634               if (dump_file && (dump_flags & TDF_DETAILS))
4635                 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
4636               *overlaps_a = conflict_fn_not_known ();
4637               *overlaps_b = conflict_fn_not_known ();
4638               *last_conflicts = chrec_dont_know;
4639             }
4640         }
4641       else
4642         {
4643           if (dump_file && (dump_flags & TDF_DETAILS))
4644             fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
4645           *overlaps_a = conflict_fn_not_known ();
4646           *overlaps_b = conflict_fn_not_known ();
4647           *last_conflicts = chrec_dont_know;
4648         }
4649     }
4650   else
4651     {
4652       if (dump_file && (dump_flags & TDF_DETAILS))
4653         fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
4654       *overlaps_a = conflict_fn_not_known ();
4655       *overlaps_b = conflict_fn_not_known ();
4656       *last_conflicts = chrec_dont_know;
4657     }
4658
4659 end_analyze_subs_aa:
4660   obstack_free (&scratch_obstack, NULL);
4661   if (dump_file && (dump_flags & TDF_DETAILS))
4662     {
4663       fprintf (dump_file, "  (overlaps_a = ");
4664       dump_conflict_function (dump_file, *overlaps_a);
4665       fprintf (dump_file, ")\n  (overlaps_b = ");
4666       dump_conflict_function (dump_file, *overlaps_b);
4667       fprintf (dump_file, "))\n");
4668     }
4669 }
4670
4671 /* Returns true when analyze_subscript_affine_affine can be used for
4672    determining the dependence relation between chrec_a and chrec_b,
4673    that contain symbols.  This function modifies chrec_a and chrec_b
4674    such that the analysis result is the same, and such that they don't
4675    contain symbols, and then can safely be passed to the analyzer.
4676
4677    Example: The analysis of the following tuples of evolutions produce
4678    the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
4679    vs. {0, +, 1}_1
4680
4681    {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
4682    {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
4683 */
4684
4685 static bool
4686 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
4687 {
4688   tree diff, type, left_a, left_b, right_b;
4689
4690   if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
4691       || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
4692     /* FIXME: For the moment not handled.  Might be refined later.  */
4693     return false;
4694
4695   type = chrec_type (*chrec_a);
4696   left_a = CHREC_LEFT (*chrec_a);
4697   left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
4698   diff = chrec_fold_minus (type, left_a, left_b);
4699
4700   if (!evolution_function_is_constant_p (diff))
4701     return false;
4702
4703   if (dump_file && (dump_flags & TDF_DETAILS))
4704     fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
4705
4706   *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
4707                                      diff, CHREC_RIGHT (*chrec_a));
4708   right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
4709   *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
4710                                      build_int_cst (type, 0),
4711                                      right_b);
4712   return true;
4713 }
4714
4715 /* Analyze a SIV (Single Index Variable) subscript.  *OVERLAPS_A and
4716    *OVERLAPS_B are initialized to the functions that describe the
4717    relation between the elements accessed twice by CHREC_A and
4718    CHREC_B.  For k >= 0, the following property is verified:
4719
4720    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
4721
4722 static void
4723 analyze_siv_subscript (tree chrec_a,
4724                        tree chrec_b,
4725                        conflict_function **overlaps_a,
4726                        conflict_function **overlaps_b,
4727                        tree *last_conflicts,
4728                        int loop_nest_num)
4729 {
4730   dependence_stats.num_siv++;
4731
4732   if (dump_file && (dump_flags & TDF_DETAILS))
4733     fprintf (dump_file, "(analyze_siv_subscript \n");
4734
4735   if (evolution_function_is_constant_p (chrec_a)
4736       && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
4737     analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
4738                                       overlaps_a, overlaps_b, last_conflicts);
4739
4740   else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
4741            && evolution_function_is_constant_p (chrec_b))
4742     analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
4743                                       overlaps_b, overlaps_a, last_conflicts);
4744
4745   else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
4746            && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
4747     {
4748       if (!chrec_contains_symbols (chrec_a)
4749           && !chrec_contains_symbols (chrec_b))
4750         {
4751           analyze_subscript_affine_affine (chrec_a, chrec_b,
4752                                            overlaps_a, overlaps_b,
4753                                            last_conflicts);
4754
4755           if (CF_NOT_KNOWN_P (*overlaps_a)
4756               || CF_NOT_KNOWN_P (*overlaps_b))
4757             dependence_stats.num_siv_unimplemented++;
4758           else if (CF_NO_DEPENDENCE_P (*overlaps_a)
4759                    || CF_NO_DEPENDENCE_P (*overlaps_b))
4760             dependence_stats.num_siv_independent++;
4761           else
4762             dependence_stats.num_siv_dependent++;
4763         }
4764       else if (can_use_analyze_subscript_affine_affine (&chrec_a,
4765                                                         &chrec_b))
4766         {
4767           analyze_subscript_affine_affine (chrec_a, chrec_b,
4768                                            overlaps_a, overlaps_b,
4769                                            last_conflicts);
4770
4771           if (CF_NOT_KNOWN_P (*overlaps_a)
4772               || CF_NOT_KNOWN_P (*overlaps_b))
4773             dependence_stats.num_siv_unimplemented++;
4774           else if (CF_NO_DEPENDENCE_P (*overlaps_a)
4775                    || CF_NO_DEPENDENCE_P (*overlaps_b))
4776             dependence_stats.num_siv_independent++;
4777           else
4778             dependence_stats.num_siv_dependent++;
4779         }
4780       else
4781         goto siv_subscript_dontknow;
4782     }
4783
4784   else
4785     {
4786     siv_subscript_dontknow:;
4787       if (dump_file && (dump_flags & TDF_DETAILS))
4788         fprintf (dump_file, "  siv test failed: unimplemented");
4789       *overlaps_a = conflict_fn_not_known ();
4790       *overlaps_b = conflict_fn_not_known ();
4791       *last_conflicts = chrec_dont_know;
4792       dependence_stats.num_siv_unimplemented++;
4793     }
4794
4795   if (dump_file && (dump_flags & TDF_DETAILS))
4796     fprintf (dump_file, ")\n");
4797 }
4798
4799 /* Returns false if we can prove that the greatest common divisor of the steps
4800    of CHREC does not divide CST, false otherwise.  */
4801
4802 static bool
4803 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
4804 {
4805   HOST_WIDE_INT cd = 0, val;
4806   tree step;
4807
4808   if (!tree_fits_shwi_p (cst))
4809     return true;
4810   val = tree_to_shwi (cst);
4811
4812   while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
4813     {
4814       step = CHREC_RIGHT (chrec);
4815       if (!tree_fits_shwi_p (step))
4816         return true;
4817       cd = gcd (cd, tree_to_shwi (step));
4818       chrec = CHREC_LEFT (chrec);
4819     }
4820
4821   return val % cd == 0;
4822 }
4823
4824 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
4825    LOOP_NEST.  *OVERLAPS_A and *OVERLAPS_B are initialized to the
4826    functions that describe the relation between the elements accessed
4827    twice by CHREC_A and CHREC_B.  For k >= 0, the following property
4828    is verified:
4829
4830    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
4831
4832 static void
4833 analyze_miv_subscript (tree chrec_a,
4834                        tree chrec_b,
4835                        conflict_function **overlaps_a,
4836                        conflict_function **overlaps_b,
4837                        tree *last_conflicts,
4838                        class loop *loop_nest)
4839 {
4840   tree type, difference;
4841
4842   dependence_stats.num_miv++;
4843   if (dump_file && (dump_flags & TDF_DETAILS))
4844     fprintf (dump_file, "(analyze_miv_subscript \n");
4845
4846   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
4847   chrec_a = chrec_convert (type, chrec_a, NULL);
4848   chrec_b = chrec_convert (type, chrec_b, NULL);
4849   difference = chrec_fold_minus (type, chrec_a, chrec_b);
4850
4851   if (eq_evolutions_p (chrec_a, chrec_b))
4852     {
4853       /* Access functions are the same: all the elements are accessed
4854          in the same order.  */
4855       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4856       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4857       *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
4858       dependence_stats.num_miv_dependent++;
4859     }
4860
4861   else if (evolution_function_is_constant_p (difference)
4862            && evolution_function_is_affine_multivariate_p (chrec_a,
4863                                                            loop_nest->num)
4864            && !gcd_of_steps_may_divide_p (chrec_a, difference))
4865     {
4866       /* testsuite/.../ssa-chrec-33.c
4867          {{21, +, 2}_1, +, -2}_2  vs.  {{20, +, 2}_1, +, -2}_2
4868
4869          The difference is 1, and all the evolution steps are multiples
4870          of 2, consequently there are no overlapping elements.  */
4871       *overlaps_a = conflict_fn_no_dependence ();
4872       *overlaps_b = conflict_fn_no_dependence ();
4873       *last_conflicts = integer_zero_node;
4874       dependence_stats.num_miv_independent++;
4875     }
4876
4877   else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest->num)
4878            && !chrec_contains_symbols (chrec_a, loop_nest)
4879            && evolution_function_is_affine_in_loop (chrec_b, loop_nest->num)
4880            && !chrec_contains_symbols (chrec_b, loop_nest))
4881     {
4882       /* testsuite/.../ssa-chrec-35.c
4883          {0, +, 1}_2  vs.  {0, +, 1}_3
4884          the overlapping elements are respectively located at iterations:
4885          {0, +, 1}_x and {0, +, 1}_x,
4886          in other words, we have the equality:
4887          {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
4888
4889          Other examples:
4890          {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
4891          {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
4892
4893          {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
4894          {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
4895       */
4896       analyze_subscript_affine_affine (chrec_a, chrec_b,
4897                                        overlaps_a, overlaps_b, last_conflicts);
4898
4899       if (CF_NOT_KNOWN_P (*overlaps_a)
4900           || CF_NOT_KNOWN_P (*overlaps_b))
4901         dependence_stats.num_miv_unimplemented++;
4902       else if (CF_NO_DEPENDENCE_P (*overlaps_a)
4903                || CF_NO_DEPENDENCE_P (*overlaps_b))
4904         dependence_stats.num_miv_independent++;
4905       else
4906         dependence_stats.num_miv_dependent++;
4907     }
4908
4909   else
4910     {
4911       /* When the analysis is too difficult, answer "don't know".  */
4912       if (dump_file && (dump_flags & TDF_DETAILS))
4913         fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
4914
4915       *overlaps_a = conflict_fn_not_known ();
4916       *overlaps_b = conflict_fn_not_known ();
4917       *last_conflicts = chrec_dont_know;
4918       dependence_stats.num_miv_unimplemented++;
4919     }
4920
4921   if (dump_file && (dump_flags & TDF_DETAILS))
4922     fprintf (dump_file, ")\n");
4923 }
4924
4925 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
4926    with respect to LOOP_NEST.  OVERLAP_ITERATIONS_A and
4927    OVERLAP_ITERATIONS_B are initialized with two functions that
4928    describe the iterations that contain conflicting elements.
4929
4930    Remark: For an integer k >= 0, the following equality is true:
4931
4932    CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
4933 */
4934
4935 static void
4936 analyze_overlapping_iterations (tree chrec_a,
4937                                 tree chrec_b,
4938                                 conflict_function **overlap_iterations_a,
4939                                 conflict_function **overlap_iterations_b,
4940                                 tree *last_conflicts, class loop *loop_nest)
4941 {
4942   unsigned int lnn = loop_nest->num;
4943
4944   dependence_stats.num_subscript_tests++;
4945
4946   if (dump_file && (dump_flags & TDF_DETAILS))
4947     {
4948       fprintf (dump_file, "(analyze_overlapping_iterations \n");
4949       fprintf (dump_file, "  (chrec_a = ");
4950       print_generic_expr (dump_file, chrec_a);
4951       fprintf (dump_file, ")\n  (chrec_b = ");
4952       print_generic_expr (dump_file, chrec_b);
4953       fprintf (dump_file, ")\n");
4954     }
4955
4956   if (chrec_a == NULL_TREE
4957       || chrec_b == NULL_TREE
4958       || chrec_contains_undetermined (chrec_a)
4959       || chrec_contains_undetermined (chrec_b))
4960     {
4961       dependence_stats.num_subscript_undetermined++;
4962
4963       *overlap_iterations_a = conflict_fn_not_known ();
4964       *overlap_iterations_b = conflict_fn_not_known ();
4965     }
4966
4967   /* If they are the same chrec, and are affine, they overlap
4968      on every iteration.  */
4969   else if (eq_evolutions_p (chrec_a, chrec_b)
4970            && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
4971                || operand_equal_p (chrec_a, chrec_b, 0)))
4972     {
4973       dependence_stats.num_same_subscript_function++;
4974       *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4975       *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4976       *last_conflicts = chrec_dont_know;
4977     }
4978
4979   /* If they aren't the same, and aren't affine, we can't do anything
4980      yet.  */
4981   else if ((chrec_contains_symbols (chrec_a)
4982             || chrec_contains_symbols (chrec_b))
4983            && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
4984                || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
4985     {
4986       dependence_stats.num_subscript_undetermined++;
4987       *overlap_iterations_a = conflict_fn_not_known ();
4988       *overlap_iterations_b = conflict_fn_not_known ();
4989     }
4990
4991   else if (ziv_subscript_p (chrec_a, chrec_b))
4992     analyze_ziv_subscript (chrec_a, chrec_b,
4993                            overlap_iterations_a, overlap_iterations_b,
4994                            last_conflicts);
4995
4996   else if (siv_subscript_p (chrec_a, chrec_b))
4997     analyze_siv_subscript (chrec_a, chrec_b,
4998                            overlap_iterations_a, overlap_iterations_b,
4999                            last_conflicts, lnn);
5000
5001   else
5002     analyze_miv_subscript (chrec_a, chrec_b,
5003                            overlap_iterations_a, overlap_iterations_b,
5004                            last_conflicts, loop_nest);
5005
5006   if (dump_file && (dump_flags & TDF_DETAILS))
5007     {
5008       fprintf (dump_file, "  (overlap_iterations_a = ");
5009       dump_conflict_function (dump_file, *overlap_iterations_a);
5010       fprintf (dump_file, ")\n  (overlap_iterations_b = ");
5011       dump_conflict_function (dump_file, *overlap_iterations_b);
5012       fprintf (dump_file, "))\n");
5013     }
5014 }
5015
5016 /* Helper function for uniquely inserting distance vectors.  */
5017
5018 static void
5019 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
5020 {
5021   for (lambda_vector v : DDR_DIST_VECTS (ddr))
5022     if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
5023       return;
5024
5025   DDR_DIST_VECTS (ddr).safe_push (dist_v);
5026 }
5027
5028 /* Helper function for uniquely inserting direction vectors.  */
5029
5030 static void
5031 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
5032 {
5033   for (lambda_vector v : DDR_DIR_VECTS (ddr))
5034     if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
5035       return;
5036
5037   DDR_DIR_VECTS (ddr).safe_push (dir_v);
5038 }
5039
5040 /* Add a distance of 1 on all the loops outer than INDEX.  If we
5041    haven't yet determined a distance for this outer loop, push a new
5042    distance vector composed of the previous distance, and a distance
5043    of 1 for this outer loop.  Example:
5044
5045    | loop_1
5046    |   loop_2
5047    |     A[10]
5048    |   endloop_2
5049    | endloop_1
5050
5051    Saved vectors are of the form (dist_in_1, dist_in_2).  First, we
5052    save (0, 1), then we have to save (1, 0).  */
5053
5054 static void
5055 add_outer_distances (struct data_dependence_relation *ddr,
5056                      lambda_vector dist_v, int index)
5057 {
5058   /* For each outer loop where init_v is not set, the accesses are
5059      in dependence of distance 1 in the loop.  */
5060   while (--index >= 0)
5061     {
5062       lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5063       lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
5064       save_v[index] = 1;
5065       save_dist_v (ddr, save_v);
5066     }
5067 }
5068
5069 /* Return false when fail to represent the data dependence as a
5070    distance vector.  A_INDEX is the index of the first reference
5071    (0 for DDR_A, 1 for DDR_B) and B_INDEX is the index of the
5072    second reference.  INIT_B is set to true when a component has been
5073    added to the distance vector DIST_V.  INDEX_CARRY is then set to
5074    the index in DIST_V that carries the dependence.  */
5075
5076 static bool
5077 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
5078                              unsigned int a_index, unsigned int b_index,
5079                              lambda_vector dist_v, bool *init_b,
5080                              int *index_carry)
5081 {
5082   unsigned i;
5083   lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5084   class loop *loop = DDR_LOOP_NEST (ddr)[0];
5085
5086   for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
5087     {
5088       tree access_fn_a, access_fn_b;
5089       struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
5090
5091       if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
5092         {
5093           non_affine_dependence_relation (ddr);
5094           return false;
5095         }
5096
5097       access_fn_a = SUB_ACCESS_FN (subscript, a_index);
5098       access_fn_b = SUB_ACCESS_FN (subscript, b_index);
5099
5100       if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
5101           && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
5102         {
5103           HOST_WIDE_INT dist;
5104           int index;
5105           int var_a = CHREC_VARIABLE (access_fn_a);
5106           int var_b = CHREC_VARIABLE (access_fn_b);
5107
5108           if (var_a != var_b
5109               || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
5110             {
5111               non_affine_dependence_relation (ddr);
5112               return false;
5113             }
5114
5115           /* When data references are collected in a loop while data
5116              dependences are analyzed in loop nest nested in the loop, we
5117              would have more number of access functions than number of
5118              loops.  Skip access functions of loops not in the loop nest.
5119
5120              See PR89725 for more information.  */
5121           if (flow_loop_nested_p (get_loop (cfun, var_a), loop))
5122             continue;
5123
5124           dist = int_cst_value (SUB_DISTANCE (subscript));
5125           index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
5126           *index_carry = MIN (index, *index_carry);
5127
5128           /* This is the subscript coupling test.  If we have already
5129              recorded a distance for this loop (a distance coming from
5130              another subscript), it should be the same.  For example,
5131              in the following code, there is no dependence:
5132
5133              | loop i = 0, N, 1
5134              |   T[i+1][i] = ...
5135              |   ... = T[i][i]
5136              | endloop
5137           */
5138           if (init_v[index] != 0 && dist_v[index] != dist)
5139             {
5140               finalize_ddr_dependent (ddr, chrec_known);
5141               return false;
5142             }
5143
5144           dist_v[index] = dist;
5145           init_v[index] = 1;
5146           *init_b = true;
5147         }
5148       else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
5149         {
5150           /* This can be for example an affine vs. constant dependence
5151              (T[i] vs. T[3]) that is not an affine dependence and is
5152              not representable as a distance vector.  */
5153           non_affine_dependence_relation (ddr);
5154           return false;
5155         }
5156       else
5157         *init_b = true;
5158     }
5159
5160   return true;
5161 }
5162
5163 /* Return true when the DDR contains only invariant access functions wrto. loop
5164    number LNUM.  */
5165
5166 static bool
5167 invariant_access_functions (const struct data_dependence_relation *ddr,
5168                             int lnum)
5169 {
5170   for (subscript *sub : DDR_SUBSCRIPTS (ddr))
5171     if (!evolution_function_is_invariant_p (SUB_ACCESS_FN (sub, 0), lnum)
5172         || !evolution_function_is_invariant_p (SUB_ACCESS_FN (sub, 1), lnum))
5173       return false;
5174
5175   return true;
5176 }
5177
5178 /* Helper function for the case where DDR_A and DDR_B are the same
5179    multivariate access function with a constant step.  For an example
5180    see pr34635-1.c.  */
5181
5182 static void
5183 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
5184 {
5185   int x_1, x_2;
5186   tree c_1 = CHREC_LEFT (c_2);
5187   tree c_0 = CHREC_LEFT (c_1);
5188   lambda_vector dist_v;
5189   HOST_WIDE_INT v1, v2, cd;
5190
5191   /* Polynomials with more than 2 variables are not handled yet.  When
5192      the evolution steps are parameters, it is not possible to
5193      represent the dependence using classical distance vectors.  */
5194   if (TREE_CODE (c_0) != INTEGER_CST
5195       || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
5196       || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
5197     {
5198       DDR_AFFINE_P (ddr) = false;
5199       return;
5200     }
5201
5202   x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
5203   x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
5204
5205   /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2).  */
5206   dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5207   v1 = int_cst_value (CHREC_RIGHT (c_1));
5208   v2 = int_cst_value (CHREC_RIGHT (c_2));
5209   cd = gcd (v1, v2);
5210   v1 /= cd;
5211   v2 /= cd;
5212
5213   if (v2 < 0)
5214     {
5215       v2 = -v2;
5216       v1 = -v1;
5217     }
5218
5219   dist_v[x_1] = v2;
5220   dist_v[x_2] = -v1;
5221   save_dist_v (ddr, dist_v);
5222
5223   add_outer_distances (ddr, dist_v, x_1);
5224 }
5225
5226 /* Helper function for the case where DDR_A and DDR_B are the same
5227    access functions.  */
5228
5229 static void
5230 add_other_self_distances (struct data_dependence_relation *ddr)
5231 {
5232   lambda_vector dist_v;
5233   unsigned i;
5234   int index_carry = DDR_NB_LOOPS (ddr);
5235   subscript *sub;
5236   class loop *loop = DDR_LOOP_NEST (ddr)[0];
5237
5238   FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
5239     {
5240       tree access_fun = SUB_ACCESS_FN (sub, 0);
5241
5242       if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
5243         {
5244           if (!evolution_function_is_univariate_p (access_fun, loop->num))
5245             {
5246               if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
5247                 {
5248                   DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
5249                   return;
5250                 }
5251
5252               access_fun = SUB_ACCESS_FN (DDR_SUBSCRIPT (ddr, 0), 0);
5253
5254               if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
5255                 add_multivariate_self_dist (ddr, access_fun);
5256               else
5257                 /* The evolution step is not constant: it varies in
5258                    the outer loop, so this cannot be represented by a
5259                    distance vector.  For example in pr34635.c the
5260                    evolution is {0, +, {0, +, 4}_1}_2.  */
5261                 DDR_AFFINE_P (ddr) = false;
5262
5263               return;
5264             }
5265
5266           /* When data references are collected in a loop while data
5267              dependences are analyzed in loop nest nested in the loop, we
5268              would have more number of access functions than number of
5269              loops.  Skip access functions of loops not in the loop nest.
5270
5271              See PR89725 for more information.  */
5272           if (flow_loop_nested_p (get_loop (cfun, CHREC_VARIABLE (access_fun)),
5273                                   loop))
5274             continue;
5275
5276           index_carry = MIN (index_carry,
5277                              index_in_loop_nest (CHREC_VARIABLE (access_fun),
5278                                                  DDR_LOOP_NEST (ddr)));
5279         }
5280     }
5281
5282   dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5283   add_outer_distances (ddr, dist_v, index_carry);
5284 }
5285
5286 static void
5287 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
5288 {
5289   lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5290
5291   dist_v[0] = 1;
5292   save_dist_v (ddr, dist_v);
5293 }
5294
5295 /* Adds a unit distance vector to DDR when there is a 0 overlap.  This
5296    is the case for example when access functions are the same and
5297    equal to a constant, as in:
5298
5299    | loop_1
5300    |   A[3] = ...
5301    |   ... = A[3]
5302    | endloop_1
5303
5304    in which case the distance vectors are (0) and (1).  */
5305
5306 static void
5307 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
5308 {
5309   unsigned i, j;
5310
5311   for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
5312     {
5313       subscript_p sub = DDR_SUBSCRIPT (ddr, i);
5314       conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
5315       conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
5316
5317       for (j = 0; j < ca->n; j++)
5318         if (affine_function_zero_p (ca->fns[j]))
5319           {
5320             insert_innermost_unit_dist_vector (ddr);
5321             return;
5322           }
5323
5324       for (j = 0; j < cb->n; j++)
5325         if (affine_function_zero_p (cb->fns[j]))
5326           {
5327             insert_innermost_unit_dist_vector (ddr);
5328             return;
5329           }
5330     }
5331 }
5332
5333 /* Return true when the DDR contains two data references that have the
5334    same access functions.  */
5335
5336 static inline bool
5337 same_access_functions (const struct data_dependence_relation *ddr)
5338 {
5339   for (subscript *sub : DDR_SUBSCRIPTS (ddr))
5340     if (!eq_evolutions_p (SUB_ACCESS_FN (sub, 0),
5341                           SUB_ACCESS_FN (sub, 1)))
5342       return false;
5343
5344   return true;
5345 }
5346
5347 /* Compute the classic per loop distance vector.  DDR is the data
5348    dependence relation to build a vector from.  Return false when fail
5349    to represent the data dependence as a distance vector.  */
5350
5351 static bool
5352 build_classic_dist_vector (struct data_dependence_relation *ddr,
5353                            class loop *loop_nest)
5354 {
5355   bool init_b = false;
5356   int index_carry = DDR_NB_LOOPS (ddr);
5357   lambda_vector dist_v;
5358
5359   if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
5360     return false;
5361
5362   if (same_access_functions (ddr))
5363     {
5364       /* Save the 0 vector.  */
5365       dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5366       save_dist_v (ddr, dist_v);
5367
5368       if (invariant_access_functions (ddr, loop_nest->num))
5369         add_distance_for_zero_overlaps (ddr);
5370
5371       if (DDR_NB_LOOPS (ddr) > 1)
5372         add_other_self_distances (ddr);
5373
5374       return true;
5375     }
5376
5377   dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5378   if (!build_classic_dist_vector_1 (ddr, 0, 1, dist_v, &init_b, &index_carry))
5379     return false;
5380
5381   /* Save the distance vector if we initialized one.  */
5382   if (init_b)
5383     {
5384       /* Verify a basic constraint: classic distance vectors should
5385          always be lexicographically positive.
5386
5387          Data references are collected in the order of execution of
5388          the program, thus for the following loop
5389
5390          | for (i = 1; i < 100; i++)
5391          |   for (j = 1; j < 100; j++)
5392          |     {
5393          |       t = T[j+1][i-1];  // A
5394          |       T[j][i] = t + 2;  // B
5395          |     }
5396
5397          references are collected following the direction of the wind:
5398          A then B.  The data dependence tests are performed also
5399          following this order, such that we're looking at the distance
5400          separating the elements accessed by A from the elements later
5401          accessed by B.  But in this example, the distance returned by
5402          test_dep (A, B) is lexicographically negative (-1, 1), that
5403          means that the access A occurs later than B with respect to
5404          the outer loop, ie. we're actually looking upwind.  In this
5405          case we solve test_dep (B, A) looking downwind to the
5406          lexicographically positive solution, that returns the
5407          distance vector (1, -1).  */
5408       if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
5409         {
5410           lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5411           if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
5412             return false;
5413           compute_subscript_distance (ddr);
5414           if (!build_classic_dist_vector_1 (ddr, 1, 0, save_v, &init_b,
5415                                             &index_carry))
5416             return false;
5417           save_dist_v (ddr, save_v);
5418           DDR_REVERSED_P (ddr) = true;
5419
5420           /* In this case there is a dependence forward for all the
5421              outer loops:
5422
5423              | for (k = 1; k < 100; k++)
5424              |  for (i = 1; i < 100; i++)
5425              |   for (j = 1; j < 100; j++)
5426              |     {
5427              |       t = T[j+1][i-1];  // A
5428              |       T[j][i] = t + 2;  // B
5429              |     }
5430
5431              the vectors are:
5432              (0,  1, -1)
5433              (1,  1, -1)
5434              (1, -1,  1)
5435           */
5436           if (DDR_NB_LOOPS (ddr) > 1)
5437             {
5438               add_outer_distances (ddr, save_v, index_carry);
5439               add_outer_distances (ddr, dist_v, index_carry);
5440             }
5441         }
5442       else
5443         {
5444           lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5445           lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
5446
5447           if (DDR_NB_LOOPS (ddr) > 1)
5448             {
5449               lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5450
5451               if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
5452                 return false;
5453               compute_subscript_distance (ddr);
5454               if (!build_classic_dist_vector_1 (ddr, 1, 0, opposite_v, &init_b,
5455                                                 &index_carry))
5456                 return false;
5457
5458               save_dist_v (ddr, save_v);
5459               add_outer_distances (ddr, dist_v, index_carry);
5460               add_outer_distances (ddr, opposite_v, index_carry);
5461             }
5462           else
5463             save_dist_v (ddr, save_v);
5464         }
5465     }
5466   else
5467     {
5468       /* There is a distance of 1 on all the outer loops: Example:
5469          there is a dependence of distance 1 on loop_1 for the array A.
5470
5471          | loop_1
5472          |   A[5] = ...
5473          | endloop
5474       */
5475       add_outer_distances (ddr, dist_v,
5476                            lambda_vector_first_nz (dist_v,
5477                                                    DDR_NB_LOOPS (ddr), 0));
5478     }
5479
5480   if (dump_file && (dump_flags & TDF_DETAILS))
5481     {
5482       unsigned i;
5483
5484       fprintf (dump_file, "(build_classic_dist_vector\n");
5485       for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
5486         {
5487           fprintf (dump_file, "  dist_vector = (");
5488           print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
5489                                DDR_NB_LOOPS (ddr));
5490           fprintf (dump_file, "  )\n");
5491         }
5492       fprintf (dump_file, ")\n");
5493     }
5494
5495   return true;
5496 }
5497
5498 /* Return the direction for a given distance.
5499    FIXME: Computing dir this way is suboptimal, since dir can catch
5500    cases that dist is unable to represent.  */
5501
5502 static inline enum data_dependence_direction
5503 dir_from_dist (int dist)
5504 {
5505   if (dist > 0)
5506     return dir_positive;
5507   else if (dist < 0)
5508     return dir_negative;
5509   else
5510     return dir_equal;
5511 }
5512
5513 /* Compute the classic per loop direction vector.  DDR is the data
5514    dependence relation to build a vector from.  */
5515
5516 static void
5517 build_classic_dir_vector (struct data_dependence_relation *ddr)
5518 {
5519   unsigned i, j;
5520   lambda_vector dist_v;
5521
5522   FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
5523     {
5524       lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5525
5526       for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
5527         dir_v[j] = dir_from_dist (dist_v[j]);
5528
5529       save_dir_v (ddr, dir_v);
5530     }
5531 }
5532
5533 /* Helper function.  Returns true when there is a dependence between the
5534    data references.  A_INDEX is the index of the first reference (0 for
5535    DDR_A, 1 for DDR_B) and B_INDEX is the index of the second reference.  */
5536
5537 static bool
5538 subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
5539                                unsigned int a_index, unsigned int b_index,
5540                                class loop *loop_nest)
5541 {
5542   unsigned int i;
5543   tree last_conflicts;
5544   struct subscript *subscript;
5545   tree res = NULL_TREE;
5546
5547   for (i = 0; DDR_SUBSCRIPTS (ddr).iterate (i, &subscript); i++)
5548     {
5549       conflict_function *overlaps_a, *overlaps_b;
5550
5551       analyze_overlapping_iterations (SUB_ACCESS_FN (subscript, a_index),
5552                                       SUB_ACCESS_FN (subscript, b_index),
5553                                       &overlaps_a, &overlaps_b,
5554                                       &last_conflicts, loop_nest);
5555
5556       if (SUB_CONFLICTS_IN_A (subscript))
5557         free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
5558       if (SUB_CONFLICTS_IN_B (subscript))
5559         free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
5560
5561       SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
5562       SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
5563       SUB_LAST_CONFLICT (subscript) = last_conflicts;
5564
5565       /* If there is any undetermined conflict function we have to
5566          give a conservative answer in case we cannot prove that
5567          no dependence exists when analyzing another subscript.  */
5568       if (CF_NOT_KNOWN_P (overlaps_a)
5569           || CF_NOT_KNOWN_P (overlaps_b))
5570         {
5571           res = chrec_dont_know;
5572           continue;
5573         }
5574
5575       /* When there is a subscript with no dependence we can stop.  */
5576       else if (CF_NO_DEPENDENCE_P (overlaps_a)
5577                || CF_NO_DEPENDENCE_P (overlaps_b))
5578         {
5579           res = chrec_known;
5580           break;
5581         }
5582     }
5583
5584   if (res == NULL_TREE)
5585     return true;
5586
5587   if (res == chrec_known)
5588     dependence_stats.num_dependence_independent++;
5589   else
5590     dependence_stats.num_dependence_undetermined++;
5591   finalize_ddr_dependent (ddr, res);
5592   return false;
5593 }
5594
5595 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR.  */
5596
5597 static void
5598 subscript_dependence_tester (struct data_dependence_relation *ddr,
5599                              class loop *loop_nest)
5600 {
5601   if (subscript_dependence_tester_1 (ddr, 0, 1, loop_nest))
5602     dependence_stats.num_dependence_dependent++;
5603
5604   compute_subscript_distance (ddr);
5605   if (build_classic_dist_vector (ddr, loop_nest))
5606     build_classic_dir_vector (ddr);
5607 }
5608
5609 /* Returns true when all the access functions of A are affine or
5610    constant with respect to LOOP_NEST.  */
5611
5612 static bool
5613 access_functions_are_affine_or_constant_p (const struct data_reference *a,
5614                                            const class loop *loop_nest)
5615 {
5616   vec<tree> fns = DR_ACCESS_FNS (a);
5617   for (tree t : fns)
5618     if (!evolution_function_is_invariant_p (t, loop_nest->num)
5619         && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
5620       return false;
5621
5622   return true;
5623 }
5624
5625 /* This computes the affine dependence relation between A and B with
5626    respect to LOOP_NEST.  CHREC_KNOWN is used for representing the
5627    independence between two accesses, while CHREC_DONT_KNOW is used
5628    for representing the unknown relation.
5629
5630    Note that it is possible to stop the computation of the dependence
5631    relation the first time we detect a CHREC_KNOWN element for a given
5632    subscript.  */
5633
5634 void
5635 compute_affine_dependence (struct data_dependence_relation *ddr,
5636                            class loop *loop_nest)
5637 {
5638   struct data_reference *dra = DDR_A (ddr);
5639   struct data_reference *drb = DDR_B (ddr);
5640
5641   if (dump_file && (dump_flags & TDF_DETAILS))
5642     {
5643       fprintf (dump_file, "(compute_affine_dependence\n");
5644       fprintf (dump_file, "  ref_a: ");
5645       print_generic_expr (dump_file, DR_REF (dra));
5646       fprintf (dump_file, ", stmt_a: ");
5647       print_gimple_stmt (dump_file, DR_STMT (dra), 0, TDF_SLIM);
5648       fprintf (dump_file, "  ref_b: ");
5649       print_generic_expr (dump_file, DR_REF (drb));
5650       fprintf (dump_file, ", stmt_b: ");
5651       print_gimple_stmt (dump_file, DR_STMT (drb), 0, TDF_SLIM);
5652     }
5653
5654   /* Analyze only when the dependence relation is not yet known.  */
5655   if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
5656     {
5657       dependence_stats.num_dependence_tests++;
5658
5659       if (access_functions_are_affine_or_constant_p (dra, loop_nest)
5660           && access_functions_are_affine_or_constant_p (drb, loop_nest))
5661         subscript_dependence_tester (ddr, loop_nest);
5662
5663       /* As a last case, if the dependence cannot be determined, or if
5664          the dependence is considered too difficult to determine, answer
5665          "don't know".  */
5666       else
5667         {
5668           dependence_stats.num_dependence_undetermined++;
5669
5670           if (dump_file && (dump_flags & TDF_DETAILS))
5671             {
5672               fprintf (dump_file, "Data ref a:\n");
5673               dump_data_reference (dump_file, dra);
5674               fprintf (dump_file, "Data ref b:\n");
5675               dump_data_reference (dump_file, drb);
5676               fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
5677             }
5678           finalize_ddr_dependent (ddr, chrec_dont_know);
5679         }
5680     }
5681
5682   if (dump_file && (dump_flags & TDF_DETAILS))
5683     {
5684       if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
5685         fprintf (dump_file, ") -> no dependence\n");
5686       else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
5687         fprintf (dump_file, ") -> dependence analysis failed\n");
5688       else
5689         fprintf (dump_file, ")\n");
5690     }
5691 }
5692
5693 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
5694    the data references in DATAREFS, in the LOOP_NEST.  When
5695    COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
5696    relations.  Return true when successful, i.e. data references number
5697    is small enough to be handled.  */
5698
5699 bool
5700 compute_all_dependences (const vec<data_reference_p> &datarefs,
5701                          vec<ddr_p> *dependence_relations,
5702                          const vec<loop_p> &loop_nest,
5703                          bool compute_self_and_rr)
5704 {
5705   struct data_dependence_relation *ddr;
5706   struct data_reference *a, *b;
5707   unsigned int i, j;
5708
5709   if ((int) datarefs.length ()
5710       > param_loop_max_datarefs_for_datadeps)
5711     {
5712       struct data_dependence_relation *ddr;
5713
5714       /* Insert a single relation into dependence_relations:
5715          chrec_dont_know.  */
5716       ddr = initialize_data_dependence_relation (NULL, NULL, loop_nest);
5717       dependence_relations->safe_push (ddr);
5718       return false;
5719     }
5720
5721   FOR_EACH_VEC_ELT (datarefs, i, a)
5722     for (j = i + 1; datarefs.iterate (j, &b); j++)
5723       if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
5724         {
5725           ddr = initialize_data_dependence_relation (a, b, loop_nest);
5726           dependence_relations->safe_push (ddr);
5727           if (loop_nest.exists ())
5728             compute_affine_dependence (ddr, loop_nest[0]);
5729         }
5730
5731   if (compute_self_and_rr)
5732     FOR_EACH_VEC_ELT (datarefs, i, a)
5733       {
5734         ddr = initialize_data_dependence_relation (a, a, loop_nest);
5735         dependence_relations->safe_push (ddr);
5736         if (loop_nest.exists ())
5737           compute_affine_dependence (ddr, loop_nest[0]);
5738       }
5739
5740   return true;
5741 }
5742
5743 /* Describes a location of a memory reference.  */
5744
5745 struct data_ref_loc
5746 {
5747   /* The memory reference.  */
5748   tree ref;
5749
5750   /* True if the memory reference is read.  */
5751   bool is_read;
5752
5753   /* True if the data reference is conditional within the containing
5754      statement, i.e. if it might not occur even when the statement
5755      is executed and runs to completion.  */
5756   bool is_conditional_in_stmt;
5757 };
5758
5759
5760 /* Stores the locations of memory references in STMT to REFERENCES.  Returns
5761    true if STMT clobbers memory, false otherwise.  */
5762
5763 static bool
5764 get_references_in_stmt (gimple *stmt, vec<data_ref_loc, va_heap> *references)
5765 {
5766   bool clobbers_memory = false;
5767   data_ref_loc ref;
5768   tree op0, op1;
5769   enum gimple_code stmt_code = gimple_code (stmt);
5770
5771   /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
5772      As we cannot model data-references to not spelled out
5773      accesses give up if they may occur.  */
5774   if (stmt_code == GIMPLE_CALL
5775       && !(gimple_call_flags (stmt) & ECF_CONST))
5776     {
5777       /* Allow IFN_GOMP_SIMD_LANE in their own loops.  */
5778       if (gimple_call_internal_p (stmt))
5779         switch (gimple_call_internal_fn (stmt))
5780           {
5781           case IFN_GOMP_SIMD_LANE:
5782             {
5783               class loop *loop = gimple_bb (stmt)->loop_father;
5784               tree uid = gimple_call_arg (stmt, 0);
5785               gcc_assert (TREE_CODE (uid) == SSA_NAME);
5786               if (loop == NULL
5787                   || loop->simduid != SSA_NAME_VAR (uid))
5788                 clobbers_memory = true;
5789               break;
5790             }
5791           case IFN_MASK_LOAD:
5792           case IFN_MASK_STORE:
5793             break;
5794           default:
5795             clobbers_memory = true;
5796             break;
5797           }
5798       else
5799         clobbers_memory = true;
5800     }
5801   else if (stmt_code == GIMPLE_ASM
5802            && (gimple_asm_volatile_p (as_a <gasm *> (stmt))
5803                || gimple_vuse (stmt)))
5804     clobbers_memory = true;
5805
5806   if (!gimple_vuse (stmt))
5807     return clobbers_memory;
5808
5809   if (stmt_code == GIMPLE_ASSIGN)
5810     {
5811       tree base;
5812       op0 = gimple_assign_lhs (stmt);
5813       op1 = gimple_assign_rhs1 (stmt);
5814
5815       if (DECL_P (op1)
5816           || (REFERENCE_CLASS_P (op1)
5817               && (base = get_base_address (op1))
5818               && TREE_CODE (base) != SSA_NAME
5819               && !is_gimple_min_invariant (base)))
5820         {
5821           ref.ref = op1;
5822           ref.is_read = true;
5823           ref.is_conditional_in_stmt = false;
5824           references->safe_push (ref);
5825         }
5826     }
5827   else if (stmt_code == GIMPLE_CALL)
5828     {
5829       unsigned i, n;
5830       tree ptr, type;
5831       unsigned int align;
5832
5833       ref.is_read = false;
5834       if (gimple_call_internal_p (stmt))
5835         switch (gimple_call_internal_fn (stmt))
5836           {
5837           case IFN_MASK_LOAD:
5838             if (gimple_call_lhs (stmt) == NULL_TREE)
5839               break;
5840             ref.is_read = true;
5841             /* FALLTHRU */
5842           case IFN_MASK_STORE:
5843             ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)), 0);
5844             align = tree_to_shwi (gimple_call_arg (stmt, 1));
5845             if (ref.is_read)
5846               type = TREE_TYPE (gimple_call_lhs (stmt));
5847             else
5848               type = TREE_TYPE (gimple_call_arg (stmt, 3));
5849             if (TYPE_ALIGN (type) != align)
5850               type = build_aligned_type (type, align);
5851             ref.is_conditional_in_stmt = true;
5852             ref.ref = fold_build2 (MEM_REF, type, gimple_call_arg (stmt, 0),
5853                                    ptr);
5854             references->safe_push (ref);
5855             return false;
5856           default:
5857             break;
5858           }
5859
5860       op0 = gimple_call_lhs (stmt);
5861       n = gimple_call_num_args (stmt);
5862       for (i = 0; i < n; i++)
5863         {
5864           op1 = gimple_call_arg (stmt, i);
5865
5866           if (DECL_P (op1)
5867               || (REFERENCE_CLASS_P (op1) && get_base_address (op1)))
5868             {
5869               ref.ref = op1;
5870               ref.is_read = true;
5871               ref.is_conditional_in_stmt = false;
5872               references->safe_push (ref);
5873             }
5874         }
5875     }
5876   else
5877     return clobbers_memory;
5878
5879   if (op0
5880       && (DECL_P (op0)
5881           || (REFERENCE_CLASS_P (op0) && get_base_address (op0))))
5882     {
5883       ref.ref = op0;
5884       ref.is_read = false;
5885       ref.is_conditional_in_stmt = false;
5886       references->safe_push (ref);
5887     }
5888   return clobbers_memory;
5889 }
5890
5891
5892 /* Returns true if the loop-nest has any data reference.  */
5893
5894 bool
5895 loop_nest_has_data_refs (loop_p loop)
5896 {
5897   basic_block *bbs = get_loop_body (loop);
5898   auto_vec<data_ref_loc, 3> references;
5899
5900   for (unsigned i = 0; i < loop->num_nodes; i++)
5901     {
5902       basic_block bb = bbs[i];
5903       gimple_stmt_iterator bsi;
5904
5905       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5906         {
5907           gimple *stmt = gsi_stmt (bsi);
5908           get_references_in_stmt (stmt, &references);
5909           if (references.length ())
5910             {
5911               free (bbs);
5912               return true;
5913             }
5914         }
5915     }
5916   free (bbs);
5917   return false;
5918 }
5919
5920 /* Stores the data references in STMT to DATAREFS.  If there is an unanalyzable
5921    reference, returns false, otherwise returns true.  NEST is the outermost
5922    loop of the loop nest in which the references should be analyzed.  */
5923
5924 opt_result
5925 find_data_references_in_stmt (class loop *nest, gimple *stmt,
5926                               vec<data_reference_p> *datarefs)
5927 {
5928   auto_vec<data_ref_loc, 2> references;
5929   data_reference_p dr;
5930
5931   if (get_references_in_stmt (stmt, &references))
5932     return opt_result::failure_at (stmt, "statement clobbers memory: %G",
5933                                    stmt);
5934
5935   for (const data_ref_loc &ref : references)
5936     {
5937       dr = create_data_ref (nest ? loop_preheader_edge (nest) : NULL,
5938                             loop_containing_stmt (stmt), ref.ref,
5939                             stmt, ref.is_read, ref.is_conditional_in_stmt);
5940       gcc_assert (dr != NULL);
5941       datarefs->safe_push (dr);
5942     }
5943
5944   return opt_result::success ();
5945 }
5946
5947 /* Stores the data references in STMT to DATAREFS.  If there is an
5948    unanalyzable reference, returns false, otherwise returns true.
5949    NEST is the outermost loop of the loop nest in which the references
5950    should be instantiated, LOOP is the loop in which the references
5951    should be analyzed.  */
5952
5953 bool
5954 graphite_find_data_references_in_stmt (edge nest, loop_p loop, gimple *stmt,
5955                                        vec<data_reference_p> *datarefs)
5956 {
5957   auto_vec<data_ref_loc, 2> references;
5958   bool ret = true;
5959   data_reference_p dr;
5960
5961   if (get_references_in_stmt (stmt, &references))
5962     return false;
5963
5964   for (const data_ref_loc &ref : references)
5965     {
5966       dr = create_data_ref (nest, loop, ref.ref, stmt, ref.is_read,
5967                             ref.is_conditional_in_stmt);
5968       gcc_assert (dr != NULL);
5969       datarefs->safe_push (dr);
5970     }
5971
5972   return ret;
5973 }
5974
5975 /* Search the data references in LOOP, and record the information into
5976    DATAREFS.  Returns chrec_dont_know when failing to analyze a
5977    difficult case, returns NULL_TREE otherwise.  */
5978
5979 tree
5980 find_data_references_in_bb (class loop *loop, basic_block bb,
5981                             vec<data_reference_p> *datarefs)
5982 {
5983   gimple_stmt_iterator bsi;
5984
5985   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5986     {
5987       gimple *stmt = gsi_stmt (bsi);
5988
5989       if (!find_data_references_in_stmt (loop, stmt, datarefs))
5990         {
5991           struct data_reference *res;
5992           res = XCNEW (struct data_reference);
5993           datarefs->safe_push (res);
5994
5995           return chrec_dont_know;
5996         }
5997     }
5998
5999   return NULL_TREE;
6000 }
6001
6002 /* Search the data references in LOOP, and record the information into
6003    DATAREFS.  Returns chrec_dont_know when failing to analyze a
6004    difficult case, returns NULL_TREE otherwise.
6005
6006    TODO: This function should be made smarter so that it can handle address
6007    arithmetic as if they were array accesses, etc.  */
6008
6009 tree
6010 find_data_references_in_loop (class loop *loop,
6011                               vec<data_reference_p> *datarefs)
6012 {
6013   basic_block bb, *bbs;
6014   unsigned int i;
6015
6016   bbs = get_loop_body_in_dom_order (loop);
6017
6018   for (i = 0; i < loop->num_nodes; i++)
6019     {
6020       bb = bbs[i];
6021
6022       if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
6023         {
6024           free (bbs);
6025           return chrec_dont_know;
6026         }
6027     }
6028   free (bbs);
6029
6030   return NULL_TREE;
6031 }
6032
6033 /* Return the alignment in bytes that DRB is guaranteed to have at all
6034    times.  */
6035
6036 unsigned int
6037 dr_alignment (innermost_loop_behavior *drb)
6038 {
6039   /* Get the alignment of BASE_ADDRESS + INIT.  */
6040   unsigned int alignment = drb->base_alignment;
6041   unsigned int misalignment = (drb->base_misalignment
6042                                + TREE_INT_CST_LOW (drb->init));
6043   if (misalignment != 0)
6044     alignment = MIN (alignment, misalignment & -misalignment);
6045
6046   /* Cap it to the alignment of OFFSET.  */
6047   if (!integer_zerop (drb->offset))
6048     alignment = MIN (alignment, drb->offset_alignment);
6049
6050   /* Cap it to the alignment of STEP.  */
6051   if (!integer_zerop (drb->step))
6052     alignment = MIN (alignment, drb->step_alignment);
6053
6054   return alignment;
6055 }
6056
6057 /* If BASE is a pointer-typed SSA name, try to find the object that it
6058    is based on.  Return this object X on success and store the alignment
6059    in bytes of BASE - &X in *ALIGNMENT_OUT.  */
6060
6061 static tree
6062 get_base_for_alignment_1 (tree base, unsigned int *alignment_out)
6063 {
6064   if (TREE_CODE (base) != SSA_NAME || !POINTER_TYPE_P (TREE_TYPE (base)))
6065     return NULL_TREE;
6066
6067   gimple *def = SSA_NAME_DEF_STMT (base);
6068   base = analyze_scalar_evolution (loop_containing_stmt (def), base);
6069
6070   /* Peel chrecs and record the minimum alignment preserved by
6071      all steps.  */
6072   unsigned int alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
6073   while (TREE_CODE (base) == POLYNOMIAL_CHREC)
6074     {
6075       unsigned int step_alignment = highest_pow2_factor (CHREC_RIGHT (base));
6076       alignment = MIN (alignment, step_alignment);
6077       base = CHREC_LEFT (base);
6078     }
6079
6080   /* Punt if the expression is too complicated to handle.  */
6081   if (tree_contains_chrecs (base, NULL) || !POINTER_TYPE_P (TREE_TYPE (base)))
6082     return NULL_TREE;
6083
6084   /* The only useful cases are those for which a dereference folds to something
6085      other than an INDIRECT_REF.  */
6086   tree ref_type = TREE_TYPE (TREE_TYPE (base));
6087   tree ref = fold_indirect_ref_1 (UNKNOWN_LOCATION, ref_type, base);
6088   if (!ref)
6089     return NULL_TREE;
6090
6091   /* Analyze the base to which the steps we peeled were applied.  */
6092   poly_int64 bitsize, bitpos, bytepos;
6093   machine_mode mode;
6094   int unsignedp, reversep, volatilep;
6095   tree offset;
6096   base = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
6097                               &unsignedp, &reversep, &volatilep);
6098   if (!base || !multiple_p (bitpos, BITS_PER_UNIT, &bytepos))
6099     return NULL_TREE;
6100
6101   /* Restrict the alignment to that guaranteed by the offsets.  */
6102   unsigned int bytepos_alignment = known_alignment (bytepos);
6103   if (bytepos_alignment != 0)
6104     alignment = MIN (alignment, bytepos_alignment);
6105   if (offset)
6106     {
6107       unsigned int offset_alignment = highest_pow2_factor (offset);
6108       alignment = MIN (alignment, offset_alignment);
6109     }
6110
6111   *alignment_out = alignment;
6112   return base;
6113 }
6114
6115 /* Return the object whose alignment would need to be changed in order
6116    to increase the alignment of ADDR.  Store the maximum achievable
6117    alignment in *MAX_ALIGNMENT.  */
6118
6119 tree
6120 get_base_for_alignment (tree addr, unsigned int *max_alignment)
6121 {
6122   tree base = get_base_for_alignment_1 (addr, max_alignment);
6123   if (base)
6124     return base;
6125
6126   if (TREE_CODE (addr) == ADDR_EXPR)
6127     addr = TREE_OPERAND (addr, 0);
6128   *max_alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
6129   return addr;
6130 }
6131
6132 /* Recursive helper function.  */
6133
6134 static bool
6135 find_loop_nest_1 (class loop *loop, vec<loop_p> *loop_nest)
6136 {
6137   /* Inner loops of the nest should not contain siblings.  Example:
6138      when there are two consecutive loops,
6139
6140      | loop_0
6141      |   loop_1
6142      |     A[{0, +, 1}_1]
6143      |   endloop_1
6144      |   loop_2
6145      |     A[{0, +, 1}_2]
6146      |   endloop_2
6147      | endloop_0
6148
6149      the dependence relation cannot be captured by the distance
6150      abstraction.  */
6151   if (loop->next)
6152     return false;
6153
6154   loop_nest->safe_push (loop);
6155   if (loop->inner)
6156     return find_loop_nest_1 (loop->inner, loop_nest);
6157   return true;
6158 }
6159
6160 /* Return false when the LOOP is not well nested.  Otherwise return
6161    true and insert in LOOP_NEST the loops of the nest.  LOOP_NEST will
6162    contain the loops from the outermost to the innermost, as they will
6163    appear in the classic distance vector.  */
6164
6165 bool
6166 find_loop_nest (class loop *loop, vec<loop_p> *loop_nest)
6167 {
6168   loop_nest->safe_push (loop);
6169   if (loop->inner)
6170     return find_loop_nest_1 (loop->inner, loop_nest);
6171   return true;
6172 }
6173
6174 /* Returns true when the data dependences have been computed, false otherwise.
6175    Given a loop nest LOOP, the following vectors are returned:
6176    DATAREFS is initialized to all the array elements contained in this loop,
6177    DEPENDENCE_RELATIONS contains the relations between the data references.
6178    Compute read-read and self relations if
6179    COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE.  */
6180
6181 bool
6182 compute_data_dependences_for_loop (class loop *loop,
6183                                    bool compute_self_and_read_read_dependences,
6184                                    vec<loop_p> *loop_nest,
6185                                    vec<data_reference_p> *datarefs,
6186                                    vec<ddr_p> *dependence_relations)
6187 {
6188   bool res = true;
6189
6190   memset (&dependence_stats, 0, sizeof (dependence_stats));
6191
6192   /* If the loop nest is not well formed, or one of the data references
6193      is not computable, give up without spending time to compute other
6194      dependences.  */
6195   if (!loop
6196       || !find_loop_nest (loop, loop_nest)
6197       || find_data_references_in_loop (loop, datarefs) == chrec_dont_know
6198       || !compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
6199                                    compute_self_and_read_read_dependences))
6200     res = false;
6201
6202   if (dump_file && (dump_flags & TDF_STATS))
6203     {
6204       fprintf (dump_file, "Dependence tester statistics:\n");
6205
6206       fprintf (dump_file, "Number of dependence tests: %d\n",
6207                dependence_stats.num_dependence_tests);
6208       fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
6209                dependence_stats.num_dependence_dependent);
6210       fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
6211                dependence_stats.num_dependence_independent);
6212       fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
6213                dependence_stats.num_dependence_undetermined);
6214
6215       fprintf (dump_file, "Number of subscript tests: %d\n",
6216                dependence_stats.num_subscript_tests);
6217       fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
6218                dependence_stats.num_subscript_undetermined);
6219       fprintf (dump_file, "Number of same subscript function: %d\n",
6220                dependence_stats.num_same_subscript_function);
6221
6222       fprintf (dump_file, "Number of ziv tests: %d\n",
6223                dependence_stats.num_ziv);
6224       fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
6225                dependence_stats.num_ziv_dependent);
6226       fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
6227                dependence_stats.num_ziv_independent);
6228       fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
6229                dependence_stats.num_ziv_unimplemented);
6230
6231       fprintf (dump_file, "Number of siv tests: %d\n",
6232                dependence_stats.num_siv);
6233       fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
6234                dependence_stats.num_siv_dependent);
6235       fprintf (dump_file, "Number of siv tests returning independent: %d\n",
6236                dependence_stats.num_siv_independent);
6237       fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
6238                dependence_stats.num_siv_unimplemented);
6239
6240       fprintf (dump_file, "Number of miv tests: %d\n",
6241                dependence_stats.num_miv);
6242       fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
6243                dependence_stats.num_miv_dependent);
6244       fprintf (dump_file, "Number of miv tests returning independent: %d\n",
6245                dependence_stats.num_miv_independent);
6246       fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
6247                dependence_stats.num_miv_unimplemented);
6248     }
6249
6250   return res;
6251 }
6252
6253 /* Free the memory used by a data dependence relation DDR.  */
6254
6255 void
6256 free_dependence_relation (struct data_dependence_relation *ddr)
6257 {
6258   if (ddr == NULL)
6259     return;
6260
6261   if (DDR_SUBSCRIPTS (ddr).exists ())
6262     free_subscripts (DDR_SUBSCRIPTS (ddr));
6263   DDR_DIST_VECTS (ddr).release ();
6264   DDR_DIR_VECTS (ddr).release ();
6265
6266   free (ddr);
6267 }
6268
6269 /* Free the memory used by the data dependence relations from
6270    DEPENDENCE_RELATIONS.  */
6271
6272 void
6273 free_dependence_relations (vec<ddr_p>& dependence_relations)
6274 {
6275   for (data_dependence_relation *ddr : dependence_relations)
6276     if (ddr)
6277       free_dependence_relation (ddr);
6278
6279   dependence_relations.release ();
6280 }
6281
6282 /* Free the memory used by the data references from DATAREFS.  */
6283
6284 void
6285 free_data_refs (vec<data_reference_p>& datarefs)
6286 {
6287   for (data_reference *dr : datarefs)
6288     free_data_ref (dr);
6289   datarefs.release ();
6290 }
6291
6292 /* Common routine implementing both dr_direction_indicator and
6293    dr_zero_step_indicator.  Return USEFUL_MIN if the indicator is known
6294    to be >= USEFUL_MIN and -1 if the indicator is known to be negative.
6295    Return the step as the indicator otherwise.  */
6296
6297 static tree
6298 dr_step_indicator (struct data_reference *dr, int useful_min)
6299 {
6300   tree step = DR_STEP (dr);
6301   if (!step)
6302     return NULL_TREE;
6303   STRIP_NOPS (step);
6304   /* Look for cases where the step is scaled by a positive constant
6305      integer, which will often be the access size.  If the multiplication
6306      doesn't change the sign (due to overflow effects) then we can
6307      test the unscaled value instead.  */
6308   if (TREE_CODE (step) == MULT_EXPR
6309       && TREE_CODE (TREE_OPERAND (step, 1)) == INTEGER_CST
6310       && tree_int_cst_sgn (TREE_OPERAND (step, 1)) > 0)
6311     {
6312       tree factor = TREE_OPERAND (step, 1);
6313       step = TREE_OPERAND (step, 0);
6314
6315       /* Strip widening and truncating conversions as well as nops.  */
6316       if (CONVERT_EXPR_P (step)
6317           && INTEGRAL_TYPE_P (TREE_TYPE (TREE_OPERAND (step, 0))))
6318         step = TREE_OPERAND (step, 0);
6319       tree type = TREE_TYPE (step);
6320
6321       /* Get the range of step values that would not cause overflow.  */
6322       widest_int minv = (wi::to_widest (TYPE_MIN_VALUE (ssizetype))
6323                          / wi::to_widest (factor));
6324       widest_int maxv = (wi::to_widest (TYPE_MAX_VALUE (ssizetype))
6325                          / wi::to_widest (factor));
6326
6327       /* Get the range of values that the unconverted step actually has.  */
6328       wide_int step_min, step_max;
6329       value_range vr;
6330       if (TREE_CODE (step) != SSA_NAME
6331           || !get_range_query (cfun)->range_of_expr (vr, step)
6332           || vr.kind () != VR_RANGE)
6333         {
6334           step_min = wi::to_wide (TYPE_MIN_VALUE (type));
6335           step_max = wi::to_wide (TYPE_MAX_VALUE (type));
6336         }
6337       else
6338         {
6339           step_min = vr.lower_bound ();
6340           step_max = vr.upper_bound ();
6341         }
6342
6343       /* Check whether the unconverted step has an acceptable range.  */
6344       signop sgn = TYPE_SIGN (type);
6345       if (wi::les_p (minv, widest_int::from (step_min, sgn))
6346           && wi::ges_p (maxv, widest_int::from (step_max, sgn)))
6347         {
6348           if (wi::ge_p (step_min, useful_min, sgn))
6349             return ssize_int (useful_min);
6350           else if (wi::lt_p (step_max, 0, sgn))
6351             return ssize_int (-1);
6352           else
6353             return fold_convert (ssizetype, step);
6354         }
6355     }
6356   return DR_STEP (dr);
6357 }
6358
6359 /* Return a value that is negative iff DR has a negative step.  */
6360
6361 tree
6362 dr_direction_indicator (struct data_reference *dr)
6363 {
6364   return dr_step_indicator (dr, 0);
6365 }
6366
6367 /* Return a value that is zero iff DR has a zero step.  */
6368
6369 tree
6370 dr_zero_step_indicator (struct data_reference *dr)
6371 {
6372   return dr_step_indicator (dr, 1);
6373 }
6374
6375 /* Return true if DR is known to have a nonnegative (but possibly zero)
6376    step.  */
6377
6378 bool
6379 dr_known_forward_stride_p (struct data_reference *dr)
6380 {
6381   tree indicator = dr_direction_indicator (dr);
6382   tree neg_step_val = fold_binary (LT_EXPR, boolean_type_node,
6383                                    fold_convert (ssizetype, indicator),
6384                                    ssize_int (0));
6385   return neg_step_val && integer_zerop (neg_step_val);
6386 }