gcc/tree-data-ref.cc

   1 /* Data references and dependences detectors.
   2    Copyright (C) 2003-2023 Free Software Foundation, Inc.
   3    Contributed by Sebastian Pop <pop@cri.ensmp.fr>
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 /* This pass walks a given loop structure searching for array
  22    references.  The information about the array accesses is recorded
  23    in DATA_REFERENCE structures.
  24
  25    The basic test for determining the dependences is:
  26    given two access functions chrec1 and chrec2 to a same array, and
  27    x and y two vectors from the iteration domain, the same element of
  28    the array is accessed twice at iterations x and y if and only if:
  29    |             chrec1 (x) == chrec2 (y).
  30
  31    The goals of this analysis are:
  32
  33    - to determine the independence: the relation between two
  34      independent accesses is qualified with the chrec_known (this
  35      information allows a loop parallelization),
  36
  37    - when two data references access the same data, to qualify the
  38      dependence relation with classic dependence representations:
  39
  40        - distance vectors
  41        - direction vectors
  42        - loop carried level dependence
  43        - polyhedron dependence
  44      or with the chains of recurrences based representation,
  45
  46    - to define a knowledge base for storing the data dependence
  47      information,
  48
  49    - to define an interface to access this data.
  50
  51
  52    Definitions:
  53
  54    - subscript: given two array accesses a subscript is the tuple
  55    composed of the access functions for a given dimension.  Example:
  56    Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
  57    (f1, g1), (f2, g2), (f3, g3).
  58
  59    - Diophantine equation: an equation whose coefficients and
  60    solutions are integer constants, for example the equation
  61    |   3*x + 2*y = 1
  62    has an integer solution x = 1 and y = -1.
  63
  64    References:
  65
  66    - "Advanced Compilation for High Performance Computing" by Randy
  67    Allen and Ken Kennedy.
  68    http://citeseer.ist.psu.edu/goff91practical.html
  69
  70    - "Loop Transformations for Restructuring Compilers - The Foundations"
  71    by Utpal Banerjee.
  72
  73
  74 */
  75
  76 #include "config.h"
  77 #include "system.h"
  78 #include "coretypes.h"
  79 #include "backend.h"
  80 #include "rtl.h"
  81 #include "tree.h"
  82 #include "gimple.h"
  83 #include "gimple-pretty-print.h"
  84 #include "alias.h"
  85 #include "fold-const.h"
  86 #include "expr.h"
  87 #include "gimple-iterator.h"
  88 #include "tree-ssa-loop-niter.h"
  89 #include "tree-ssa-loop.h"
  90 #include "tree-ssa.h"
  91 #include "cfgloop.h"
  92 #include "tree-data-ref.h"
  93 #include "tree-scalar-evolution.h"
  94 #include "dumpfile.h"
  95 #include "tree-affine.h"
  96 #include "builtins.h"
  97 #include "tree-eh.h"
  98 #include "ssa.h"
  99 #include "internal-fn.h"
 100 #include "vr-values.h"
 101 #include "range-op.h"
 102 #include "tree-ssa-loop-ivopts.h"
 103 #include "calls.h"
 104
 105 static struct datadep_stats
 106 {
 107   int num_dependence_tests;
 108   int num_dependence_dependent;
 109   int num_dependence_independent;
 110   int num_dependence_undetermined;
 111
 112   int num_subscript_tests;
 113   int num_subscript_undetermined;
 114   int num_same_subscript_function;
 115
 116   int num_ziv;
 117   int num_ziv_independent;
 118   int num_ziv_dependent;
 119   int num_ziv_unimplemented;
 120
 121   int num_siv;
 122   int num_siv_independent;
 123   int num_siv_dependent;
 124   int num_siv_unimplemented;
 125
 126   int num_miv;
 127   int num_miv_independent;
 128   int num_miv_dependent;
 129   int num_miv_unimplemented;
 130 } dependence_stats;
 131
 132 static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
 133                                            unsigned int, unsigned int,
 134                                            class loop *);
 135 /* Returns true iff A divides B.  */
 136
 137 static inline bool
 138 tree_fold_divides_p (const_tree a, const_tree b)
 139 {
 140   gcc_assert (TREE_CODE (a) == INTEGER_CST);
 141   gcc_assert (TREE_CODE (b) == INTEGER_CST);
 142   return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
 143 }
 144
 145 /* Returns true iff A divides B.  */
 146
 147 static inline bool
 148 int_divides_p (lambda_int a, lambda_int b)
 149 {
 150   return ((b % a) == 0);
 151 }
 152
 153 /* Return true if reference REF contains a union access.  */
 154
 155 static bool
 156 ref_contains_union_access_p (tree ref)
 157 {
 158   while (handled_component_p (ref))
 159     {
 160       ref = TREE_OPERAND (ref, 0);
 161       if (TREE_CODE (TREE_TYPE (ref)) == UNION_TYPE
 162           || TREE_CODE (TREE_TYPE (ref)) == QUAL_UNION_TYPE)
 163         return true;
 164     }
 165   return false;
 166 }
 167
 168 \f
 169
 170 /* Dump into FILE all the data references from DATAREFS.  */
 171
 172 static void
 173 dump_data_references (FILE *file, vec<data_reference_p> datarefs)
 174 {
 175   for (data_reference *dr : datarefs)
 176     dump_data_reference (file, dr);
 177 }
 178
 179 /* Unified dump into FILE all the data references from DATAREFS.  */
 180
 181 DEBUG_FUNCTION void
 182 debug (vec<data_reference_p> &ref)
 183 {
 184   dump_data_references (stderr, ref);
 185 }
 186
 187 DEBUG_FUNCTION void
 188 debug (vec<data_reference_p> *ptr)
 189 {
 190   if (ptr)
 191     debug (*ptr);
 192   else
 193     fprintf (stderr, "<nil>\n");
 194 }
 195
 196
 197 /* Dump into STDERR all the data references from DATAREFS.  */
 198
 199 DEBUG_FUNCTION void
 200 debug_data_references (vec<data_reference_p> datarefs)
 201 {
 202   dump_data_references (stderr, datarefs);
 203 }
 204
 205 /* Print to STDERR the data_reference DR.  */
 206
 207 DEBUG_FUNCTION void
 208 debug_data_reference (struct data_reference *dr)
 209 {
 210   dump_data_reference (stderr, dr);
 211 }
 212
 213 /* Dump function for a DATA_REFERENCE structure.  */
 214
 215 void
 216 dump_data_reference (FILE *outf,
 217                      struct data_reference *dr)
 218 {
 219   unsigned int i;
 220
 221   fprintf (outf, "#(Data Ref: \n");
 222   fprintf (outf, "#  bb: %d \n", gimple_bb (DR_STMT (dr))->index);
 223   fprintf (outf, "#  stmt: ");
 224   print_gimple_stmt (outf, DR_STMT (dr), 0);
 225   fprintf (outf, "#  ref: ");
 226   print_generic_stmt (outf, DR_REF (dr));
 227   fprintf (outf, "#  base_object: ");
 228   print_generic_stmt (outf, DR_BASE_OBJECT (dr));
 229
 230   for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
 231     {
 232       fprintf (outf, "#  Access function %d: ", i);
 233       print_generic_stmt (outf, DR_ACCESS_FN (dr, i));
 234     }
 235   fprintf (outf, "#)\n");
 236 }
 237
 238 /* Unified dump function for a DATA_REFERENCE structure.  */
 239
 240 DEBUG_FUNCTION void
 241 debug (data_reference &ref)
 242 {
 243   dump_data_reference (stderr, &ref);
 244 }
 245
 246 DEBUG_FUNCTION void
 247 debug (data_reference *ptr)
 248 {
 249   if (ptr)
 250     debug (*ptr);
 251   else
 252     fprintf (stderr, "<nil>\n");
 253 }
 254
 255
 256 /* Dumps the affine function described by FN to the file OUTF.  */
 257
 258 DEBUG_FUNCTION void
 259 dump_affine_function (FILE *outf, affine_fn fn)
 260 {
 261   unsigned i;
 262   tree coef;
 263
 264   print_generic_expr (outf, fn[0], TDF_SLIM);
 265   for (i = 1; fn.iterate (i, &coef); i++)
 266     {
 267       fprintf (outf, " + ");
 268       print_generic_expr (outf, coef, TDF_SLIM);
 269       fprintf (outf, " * x_%u", i);
 270     }
 271 }
 272
 273 /* Dumps the conflict function CF to the file OUTF.  */
 274
 275 DEBUG_FUNCTION void
 276 dump_conflict_function (FILE *outf, conflict_function *cf)
 277 {
 278   unsigned i;
 279
 280   if (cf->n == NO_DEPENDENCE)
 281     fprintf (outf, "no dependence");
 282   else if (cf->n == NOT_KNOWN)
 283     fprintf (outf, "not known");
 284   else
 285     {
 286       for (i = 0; i < cf->n; i++)
 287         {
 288           if (i != 0)
 289             fprintf (outf, " ");
 290           fprintf (outf, "[");
 291           dump_affine_function (outf, cf->fns[i]);
 292           fprintf (outf, "]");
 293         }
 294     }
 295 }
 296
 297 /* Dump function for a SUBSCRIPT structure.  */
 298
 299 DEBUG_FUNCTION void
 300 dump_subscript (FILE *outf, struct subscript *subscript)
 301 {
 302   conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
 303
 304   fprintf (outf, "\n (subscript \n");
 305   fprintf (outf, "  iterations_that_access_an_element_twice_in_A: ");
 306   dump_conflict_function (outf, cf);
 307   if (CF_NONTRIVIAL_P (cf))
 308     {
 309       tree last_iteration = SUB_LAST_CONFLICT (subscript);
 310       fprintf (outf, "\n  last_conflict: ");
 311       print_generic_expr (outf, last_iteration);
 312     }
 313
 314   cf = SUB_CONFLICTS_IN_B (subscript);
 315   fprintf (outf, "\n  iterations_that_access_an_element_twice_in_B: ");
 316   dump_conflict_function (outf, cf);
 317   if (CF_NONTRIVIAL_P (cf))
 318     {
 319       tree last_iteration = SUB_LAST_CONFLICT (subscript);
 320       fprintf (outf, "\n  last_conflict: ");
 321       print_generic_expr (outf, last_iteration);
 322     }
 323
 324   fprintf (outf, "\n  (Subscript distance: ");
 325   print_generic_expr (outf, SUB_DISTANCE (subscript));
 326   fprintf (outf, " ))\n");
 327 }
 328
 329 /* Print the classic direction vector DIRV to OUTF.  */
 330
 331 DEBUG_FUNCTION void
 332 print_direction_vector (FILE *outf,
 333                         lambda_vector dirv,
 334                         int length)
 335 {
 336   int eq;
 337
 338   for (eq = 0; eq < length; eq++)
 339     {
 340       enum data_dependence_direction dir = ((enum data_dependence_direction)
 341                                             dirv[eq]);
 342
 343       switch (dir)
 344         {
 345         case dir_positive:
 346           fprintf (outf, "    +");
 347           break;
 348         case dir_negative:
 349           fprintf (outf, "    -");
 350           break;
 351         case dir_equal:
 352           fprintf (outf, "    =");
 353           break;
 354         case dir_positive_or_equal:
 355           fprintf (outf, "   +=");
 356           break;
 357         case dir_positive_or_negative:
 358           fprintf (outf, "   +-");
 359           break;
 360         case dir_negative_or_equal:
 361           fprintf (outf, "   -=");
 362           break;
 363         case dir_star:
 364           fprintf (outf, "    *");
 365           break;
 366         default:
 367           fprintf (outf, "indep");
 368           break;
 369         }
 370     }
 371   fprintf (outf, "\n");
 372 }
 373
 374 /* Print a vector of direction vectors.  */
 375
 376 DEBUG_FUNCTION void
 377 print_dir_vectors (FILE *outf, vec<lambda_vector> dir_vects,
 378                    int length)
 379 {
 380   for (lambda_vector v : dir_vects)
 381     print_direction_vector (outf, v, length);
 382 }
 383
 384 /* Print out a vector VEC of length N to OUTFILE.  */
 385
 386 DEBUG_FUNCTION void
 387 print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
 388 {
 389   int i;
 390
 391   for (i = 0; i < n; i++)
 392     fprintf (outfile, HOST_WIDE_INT_PRINT_DEC " ", vector[i]);
 393   fprintf (outfile, "\n");
 394 }
 395
 396 /* Print a vector of distance vectors.  */
 397
 398 DEBUG_FUNCTION void
 399 print_dist_vectors (FILE *outf, vec<lambda_vector> dist_vects,
 400                     int length)
 401 {
 402   for (lambda_vector v : dist_vects)
 403     print_lambda_vector (outf, v, length);
 404 }
 405
 406 /* Dump function for a DATA_DEPENDENCE_RELATION structure.  */
 407
 408 DEBUG_FUNCTION void
 409 dump_data_dependence_relation (FILE *outf, const data_dependence_relation *ddr)
 410 {
 411   struct data_reference *dra, *drb;
 412
 413   fprintf (outf, "(Data Dep: \n");
 414
 415   if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
 416     {
 417       if (ddr)
 418         {
 419           dra = DDR_A (ddr);
 420           drb = DDR_B (ddr);
 421           if (dra)
 422             dump_data_reference (outf, dra);
 423           else
 424             fprintf (outf, "    (nil)\n");
 425           if (drb)
 426             dump_data_reference (outf, drb);
 427           else
 428             fprintf (outf, "    (nil)\n");
 429         }
 430       fprintf (outf, "    (don't know)\n)\n");
 431       return;
 432     }
 433
 434   dra = DDR_A (ddr);
 435   drb = DDR_B (ddr);
 436   dump_data_reference (outf, dra);
 437   dump_data_reference (outf, drb);
 438
 439   if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
 440     fprintf (outf, "    (no dependence)\n");
 441
 442   else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
 443     {
 444       unsigned int i;
 445       class loop *loopi;
 446
 447       subscript *sub;
 448       FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
 449         {
 450           fprintf (outf, "  access_fn_A: ");
 451           print_generic_stmt (outf, SUB_ACCESS_FN (sub, 0));
 452           fprintf (outf, "  access_fn_B: ");
 453           print_generic_stmt (outf, SUB_ACCESS_FN (sub, 1));
 454           dump_subscript (outf, sub);
 455         }
 456
 457       fprintf (outf, "  loop nest: (");
 458       FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr), i, loopi)
 459         fprintf (outf, "%d ", loopi->num);
 460       fprintf (outf, ")\n");
 461
 462       for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
 463         {
 464           fprintf (outf, "  distance_vector: ");
 465           print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
 466                                DDR_NB_LOOPS (ddr));
 467         }
 468
 469       for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
 470         {
 471           fprintf (outf, "  direction_vector: ");
 472           print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
 473                                   DDR_NB_LOOPS (ddr));
 474         }
 475     }
 476
 477   fprintf (outf, ")\n");
 478 }
 479
 480 /* Debug version.  */
 481
 482 DEBUG_FUNCTION void
 483 debug_data_dependence_relation (const struct data_dependence_relation *ddr)
 484 {
 485   dump_data_dependence_relation (stderr, ddr);
 486 }
 487
 488 /* Dump into FILE all the dependence relations from DDRS.  */
 489
 490 DEBUG_FUNCTION void
 491 dump_data_dependence_relations (FILE *file, const vec<ddr_p> &ddrs)
 492 {
 493   for (auto ddr : ddrs)
 494     dump_data_dependence_relation (file, ddr);
 495 }
 496
 497 DEBUG_FUNCTION void
 498 debug (vec<ddr_p> &ref)
 499 {
 500   dump_data_dependence_relations (stderr, ref);
 501 }
 502
 503 DEBUG_FUNCTION void
 504 debug (vec<ddr_p> *ptr)
 505 {
 506   if (ptr)
 507     debug (*ptr);
 508   else
 509     fprintf (stderr, "<nil>\n");
 510 }
 511
 512
 513 /* Dump to STDERR all the dependence relations from DDRS.  */
 514
 515 DEBUG_FUNCTION void
 516 debug_data_dependence_relations (vec<ddr_p> ddrs)
 517 {
 518   dump_data_dependence_relations (stderr, ddrs);
 519 }
 520
 521 /* Dumps the distance and direction vectors in FILE.  DDRS contains
 522    the dependence relations, and VECT_SIZE is the size of the
 523    dependence vectors, or in other words the number of loops in the
 524    considered nest.  */
 525
 526 DEBUG_FUNCTION void
 527 dump_dist_dir_vectors (FILE *file, vec<ddr_p> ddrs)
 528 {
 529   for (data_dependence_relation *ddr : ddrs)
 530     if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
 531       {
 532         for (lambda_vector v : DDR_DIST_VECTS (ddr))
 533           {
 534             fprintf (file, "DISTANCE_V (");
 535             print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
 536             fprintf (file, ")\n");
 537           }
 538
 539         for (lambda_vector v : DDR_DIR_VECTS (ddr))
 540           {
 541             fprintf (file, "DIRECTION_V (");
 542             print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
 543             fprintf (file, ")\n");
 544           }
 545       }
 546
 547   fprintf (file, "\n\n");
 548 }
 549
 550 /* Dumps the data dependence relations DDRS in FILE.  */
 551
 552 DEBUG_FUNCTION void
 553 dump_ddrs (FILE *file, vec<ddr_p> ddrs)
 554 {
 555   for (data_dependence_relation *ddr : ddrs)
 556     dump_data_dependence_relation (file, ddr);
 557
 558   fprintf (file, "\n\n");
 559 }
 560
 561 DEBUG_FUNCTION void
 562 debug_ddrs (vec<ddr_p> ddrs)
 563 {
 564   dump_ddrs (stderr, ddrs);
 565 }
 566
 567 /* If RESULT_RANGE is nonnull, set *RESULT_RANGE to the range of
 568    OP0 CODE OP1, where:
 569
 570    - OP0 CODE OP1 has integral type TYPE
 571    - the range of OP0 is given by OP0_RANGE and
 572    - the range of OP1 is given by OP1_RANGE.
 573
 574    Independently of RESULT_RANGE, try to compute:
 575
 576      DELTA = ((sizetype) OP0 CODE (sizetype) OP1)
 577              - (sizetype) (OP0 CODE OP1)
 578
 579    as a constant and subtract DELTA from the ssizetype constant in *OFF.
 580    Return true on success, or false if DELTA is not known at compile time.
 581
 582    Truncation and sign changes are known to distribute over CODE, i.e.
 583
 584      (itype) (A CODE B) == (itype) A CODE (itype) B
 585
 586    for any integral type ITYPE whose precision is no greater than the
 587    precision of A and B.  */
 588
 589 static bool
 590 compute_distributive_range (tree type, value_range &op0_range,
 591                             tree_code code, value_range &op1_range,
 592                             tree *off, value_range *result_range)
 593 {
 594   gcc_assert (INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type));
 595   if (result_range)
 596     {
 597       range_op_handler op (code);
 598       if (!op.fold_range (*result_range, type, op0_range, op1_range))
 599         result_range->set_varying (type);
 600     }
 601
 602   /* The distributive property guarantees that if TYPE is no narrower
 603      than SIZETYPE,
 604
 605        (sizetype) (OP0 CODE OP1) == (sizetype) OP0 CODE (sizetype) OP1
 606
 607      and so we can treat DELTA as zero.  */
 608   if (TYPE_PRECISION (type) >= TYPE_PRECISION (sizetype))
 609     return true;
 610
 611   /* If overflow is undefined, we can assume that:
 612
 613        X == (ssizetype) OP0 CODE (ssizetype) OP1
 614
 615      is within the range of TYPE, i.e.:
 616
 617        X == (ssizetype) (TYPE) X
 618
 619      Distributing the (TYPE) truncation over X gives:
 620
 621        X == (ssizetype) (OP0 CODE OP1)
 622
 623      Casting both sides to sizetype and distributing the sizetype cast
 624      over X gives:
 625
 626        (sizetype) OP0 CODE (sizetype) OP1 == (sizetype) (OP0 CODE OP1)
 627
 628      and so we can treat DELTA as zero.  */
 629   if (TYPE_OVERFLOW_UNDEFINED (type))
 630     return true;
 631
 632   /* Compute the range of:
 633
 634        (ssizetype) OP0 CODE (ssizetype) OP1
 635
 636      The distributive property guarantees that this has the same bitpattern as:
 637
 638        (sizetype) OP0 CODE (sizetype) OP1
 639
 640      but its range is more conducive to analysis.  */
 641   range_cast (op0_range, ssizetype);
 642   range_cast (op1_range, ssizetype);
 643   value_range wide_range;
 644   range_op_handler op (code);
 645   bool saved_flag_wrapv = flag_wrapv;
 646   flag_wrapv = 1;
 647   if (!op.fold_range (wide_range, ssizetype, op0_range, op1_range))
 648     wide_range.set_varying (ssizetype);;
 649   flag_wrapv = saved_flag_wrapv;
 650   if (wide_range.num_pairs () != 1
 651       || wide_range.varying_p () || wide_range.undefined_p ())
 652     return false;
 653
 654   wide_int lb = wide_range.lower_bound ();
 655   wide_int ub = wide_range.upper_bound ();
 656
 657   /* Calculate the number of times that each end of the range overflows or
 658      underflows TYPE.  We can only calculate DELTA if the numbers match.  */
 659   unsigned int precision = TYPE_PRECISION (type);
 660   if (!TYPE_UNSIGNED (type))
 661     {
 662       wide_int type_min = wi::mask (precision - 1, true, lb.get_precision ());
 663       lb -= type_min;
 664       ub -= type_min;
 665     }
 666   wide_int upper_bits = wi::mask (precision, true, lb.get_precision ());
 667   lb &= upper_bits;
 668   ub &= upper_bits;
 669   if (lb != ub)
 670     return false;
 671
 672   /* OP0 CODE OP1 overflows exactly arshift (LB, PRECISION) times, with
 673      negative values indicating underflow.  The low PRECISION bits of LB
 674      are clear, so DELTA is therefore LB (== UB).  */
 675   *off = wide_int_to_tree (ssizetype, wi::to_wide (*off) - lb);
 676   return true;
 677 }
 678
 679 /* Return true if (sizetype) OP == (sizetype) (TO_TYPE) OP,
 680    given that OP has type FROM_TYPE and range RANGE.  Both TO_TYPE and
 681    FROM_TYPE are integral types.  */
 682
 683 static bool
 684 nop_conversion_for_offset_p (tree to_type, tree from_type, value_range &range)
 685 {
 686   gcc_assert (INTEGRAL_TYPE_P (to_type)
 687               && INTEGRAL_TYPE_P (from_type)
 688               && !TYPE_OVERFLOW_TRAPS (to_type)
 689               && !TYPE_OVERFLOW_TRAPS (from_type));
 690
 691   /* Converting to something no narrower than sizetype and then to sizetype
 692      is equivalent to converting directly to sizetype.  */
 693   if (TYPE_PRECISION (to_type) >= TYPE_PRECISION (sizetype))
 694     return true;
 695
 696   /* Check whether TO_TYPE can represent all values that FROM_TYPE can.  */
 697   if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type)
 698       && (TYPE_UNSIGNED (from_type) || !TYPE_UNSIGNED (to_type)))
 699     return true;
 700
 701   /* For narrowing conversions, we could in principle test whether
 702      the bits in FROM_TYPE but not in TO_TYPE have a fixed value
 703      and apply a constant adjustment.
 704
 705      For other conversions (which involve a sign change) we could
 706      check that the signs are always equal, and apply a constant
 707      adjustment if the signs are negative.
 708
 709      However, both cases should be rare.  */
 710   return range_fits_type_p (&range, TYPE_PRECISION (to_type),
 711                             TYPE_SIGN (to_type));
 712 }
 713
 714 static void
 715 split_constant_offset (tree type, tree *var, tree *off,
 716                        value_range *result_range,
 717                        hash_map<tree, std::pair<tree, tree> > &cache,
 718                        unsigned *limit);
 719
 720 /* Helper function for split_constant_offset.  If TYPE is a pointer type,
 721    try to express OP0 CODE OP1 as:
 722
 723      POINTER_PLUS <*VAR, (sizetype) *OFF>
 724
 725    where:
 726
 727    - *VAR has type TYPE
 728    - *OFF is a constant of type ssizetype.
 729
 730    If TYPE is an integral type, try to express (sizetype) (OP0 CODE OP1) as:
 731
 732      *VAR + (sizetype) *OFF
 733
 734    where:
 735
 736    - *VAR has type sizetype
 737    - *OFF is a constant of type ssizetype.
 738
 739    In both cases, OP0 CODE OP1 has type TYPE.
 740
 741    Return true on success.  A false return value indicates that we can't
 742    do better than set *OFF to zero.
 743
 744    When returning true, set RESULT_RANGE to the range of OP0 CODE OP1,
 745    if RESULT_RANGE is nonnull and if we can do better than assume VR_VARYING.
 746
 747    CACHE caches {*VAR, *OFF} pairs for SSA names that we've previously
 748    visited.  LIMIT counts down the number of SSA names that we are
 749    allowed to process before giving up.  */
 750
 751 static bool
 752 split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
 753                          tree *var, tree *off, value_range *result_range,
 754                          hash_map<tree, std::pair<tree, tree> > &cache,
 755                          unsigned *limit)
 756 {
 757   tree var0, var1;
 758   tree off0, off1;
 759   value_range op0_range, op1_range;
 760
 761   *var = NULL_TREE;
 762   *off = NULL_TREE;
 763
 764   if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type))
 765     return false;
 766
 767   switch (code)
 768     {
 769     case INTEGER_CST:
 770       *var = size_int (0);
 771       *off = fold_convert (ssizetype, op0);
 772       if (result_range)
 773         {
 774           wide_int w = wi::to_wide (op0);
 775           result_range->set (TREE_TYPE (op0), w, w);
 776         }
 777       return true;
 778
 779     case POINTER_PLUS_EXPR:
 780       split_constant_offset (op0, &var0, &off0, nullptr, cache, limit);
 781       split_constant_offset (op1, &var1, &off1, nullptr, cache, limit);
 782       *var = fold_build2 (POINTER_PLUS_EXPR, type, var0, var1);
 783       *off = size_binop (PLUS_EXPR, off0, off1);
 784       return true;
 785
 786     case PLUS_EXPR:
 787     case MINUS_EXPR:
 788       split_constant_offset (op0, &var0, &off0, &op0_range, cache, limit);
 789       split_constant_offset (op1, &var1, &off1, &op1_range, cache, limit);
 790       *off = size_binop (code, off0, off1);
 791       if (!compute_distributive_range (type, op0_range, code, op1_range,
 792                                        off, result_range))
 793         return false;
 794       *var = fold_build2 (code, sizetype, var0, var1);
 795       return true;
 796
 797     case MULT_EXPR:
 798       if (TREE_CODE (op1) != INTEGER_CST)
 799         return false;
 800
 801       split_constant_offset (op0, &var0, &off0, &op0_range, cache, limit);
 802       op1_range.set (TREE_TYPE (op1), wi::to_wide (op1), wi::to_wide (op1));
 803       *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
 804       if (!compute_distributive_range (type, op0_range, code, op1_range,
 805                                        off, result_range))
 806         return false;
 807       *var = fold_build2 (MULT_EXPR, sizetype, var0,
 808                           fold_convert (sizetype, op1));
 809       return true;
 810
 811     case ADDR_EXPR:
 812       {
 813         tree base, poffset;
 814         poly_int64 pbitsize, pbitpos, pbytepos;
 815         machine_mode pmode;
 816         int punsignedp, preversep, pvolatilep;
 817
 818         op0 = TREE_OPERAND (op0, 0);
 819         base
 820           = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset, &pmode,
 821                                  &punsignedp, &preversep, &pvolatilep);
 822
 823         if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
 824           return false;
 825         base = build_fold_addr_expr (base);
 826         off0 = ssize_int (pbytepos);
 827
 828         if (poffset)
 829           {
 830             split_constant_offset (poffset, &poffset, &off1, nullptr,
 831                                    cache, limit);
 832             off0 = size_binop (PLUS_EXPR, off0, off1);
 833             base = fold_build_pointer_plus (base, poffset);
 834           }
 835
 836         var0 = fold_convert (type, base);
 837
 838         /* If variable length types are involved, punt, otherwise casts
 839            might be converted into ARRAY_REFs in gimplify_conversion.
 840            To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
 841            possibly no longer appears in current GIMPLE, might resurface.
 842            This perhaps could run
 843            if (CONVERT_EXPR_P (var0))
 844              {
 845                gimplify_conversion (&var0);
 846                // Attempt to fill in any within var0 found ARRAY_REF's
 847                // element size from corresponding op embedded ARRAY_REF,
 848                // if unsuccessful, just punt.
 849              }  */
 850         while (POINTER_TYPE_P (type))
 851           type = TREE_TYPE (type);
 852         if (int_size_in_bytes (type) < 0)
 853           return false;
 854
 855         *var = var0;
 856         *off = off0;
 857         return true;
 858       }
 859
 860     case SSA_NAME:
 861       {
 862         if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op0))
 863           return false;
 864
 865         gimple *def_stmt = SSA_NAME_DEF_STMT (op0);
 866         enum tree_code subcode;
 867
 868         if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
 869           return false;
 870
 871         subcode = gimple_assign_rhs_code (def_stmt);
 872
 873         /* We are using a cache to avoid un-CSEing large amounts of code.  */
 874         bool use_cache = false;
 875         if (!has_single_use (op0)
 876             && (subcode == POINTER_PLUS_EXPR
 877                 || subcode == PLUS_EXPR
 878                 || subcode == MINUS_EXPR
 879                 || subcode == MULT_EXPR
 880                 || subcode == ADDR_EXPR
 881                 || CONVERT_EXPR_CODE_P (subcode)))
 882           {
 883             use_cache = true;
 884             bool existed;
 885             std::pair<tree, tree> &e = cache.get_or_insert (op0, &existed);
 886             if (existed)
 887               {
 888                 if (integer_zerop (e.second))
 889                   return false;
 890                 *var = e.first;
 891                 *off = e.second;
 892                 /* The caller sets the range in this case.  */
 893                 return true;
 894               }
 895             e = std::make_pair (op0, ssize_int (0));
 896           }
 897
 898         if (*limit == 0)
 899           return false;
 900         --*limit;
 901
 902         var0 = gimple_assign_rhs1 (def_stmt);
 903         var1 = gimple_assign_rhs2 (def_stmt);
 904
 905         bool res = split_constant_offset_1 (type, var0, subcode, var1,
 906                                             var, off, nullptr, cache, limit);
 907         if (res && use_cache)
 908           *cache.get (op0) = std::make_pair (*var, *off);
 909         /* The caller sets the range in this case.  */
 910         return res;
 911       }
 912     CASE_CONVERT:
 913       {
 914         /* We can only handle the following conversions:
 915
 916            - Conversions from one pointer type to another pointer type.
 917
 918            - Conversions from one non-trapping integral type to another
 919              non-trapping integral type.  In this case, the recursive
 920              call makes sure that:
 921
 922                (sizetype) OP0
 923
 924              can be expressed as a sizetype operation involving VAR and OFF,
 925              and all we need to do is check whether:
 926
 927                (sizetype) OP0 == (sizetype) (TYPE) OP0
 928
 929            - Conversions from a non-trapping sizetype-size integral type to
 930              a like-sized pointer type.  In this case, the recursive call
 931              makes sure that:
 932
 933                (sizetype) OP0 == *VAR + (sizetype) *OFF
 934
 935              and we can convert that to:
 936
 937                POINTER_PLUS <(TYPE) *VAR, (sizetype) *OFF>
 938
 939            - Conversions from a sizetype-sized pointer type to a like-sized
 940              non-trapping integral type.  In this case, the recursive call
 941              makes sure that:
 942
 943                OP0 == POINTER_PLUS <*VAR, (sizetype) *OFF>
 944
 945              where the POINTER_PLUS and *VAR have the same precision as
 946              TYPE (and the same precision as sizetype).  Then:
 947
 948                (sizetype) (TYPE) OP0 == (sizetype) *VAR + (sizetype) *OFF.  */
 949         tree itype = TREE_TYPE (op0);
 950         if ((POINTER_TYPE_P (itype)
 951              || (INTEGRAL_TYPE_P (itype) && !TYPE_OVERFLOW_TRAPS (itype)))
 952             && (POINTER_TYPE_P (type)
 953                 || (INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type)))
 954             && (POINTER_TYPE_P (type) == POINTER_TYPE_P (itype)
 955                 || (TYPE_PRECISION (type) == TYPE_PRECISION (sizetype)
 956                     && TYPE_PRECISION (itype) == TYPE_PRECISION (sizetype))))
 957           {
 958             if (POINTER_TYPE_P (type))
 959               {
 960                 split_constant_offset (op0, var, off, nullptr, cache, limit);
 961                 *var = fold_convert (type, *var);
 962               }
 963             else if (POINTER_TYPE_P (itype))
 964               {
 965                 split_constant_offset (op0, var, off, nullptr, cache, limit);
 966                 *var = fold_convert (sizetype, *var);
 967               }
 968             else
 969               {
 970                 split_constant_offset (op0, var, off, &op0_range,
 971                                        cache, limit);
 972                 if (!nop_conversion_for_offset_p (type, itype, op0_range))
 973                   return false;
 974                 if (result_range)
 975                   {
 976                     *result_range = op0_range;
 977                     range_cast (*result_range, type);
 978                   }
 979               }
 980             return true;
 981           }
 982         return false;
 983       }
 984
 985     default:
 986       return false;
 987     }
 988 }
 989
 990 /* If EXP has pointer type, try to express it as:
 991
 992      POINTER_PLUS <*VAR, (sizetype) *OFF>
 993
 994    where:
 995
 996    - *VAR has the same type as EXP
 997    - *OFF is a constant of type ssizetype.
 998
 999    If EXP has an integral type, try to express (sizetype) EXP as:
1000
1001      *VAR + (sizetype) *OFF
1002
1003    where:
1004
1005    - *VAR has type sizetype
1006    - *OFF is a constant of type ssizetype.
1007
1008    If EXP_RANGE is nonnull, set it to the range of EXP.
1009
1010    CACHE caches {*VAR, *OFF} pairs for SSA names that we've previously
1011    visited.  LIMIT counts down the number of SSA names that we are
1012    allowed to process before giving up.  */
1013
1014 static void
1015 split_constant_offset (tree exp, tree *var, tree *off, value_range *exp_range,
1016                        hash_map<tree, std::pair<tree, tree> > &cache,
1017                        unsigned *limit)
1018 {
1019   tree type = TREE_TYPE (exp), op0, op1;
1020   enum tree_code code;
1021
1022   code = TREE_CODE (exp);
1023   if (exp_range)
1024     {
1025       *exp_range = type;
1026       if (code == SSA_NAME)
1027         {
1028           value_range vr;
1029           get_range_query (cfun)->range_of_expr (vr, exp);
1030           if (vr.undefined_p ())
1031             vr.set_varying (TREE_TYPE (exp));
1032           tree vr_min, vr_max;
1033           value_range_kind vr_kind = get_legacy_range (vr, vr_min, vr_max);
1034           wide_int var_min = wi::to_wide (vr_min);
1035           wide_int var_max = wi::to_wide (vr_max);
1036           wide_int var_nonzero = get_nonzero_bits (exp);
1037           vr_kind = intersect_range_with_nonzero_bits (vr_kind,
1038                                                        &var_min, &var_max,
1039                                                        var_nonzero,
1040                                                        TYPE_SIGN (type));
1041           /* This check for VR_VARYING is here because the old code
1042              using get_range_info would return VR_RANGE for the entire
1043              domain, instead of VR_VARYING.  The new code normalizes
1044              full-domain ranges to VR_VARYING.  */
1045           if (vr_kind == VR_RANGE || vr_kind == VR_VARYING)
1046             *exp_range = value_range (type, var_min, var_max);
1047         }
1048     }
1049
1050   if (!tree_is_chrec (exp)
1051       && get_gimple_rhs_class (TREE_CODE (exp)) != GIMPLE_TERNARY_RHS)
1052     {
1053       extract_ops_from_tree (exp, &code, &op0, &op1);
1054       if (split_constant_offset_1 (type, op0, code, op1, var, off,
1055                                    exp_range, cache, limit))
1056         return;
1057     }
1058
1059   *var = exp;
1060   if (INTEGRAL_TYPE_P (type))
1061     *var = fold_convert (sizetype, *var);
1062   *off = ssize_int (0);
1063
1064   value_range r;
1065   if (exp_range && code != SSA_NAME
1066       && get_range_query (cfun)->range_of_expr (r, exp)
1067       && !r.undefined_p ())
1068     *exp_range = r;
1069 }
1070
1071 /* Expresses EXP as VAR + OFF, where OFF is a constant.  VAR has the same
1072    type as EXP while OFF has type ssizetype.  */
1073
1074 void
1075 split_constant_offset (tree exp, tree *var, tree *off)
1076 {
1077   unsigned limit = param_ssa_name_def_chain_limit;
1078   static hash_map<tree, std::pair<tree, tree> > *cache;
1079   if (!cache)
1080     cache = new hash_map<tree, std::pair<tree, tree> > (37);
1081   split_constant_offset (exp, var, off, nullptr, *cache, &limit);
1082   *var = fold_convert (TREE_TYPE (exp), *var);
1083   cache->empty ();
1084 }
1085
1086 /* Returns the address ADDR of an object in a canonical shape (without nop
1087    casts, and with type of pointer to the object).  */
1088
1089 static tree
1090 canonicalize_base_object_address (tree addr)
1091 {
1092   tree orig = addr;
1093
1094   STRIP_NOPS (addr);
1095
1096   /* The base address may be obtained by casting from integer, in that case
1097      keep the cast.  */
1098   if (!POINTER_TYPE_P (TREE_TYPE (addr)))
1099     return orig;
1100
1101   if (TREE_CODE (addr) != ADDR_EXPR)
1102     return addr;
1103
1104   return build_fold_addr_expr (TREE_OPERAND (addr, 0));
1105 }
1106
1107 /* Analyze the behavior of memory reference REF within STMT.
1108    There are two modes:
1109
1110    - BB analysis.  In this case we simply split the address into base,
1111      init and offset components, without reference to any containing loop.
1112      The resulting base and offset are general expressions and they can
1113      vary arbitrarily from one iteration of the containing loop to the next.
1114      The step is always zero.
1115
1116    - loop analysis.  In this case we analyze the reference both wrt LOOP
1117      and on the basis that the reference occurs (is "used") in LOOP;
1118      see the comment above analyze_scalar_evolution_in_loop for more
1119      information about this distinction.  The base, init, offset and
1120      step fields are all invariant in LOOP.
1121
1122    Perform BB analysis if LOOP is null, or if LOOP is the function's
1123    dummy outermost loop.  In other cases perform loop analysis.
1124
1125    Return true if the analysis succeeded and store the results in DRB if so.
1126    BB analysis can only fail for bitfield or reversed-storage accesses.  */
1127
1128 opt_result
1129 dr_analyze_innermost (innermost_loop_behavior *drb, tree ref,
1130                       class loop *loop, const gimple *stmt)
1131 {
1132   poly_int64 pbitsize, pbitpos;
1133   tree base, poffset;
1134   machine_mode pmode;
1135   int punsignedp, preversep, pvolatilep;
1136   affine_iv base_iv, offset_iv;
1137   tree init, dinit, step;
1138   bool in_loop = (loop && loop->num);
1139
1140   if (dump_file && (dump_flags & TDF_DETAILS))
1141     fprintf (dump_file, "analyze_innermost: ");
1142
1143   base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset, &pmode,
1144                               &punsignedp, &preversep, &pvolatilep);
1145   gcc_assert (base != NULL_TREE);
1146
1147   poly_int64 pbytepos;
1148   if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
1149     return opt_result::failure_at (stmt,
1150                                    "failed: bit offset alignment.\n");
1151
1152   if (preversep)
1153     return opt_result::failure_at (stmt,
1154                                    "failed: reverse storage order.\n");
1155
1156   /* Calculate the alignment and misalignment for the inner reference.  */
1157   unsigned int HOST_WIDE_INT bit_base_misalignment;
1158   unsigned int bit_base_alignment;
1159   get_object_alignment_1 (base, &bit_base_alignment, &bit_base_misalignment);
1160
1161   /* There are no bitfield references remaining in BASE, so the values
1162      we got back must be whole bytes.  */
1163   gcc_assert (bit_base_alignment % BITS_PER_UNIT == 0
1164               && bit_base_misalignment % BITS_PER_UNIT == 0);
1165   unsigned int base_alignment = bit_base_alignment / BITS_PER_UNIT;
1166   poly_int64 base_misalignment = bit_base_misalignment / BITS_PER_UNIT;
1167
1168   if (TREE_CODE (base) == MEM_REF)
1169     {
1170       if (!integer_zerop (TREE_OPERAND (base, 1)))
1171         {
1172           /* Subtract MOFF from the base and add it to POFFSET instead.
1173              Adjust the misalignment to reflect the amount we subtracted.  */
1174           poly_offset_int moff = mem_ref_offset (base);
1175           base_misalignment -= moff.force_shwi ();
1176           tree mofft = wide_int_to_tree (sizetype, moff);
1177           if (!poffset)
1178             poffset = mofft;
1179           else
1180             poffset = size_binop (PLUS_EXPR, poffset, mofft);
1181         }
1182       base = TREE_OPERAND (base, 0);
1183     }
1184   else
1185     base = build_fold_addr_expr (base);
1186
1187   if (in_loop)
1188     {
1189       if (!simple_iv (loop, loop, base, &base_iv, true))
1190         return opt_result::failure_at
1191           (stmt, "failed: evolution of base is not affine.\n");
1192     }
1193   else
1194     {
1195       base_iv.base = base;
1196       base_iv.step = ssize_int (0);
1197       base_iv.no_overflow = true;
1198     }
1199
1200   if (!poffset)
1201     {
1202       offset_iv.base = ssize_int (0);
1203       offset_iv.step = ssize_int (0);
1204     }
1205   else
1206     {
1207       if (!in_loop)
1208         {
1209           offset_iv.base = poffset;
1210           offset_iv.step = ssize_int (0);
1211         }
1212       else if (!simple_iv (loop, loop, poffset, &offset_iv, true))
1213         return opt_result::failure_at
1214           (stmt, "failed: evolution of offset is not affine.\n");
1215     }
1216
1217   init = ssize_int (pbytepos);
1218
1219   /* Subtract any constant component from the base and add it to INIT instead.
1220      Adjust the misalignment to reflect the amount we subtracted.  */
1221   split_constant_offset (base_iv.base, &base_iv.base, &dinit);
1222   init = size_binop (PLUS_EXPR, init, dinit);
1223   base_misalignment -= TREE_INT_CST_LOW (dinit);
1224
1225   split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
1226   init = size_binop (PLUS_EXPR, init, dinit);
1227
1228   step = size_binop (PLUS_EXPR,
1229                      fold_convert (ssizetype, base_iv.step),
1230                      fold_convert (ssizetype, offset_iv.step));
1231
1232   base = canonicalize_base_object_address (base_iv.base);
1233
1234   /* See if get_pointer_alignment can guarantee a higher alignment than
1235      the one we calculated above.  */
1236   unsigned int HOST_WIDE_INT alt_misalignment;
1237   unsigned int alt_alignment;
1238   get_pointer_alignment_1 (base, &alt_alignment, &alt_misalignment);
1239
1240   /* As above, these values must be whole bytes.  */
1241   gcc_assert (alt_alignment % BITS_PER_UNIT == 0
1242               && alt_misalignment % BITS_PER_UNIT == 0);
1243   alt_alignment /= BITS_PER_UNIT;
1244   alt_misalignment /= BITS_PER_UNIT;
1245
1246   if (base_alignment < alt_alignment)
1247     {
1248       base_alignment = alt_alignment;
1249       base_misalignment = alt_misalignment;
1250     }
1251
1252   drb->base_address = base;
1253   drb->offset = fold_convert (ssizetype, offset_iv.base);
1254   drb->init = init;
1255   drb->step = step;
1256   if (known_misalignment (base_misalignment, base_alignment,
1257                           &drb->base_misalignment))
1258     drb->base_alignment = base_alignment;
1259   else
1260     {
1261       drb->base_alignment = known_alignment (base_misalignment);
1262       drb->base_misalignment = 0;
1263     }
1264   drb->offset_alignment = highest_pow2_factor (offset_iv.base);
1265   drb->step_alignment = highest_pow2_factor (step);
1266
1267   if (dump_file && (dump_flags & TDF_DETAILS))
1268     fprintf (dump_file, "success.\n");
1269
1270   return opt_result::success ();
1271 }
1272
1273 /* Return true if OP is a valid component reference for a DR access
1274    function.  This accepts a subset of what handled_component_p accepts.  */
1275
1276 static bool
1277 access_fn_component_p (tree op)
1278 {
1279   switch (TREE_CODE (op))
1280     {
1281     case REALPART_EXPR:
1282     case IMAGPART_EXPR:
1283     case ARRAY_REF:
1284       return true;
1285
1286     case COMPONENT_REF:
1287       return TREE_CODE (TREE_TYPE (TREE_OPERAND (op, 0))) == RECORD_TYPE;
1288
1289     default:
1290       return false;
1291     }
1292 }
1293
1294 /* Returns whether BASE can have a access_fn_component_p with BASE
1295    as base.  */
1296
1297 static bool
1298 base_supports_access_fn_components_p (tree base)
1299 {
1300   switch (TREE_CODE (TREE_TYPE (base)))
1301     {
1302     case COMPLEX_TYPE:
1303     case ARRAY_TYPE:
1304     case RECORD_TYPE:
1305       return true;
1306     default:
1307       return false;
1308     }
1309 }
1310
1311 /* Determines the base object and the list of indices of memory reference
1312    DR, analyzed in LOOP and instantiated before NEST.  */
1313
1314 static void
1315 dr_analyze_indices (struct indices *dri, tree ref, edge nest, loop_p loop)
1316 {
1317   /* If analyzing a basic-block there are no indices to analyze
1318      and thus no access functions.  */
1319   if (!nest)
1320     {
1321       dri->base_object = ref;
1322       dri->access_fns.create (0);
1323       return;
1324     }
1325
1326   vec<tree> access_fns = vNULL;
1327
1328   /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
1329      into a two element array with a constant index.  The base is
1330      then just the immediate underlying object.  */
1331   if (TREE_CODE (ref) == REALPART_EXPR)
1332     {
1333       ref = TREE_OPERAND (ref, 0);
1334       access_fns.safe_push (integer_zero_node);
1335     }
1336   else if (TREE_CODE (ref) == IMAGPART_EXPR)
1337     {
1338       ref = TREE_OPERAND (ref, 0);
1339       access_fns.safe_push (integer_one_node);
1340     }
1341
1342   /* Analyze access functions of dimensions we know to be independent.
1343      The list of component references handled here should be kept in
1344      sync with access_fn_component_p.  */
1345   while (handled_component_p (ref))
1346     {
1347       if (TREE_CODE (ref) == ARRAY_REF)
1348         {
1349           tree op = TREE_OPERAND (ref, 1);
1350           tree access_fn = analyze_scalar_evolution (loop, op);
1351           access_fn = instantiate_scev (nest, loop, access_fn);
1352           access_fns.safe_push (access_fn);
1353         }
1354       else if (TREE_CODE (ref) == COMPONENT_REF
1355                && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) == RECORD_TYPE)
1356         {
1357           /* For COMPONENT_REFs of records (but not unions!) use the
1358              FIELD_DECL offset as constant access function so we can
1359              disambiguate a[i].f1 and a[i].f2.  */
1360           tree off = component_ref_field_offset (ref);
1361           off = size_binop (PLUS_EXPR,
1362                             size_binop (MULT_EXPR,
1363                                         fold_convert (bitsizetype, off),
1364                                         bitsize_int (BITS_PER_UNIT)),
1365                             DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref, 1)));
1366           access_fns.safe_push (off);
1367         }
1368       else
1369         /* If we have an unhandled component we could not translate
1370            to an access function stop analyzing.  We have determined
1371            our base object in this case.  */
1372         break;
1373
1374       ref = TREE_OPERAND (ref, 0);
1375     }
1376
1377   /* If the address operand of a MEM_REF base has an evolution in the
1378      analyzed nest, add it as an additional independent access-function.  */
1379   if (TREE_CODE (ref) == MEM_REF)
1380     {
1381       tree op = TREE_OPERAND (ref, 0);
1382       tree access_fn = analyze_scalar_evolution (loop, op);
1383       access_fn = instantiate_scev (nest, loop, access_fn);
1384       STRIP_NOPS (access_fn);
1385       if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
1386         {
1387           tree memoff = TREE_OPERAND (ref, 1);
1388           tree base = initial_condition (access_fn);
1389           tree orig_type = TREE_TYPE (base);
1390           STRIP_USELESS_TYPE_CONVERSION (base);
1391           tree off;
1392           split_constant_offset (base, &base, &off);
1393           STRIP_USELESS_TYPE_CONVERSION (base);
1394           /* Fold the MEM_REF offset into the evolutions initial
1395              value to make more bases comparable.  */
1396           if (!integer_zerop (memoff))
1397             {
1398               off = size_binop (PLUS_EXPR, off,
1399                                 fold_convert (ssizetype, memoff));
1400               memoff = build_int_cst (TREE_TYPE (memoff), 0);
1401             }
1402           /* Adjust the offset so it is a multiple of the access type
1403              size and thus we separate bases that can possibly be used
1404              to produce partial overlaps (which the access_fn machinery
1405              cannot handle).  */
1406           wide_int rem;
1407           if (TYPE_SIZE_UNIT (TREE_TYPE (ref))
1408               && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (ref))) == INTEGER_CST
1409               && !integer_zerop (TYPE_SIZE_UNIT (TREE_TYPE (ref))))
1410             rem = wi::mod_trunc
1411               (wi::to_wide (off),
1412                wi::to_wide (TYPE_SIZE_UNIT (TREE_TYPE (ref))),
1413                SIGNED);
1414           else
1415             /* If we can't compute the remainder simply force the initial
1416                condition to zero.  */
1417             rem = wi::to_wide (off);
1418           off = wide_int_to_tree (ssizetype, wi::to_wide (off) - rem);
1419           memoff = wide_int_to_tree (TREE_TYPE (memoff), rem);
1420           /* And finally replace the initial condition.  */
1421           access_fn = chrec_replace_initial_condition
1422               (access_fn, fold_convert (orig_type, off));
1423           /* ???  This is still not a suitable base object for
1424              dr_may_alias_p - the base object needs to be an
1425              access that covers the object as whole.  With
1426              an evolution in the pointer this cannot be
1427              guaranteed.
1428              As a band-aid, mark the access so we can special-case
1429              it in dr_may_alias_p.  */
1430           tree old = ref;
1431           ref = fold_build2_loc (EXPR_LOCATION (ref),
1432                                  MEM_REF, TREE_TYPE (ref),
1433                                  base, memoff);
1434           MR_DEPENDENCE_CLIQUE (ref) = MR_DEPENDENCE_CLIQUE (old);
1435           MR_DEPENDENCE_BASE (ref) = MR_DEPENDENCE_BASE (old);
1436           dri->unconstrained_base = true;
1437           access_fns.safe_push (access_fn);
1438         }
1439     }
1440   else if (DECL_P (ref))
1441     {
1442       /* Canonicalize DR_BASE_OBJECT to MEM_REF form.  */
1443       ref = build2 (MEM_REF, TREE_TYPE (ref),
1444                     build_fold_addr_expr (ref),
1445                     build_int_cst (reference_alias_ptr_type (ref), 0));
1446     }
1447
1448   dri->base_object = ref;
1449   dri->access_fns = access_fns;
1450 }
1451
1452 /* Extracts the alias analysis information from the memory reference DR.  */
1453
1454 static void
1455 dr_analyze_alias (struct data_reference *dr)
1456 {
1457   tree ref = DR_REF (dr);
1458   tree base = get_base_address (ref), addr;
1459
1460   if (INDIRECT_REF_P (base)
1461       || TREE_CODE (base) == MEM_REF)
1462     {
1463       addr = TREE_OPERAND (base, 0);
1464       if (TREE_CODE (addr) == SSA_NAME)
1465         DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
1466     }
1467 }
1468
1469 /* Frees data reference DR.  */
1470
1471 void
1472 free_data_ref (data_reference_p dr)
1473 {
1474   DR_ACCESS_FNS (dr).release ();
1475   if (dr->alt_indices.base_object)
1476     dr->alt_indices.access_fns.release ();
1477   free (dr);
1478 }
1479
1480 /* Analyze memory reference MEMREF, which is accessed in STMT.
1481    The reference is a read if IS_READ is true, otherwise it is a write.
1482    IS_CONDITIONAL_IN_STMT indicates that the reference is conditional
1483    within STMT, i.e. that it might not occur even if STMT is executed
1484    and runs to completion.
1485
1486    Return the data_reference description of MEMREF.  NEST is the outermost
1487    loop in which the reference should be instantiated, LOOP is the loop
1488    in which the data reference should be analyzed.  */
1489
1490 struct data_reference *
1491 create_data_ref (edge nest, loop_p loop, tree memref, gimple *stmt,
1492                  bool is_read, bool is_conditional_in_stmt)
1493 {
1494   struct data_reference *dr;
1495
1496   if (dump_file && (dump_flags & TDF_DETAILS))
1497     {
1498       fprintf (dump_file, "Creating dr for ");
1499       print_generic_expr (dump_file, memref, TDF_SLIM);
1500       fprintf (dump_file, "\n");
1501     }
1502
1503   dr = XCNEW (struct data_reference);
1504   DR_STMT (dr) = stmt;
1505   DR_REF (dr) = memref;
1506   DR_IS_READ (dr) = is_read;
1507   DR_IS_CONDITIONAL_IN_STMT (dr) = is_conditional_in_stmt;
1508
1509   dr_analyze_innermost (&DR_INNERMOST (dr), memref,
1510                         nest != NULL ? loop : NULL, stmt);
1511   dr_analyze_indices (&dr->indices, DR_REF (dr), nest, loop);
1512   dr_analyze_alias (dr);
1513
1514   if (dump_file && (dump_flags & TDF_DETAILS))
1515     {
1516       unsigned i;
1517       fprintf (dump_file, "\tbase_address: ");
1518       print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
1519       fprintf (dump_file, "\n\toffset from base address: ");
1520       print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
1521       fprintf (dump_file, "\n\tconstant offset from base address: ");
1522       print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
1523       fprintf (dump_file, "\n\tstep: ");
1524       print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
1525       fprintf (dump_file, "\n\tbase alignment: %d", DR_BASE_ALIGNMENT (dr));
1526       fprintf (dump_file, "\n\tbase misalignment: %d",
1527                DR_BASE_MISALIGNMENT (dr));
1528       fprintf (dump_file, "\n\toffset alignment: %d",
1529                DR_OFFSET_ALIGNMENT (dr));
1530       fprintf (dump_file, "\n\tstep alignment: %d", DR_STEP_ALIGNMENT (dr));
1531       fprintf (dump_file, "\n\tbase_object: ");
1532       print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
1533       fprintf (dump_file, "\n");
1534       for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
1535         {
1536           fprintf (dump_file, "\tAccess function %d: ", i);
1537           print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
1538         }
1539     }
1540
1541   return dr;
1542 }
1543
1544 /*  A helper function computes order between two tree expressions T1 and T2.
1545     This is used in comparator functions sorting objects based on the order
1546     of tree expressions.  The function returns -1, 0, or 1.  */
1547
1548 int
1549 data_ref_compare_tree (tree t1, tree t2)
1550 {
1551   int i, cmp;
1552   enum tree_code code;
1553   char tclass;
1554
1555   if (t1 == t2)
1556     return 0;
1557   if (t1 == NULL)
1558     return -1;
1559   if (t2 == NULL)
1560     return 1;
1561
1562   STRIP_USELESS_TYPE_CONVERSION (t1);
1563   STRIP_USELESS_TYPE_CONVERSION (t2);
1564   if (t1 == t2)
1565     return 0;
1566
1567   if (TREE_CODE (t1) != TREE_CODE (t2)
1568       && ! (CONVERT_EXPR_P (t1) && CONVERT_EXPR_P (t2)))
1569     return TREE_CODE (t1) < TREE_CODE (t2) ? -1 : 1;
1570
1571   code = TREE_CODE (t1);
1572   switch (code)
1573     {
1574     case INTEGER_CST:
1575       return tree_int_cst_compare (t1, t2);
1576
1577     case STRING_CST:
1578       if (TREE_STRING_LENGTH (t1) != TREE_STRING_LENGTH (t2))
1579         return TREE_STRING_LENGTH (t1) < TREE_STRING_LENGTH (t2) ? -1 : 1;
1580       return memcmp (TREE_STRING_POINTER (t1), TREE_STRING_POINTER (t2),
1581                      TREE_STRING_LENGTH (t1));
1582
1583     case SSA_NAME:
1584       if (SSA_NAME_VERSION (t1) != SSA_NAME_VERSION (t2))
1585         return SSA_NAME_VERSION (t1) < SSA_NAME_VERSION (t2) ? -1 : 1;
1586       break;
1587
1588     default:
1589       if (POLY_INT_CST_P (t1))
1590         return compare_sizes_for_sort (wi::to_poly_widest (t1),
1591                                        wi::to_poly_widest (t2));
1592
1593       tclass = TREE_CODE_CLASS (code);
1594
1595       /* For decls, compare their UIDs.  */
1596       if (tclass == tcc_declaration)
1597         {
1598           if (DECL_UID (t1) != DECL_UID (t2))
1599             return DECL_UID (t1) < DECL_UID (t2) ? -1 : 1;
1600           break;
1601         }
1602       /* For expressions, compare their operands recursively.  */
1603       else if (IS_EXPR_CODE_CLASS (tclass))
1604         {
1605           for (i = TREE_OPERAND_LENGTH (t1) - 1; i >= 0; --i)
1606             {
1607               cmp = data_ref_compare_tree (TREE_OPERAND (t1, i),
1608                                            TREE_OPERAND (t2, i));
1609               if (cmp != 0)
1610                 return cmp;
1611             }
1612         }
1613       else
1614         gcc_unreachable ();
1615     }
1616
1617   return 0;
1618 }
1619
1620 /* Return TRUE it's possible to resolve data dependence DDR by runtime alias
1621    check.  */
1622
1623 opt_result
1624 runtime_alias_check_p (ddr_p ddr, class loop *loop, bool speed_p)
1625 {
1626   if (dump_enabled_p ())
1627     dump_printf (MSG_NOTE,
1628                  "consider run-time aliasing test between %T and %T\n",
1629                  DR_REF (DDR_A (ddr)), DR_REF (DDR_B (ddr)));
1630
1631   if (!speed_p)
1632     return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
1633                                    "runtime alias check not supported when"
1634                                    " optimizing for size.\n");
1635
1636   /* FORNOW: We don't support versioning with outer-loop in either
1637      vectorization or loop distribution.  */
1638   if (loop != NULL && loop->inner != NULL)
1639     return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
1640                                    "runtime alias check not supported for"
1641                                    " outer loop.\n");
1642
1643   /* FORNOW: We don't support handling different address spaces.  */
1644   if (TYPE_ADDR_SPACE (TREE_TYPE (TREE_TYPE (DR_BASE_ADDRESS (DDR_A (ddr)))))
1645       != TYPE_ADDR_SPACE (TREE_TYPE (TREE_TYPE (DR_BASE_ADDRESS (DDR_B (ddr))))))
1646     return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
1647                                    "runtime alias check between different "
1648                                    "address spaces not supported.\n");
1649
1650   return opt_result::success ();
1651 }
1652
1653 /* Operator == between two dr_with_seg_len objects.
1654
1655    This equality operator is used to make sure two data refs
1656    are the same one so that we will consider to combine the
1657    aliasing checks of those two pairs of data dependent data
1658    refs.  */
1659
1660 static bool
1661 operator == (const dr_with_seg_len& d1,
1662              const dr_with_seg_len& d2)
1663 {
1664   return (operand_equal_p (DR_BASE_ADDRESS (d1.dr),
1665                            DR_BASE_ADDRESS (d2.dr), 0)
1666           && data_ref_compare_tree (DR_OFFSET (d1.dr), DR_OFFSET (d2.dr)) == 0
1667           && data_ref_compare_tree (DR_INIT (d1.dr), DR_INIT (d2.dr)) == 0
1668           && data_ref_compare_tree (d1.seg_len, d2.seg_len) == 0
1669           && known_eq (d1.access_size, d2.access_size)
1670           && d1.align == d2.align);
1671 }
1672
1673 /* Comparison function for sorting objects of dr_with_seg_len_pair_t
1674    so that we can combine aliasing checks in one scan.  */
1675
1676 static int
1677 comp_dr_with_seg_len_pair (const void *pa_, const void *pb_)
1678 {
1679   const dr_with_seg_len_pair_t* pa = (const dr_with_seg_len_pair_t *) pa_;
1680   const dr_with_seg_len_pair_t* pb = (const dr_with_seg_len_pair_t *) pb_;
1681   const dr_with_seg_len &a1 = pa->first, &a2 = pa->second;
1682   const dr_with_seg_len &b1 = pb->first, &b2 = pb->second;
1683
1684   /* For DR pairs (a, b) and (c, d), we only consider to merge the alias checks
1685      if a and c have the same basic address snd step, and b and d have the same
1686      address and step.  Therefore, if any a&c or b&d don't have the same address
1687      and step, we don't care the order of those two pairs after sorting.  */
1688   int comp_res;
1689
1690   if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a1.dr),
1691                                          DR_BASE_ADDRESS (b1.dr))) != 0)
1692     return comp_res;
1693   if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a2.dr),
1694                                          DR_BASE_ADDRESS (b2.dr))) != 0)
1695     return comp_res;
1696   if ((comp_res = data_ref_compare_tree (DR_STEP (a1.dr),
1697                                          DR_STEP (b1.dr))) != 0)
1698     return comp_res;
1699   if ((comp_res = data_ref_compare_tree (DR_STEP (a2.dr),
1700                                          DR_STEP (b2.dr))) != 0)
1701     return comp_res;
1702   if ((comp_res = data_ref_compare_tree (DR_OFFSET (a1.dr),
1703                                          DR_OFFSET (b1.dr))) != 0)
1704     return comp_res;
1705   if ((comp_res = data_ref_compare_tree (DR_INIT (a1.dr),
1706                                          DR_INIT (b1.dr))) != 0)
1707     return comp_res;
1708   if ((comp_res = data_ref_compare_tree (DR_OFFSET (a2.dr),
1709                                          DR_OFFSET (b2.dr))) != 0)
1710     return comp_res;
1711   if ((comp_res = data_ref_compare_tree (DR_INIT (a2.dr),
1712                                          DR_INIT (b2.dr))) != 0)
1713     return comp_res;
1714
1715   return 0;
1716 }
1717
1718 /* Dump information about ALIAS_PAIR, indenting each line by INDENT.  */
1719
1720 static void
1721 dump_alias_pair (dr_with_seg_len_pair_t *alias_pair, const char *indent)
1722 {
1723   dump_printf (MSG_NOTE, "%sreference:      %T vs. %T\n", indent,
1724                DR_REF (alias_pair->first.dr),
1725                DR_REF (alias_pair->second.dr));
1726
1727   dump_printf (MSG_NOTE, "%ssegment length: %T", indent,
1728                alias_pair->first.seg_len);
1729   if (!operand_equal_p (alias_pair->first.seg_len,
1730                         alias_pair->second.seg_len, 0))
1731     dump_printf (MSG_NOTE, " vs. %T", alias_pair->second.seg_len);
1732
1733   dump_printf (MSG_NOTE, "\n%saccess size:    ", indent);
1734   dump_dec (MSG_NOTE, alias_pair->first.access_size);
1735   if (maybe_ne (alias_pair->first.access_size, alias_pair->second.access_size))
1736     {
1737       dump_printf (MSG_NOTE, " vs. ");
1738       dump_dec (MSG_NOTE, alias_pair->second.access_size);
1739     }
1740
1741   dump_printf (MSG_NOTE, "\n%salignment:      %d", indent,
1742                alias_pair->first.align);
1743   if (alias_pair->first.align != alias_pair->second.align)
1744     dump_printf (MSG_NOTE, " vs. %d", alias_pair->second.align);
1745
1746   dump_printf (MSG_NOTE, "\n%sflags:         ", indent);
1747   if (alias_pair->flags & DR_ALIAS_RAW)
1748     dump_printf (MSG_NOTE, " RAW");
1749   if (alias_pair->flags & DR_ALIAS_WAR)
1750     dump_printf (MSG_NOTE, " WAR");
1751   if (alias_pair->flags & DR_ALIAS_WAW)
1752     dump_printf (MSG_NOTE, " WAW");
1753   if (alias_pair->flags & DR_ALIAS_ARBITRARY)
1754     dump_printf (MSG_NOTE, " ARBITRARY");
1755   if (alias_pair->flags & DR_ALIAS_SWAPPED)
1756     dump_printf (MSG_NOTE, " SWAPPED");
1757   if (alias_pair->flags & DR_ALIAS_UNSWAPPED)
1758     dump_printf (MSG_NOTE, " UNSWAPPED");
1759   if (alias_pair->flags & DR_ALIAS_MIXED_STEPS)
1760     dump_printf (MSG_NOTE, " MIXED_STEPS");
1761   if (alias_pair->flags == 0)
1762     dump_printf (MSG_NOTE, " <none>");
1763   dump_printf (MSG_NOTE, "\n");
1764 }
1765
1766 /* Merge alias checks recorded in ALIAS_PAIRS and remove redundant ones.
1767    FACTOR is number of iterations that each data reference is accessed.
1768
1769    Basically, for each pair of dependent data refs store_ptr_0 & load_ptr_0,
1770    we create an expression:
1771
1772    ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
1773    || (load_ptr_0 + load_segment_length_0) <= store_ptr_0))
1774
1775    for aliasing checks.  However, in some cases we can decrease the number
1776    of checks by combining two checks into one.  For example, suppose we have
1777    another pair of data refs store_ptr_0 & load_ptr_1, and if the following
1778    condition is satisfied:
1779
1780    load_ptr_0 < load_ptr_1  &&
1781    load_ptr_1 - load_ptr_0 - load_segment_length_0 < store_segment_length_0
1782
1783    (this condition means, in each iteration of vectorized loop, the accessed
1784    memory of store_ptr_0 cannot be between the memory of load_ptr_0 and
1785    load_ptr_1.)
1786
1787    we then can use only the following expression to finish the alising checks
1788    between store_ptr_0 & load_ptr_0 and store_ptr_0 & load_ptr_1:
1789
1790    ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
1791    || (load_ptr_1 + load_segment_length_1 <= store_ptr_0))
1792
1793    Note that we only consider that load_ptr_0 and load_ptr_1 have the same
1794    basic address.  */
1795
1796 void
1797 prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs,
1798                                poly_uint64)
1799 {
1800   if (alias_pairs->is_empty ())
1801     return;
1802
1803   /* Canonicalize each pair so that the base components are ordered wrt
1804      data_ref_compare_tree.  This allows the loop below to merge more
1805      cases.  */
1806   unsigned int i;
1807   dr_with_seg_len_pair_t *alias_pair;
1808   FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
1809     {
1810       data_reference_p dr_a = alias_pair->first.dr;
1811       data_reference_p dr_b = alias_pair->second.dr;
1812       int comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (dr_a),
1813                                             DR_BASE_ADDRESS (dr_b));
1814       if (comp_res == 0)
1815         comp_res = data_ref_compare_tree (DR_OFFSET (dr_a), DR_OFFSET (dr_b));
1816       if (comp_res == 0)
1817         comp_res = data_ref_compare_tree (DR_INIT (dr_a), DR_INIT (dr_b));
1818       if (comp_res > 0)
1819         {
1820           std::swap (alias_pair->first, alias_pair->second);
1821           alias_pair->flags |= DR_ALIAS_SWAPPED;
1822         }
1823       else
1824         alias_pair->flags |= DR_ALIAS_UNSWAPPED;
1825     }
1826
1827   /* Sort the collected data ref pairs so that we can scan them once to
1828      combine all possible aliasing checks.  */
1829   alias_pairs->qsort (comp_dr_with_seg_len_pair);
1830
1831   /* Scan the sorted dr pairs and check if we can combine alias checks
1832      of two neighboring dr pairs.  */
1833   unsigned int last = 0;
1834   for (i = 1; i < alias_pairs->length (); ++i)
1835     {
1836       /* Deal with two ddrs (dr_a1, dr_b1) and (dr_a2, dr_b2).  */
1837       dr_with_seg_len_pair_t *alias_pair1 = &(*alias_pairs)[last];
1838       dr_with_seg_len_pair_t *alias_pair2 = &(*alias_pairs)[i];
1839
1840       dr_with_seg_len *dr_a1 = &alias_pair1->first;
1841       dr_with_seg_len *dr_b1 = &alias_pair1->second;
1842       dr_with_seg_len *dr_a2 = &alias_pair2->first;
1843       dr_with_seg_len *dr_b2 = &alias_pair2->second;
1844
1845       /* Remove duplicate data ref pairs.  */
1846       if (*dr_a1 == *dr_a2 && *dr_b1 == *dr_b2)
1847         {
1848           if (dump_enabled_p ())
1849             dump_printf (MSG_NOTE, "found equal ranges %T, %T and %T, %T\n",
1850                          DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
1851                          DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
1852           alias_pair1->flags |= alias_pair2->flags;
1853           continue;
1854         }
1855
1856       /* Assume that we won't be able to merge the pairs, then correct
1857          if we do.  */
1858       last += 1;
1859       if (last != i)
1860         (*alias_pairs)[last] = (*alias_pairs)[i];
1861
1862       if (*dr_a1 == *dr_a2 || *dr_b1 == *dr_b2)
1863         {
1864           /* We consider the case that DR_B1 and DR_B2 are same memrefs,
1865              and DR_A1 and DR_A2 are two consecutive memrefs.  */
1866           if (*dr_a1 == *dr_a2)
1867             {
1868               std::swap (dr_a1, dr_b1);
1869               std::swap (dr_a2, dr_b2);
1870             }
1871
1872           poly_int64 init_a1, init_a2;
1873           /* Only consider cases in which the distance between the initial
1874              DR_A1 and the initial DR_A2 is known at compile time.  */
1875           if (!operand_equal_p (DR_BASE_ADDRESS (dr_a1->dr),
1876                                 DR_BASE_ADDRESS (dr_a2->dr), 0)
1877               || !operand_equal_p (DR_OFFSET (dr_a1->dr),
1878                                    DR_OFFSET (dr_a2->dr), 0)
1879               || !poly_int_tree_p (DR_INIT (dr_a1->dr), &init_a1)
1880               || !poly_int_tree_p (DR_INIT (dr_a2->dr), &init_a2))
1881             continue;
1882
1883           /* Don't combine if we can't tell which one comes first.  */
1884           if (!ordered_p (init_a1, init_a2))
1885             continue;
1886
1887           /* Work out what the segment length would be if we did combine
1888              DR_A1 and DR_A2:
1889
1890              - If DR_A1 and DR_A2 have equal lengths, that length is
1891                also the combined length.
1892
1893              - If DR_A1 and DR_A2 both have negative "lengths", the combined
1894                length is the lower bound on those lengths.
1895
1896              - If DR_A1 and DR_A2 both have positive lengths, the combined
1897                length is the upper bound on those lengths.
1898
1899              Other cases are unlikely to give a useful combination.
1900
1901              The lengths both have sizetype, so the sign is taken from
1902              the step instead.  */
1903           poly_uint64 new_seg_len = 0;
1904           bool new_seg_len_p = !operand_equal_p (dr_a1->seg_len,
1905                                                  dr_a2->seg_len, 0);
1906           if (new_seg_len_p)
1907             {
1908               poly_uint64 seg_len_a1, seg_len_a2;
1909               if (!poly_int_tree_p (dr_a1->seg_len, &seg_len_a1)
1910                   || !poly_int_tree_p (dr_a2->seg_len, &seg_len_a2))
1911                 continue;
1912
1913               tree indicator_a = dr_direction_indicator (dr_a1->dr);
1914               if (TREE_CODE (indicator_a) != INTEGER_CST)
1915                 continue;
1916
1917               tree indicator_b = dr_direction_indicator (dr_a2->dr);
1918               if (TREE_CODE (indicator_b) != INTEGER_CST)
1919                 continue;
1920
1921               int sign_a = tree_int_cst_sgn (indicator_a);
1922               int sign_b = tree_int_cst_sgn (indicator_b);
1923
1924               if (sign_a <= 0 && sign_b <= 0)
1925                 new_seg_len = lower_bound (seg_len_a1, seg_len_a2);
1926               else if (sign_a >= 0 && sign_b >= 0)
1927                 new_seg_len = upper_bound (seg_len_a1, seg_len_a2);
1928               else
1929                 continue;
1930             }
1931           /* At this point we're committed to merging the refs.  */
1932
1933           /* Make sure dr_a1 starts left of dr_a2.  */
1934           if (maybe_gt (init_a1, init_a2))
1935             {
1936               std::swap (*dr_a1, *dr_a2);
1937               std::swap (init_a1, init_a2);
1938             }
1939
1940           /* The DR_Bs are equal, so only the DR_As can introduce
1941              mixed steps.  */
1942           if (!operand_equal_p (DR_STEP (dr_a1->dr), DR_STEP (dr_a2->dr), 0))
1943             alias_pair1->flags |= DR_ALIAS_MIXED_STEPS;
1944
1945           if (new_seg_len_p)
1946             {
1947               dr_a1->seg_len = build_int_cst (TREE_TYPE (dr_a1->seg_len),
1948                                               new_seg_len);
1949               dr_a1->align = MIN (dr_a1->align, known_alignment (new_seg_len));
1950             }
1951
1952           /* This is always positive due to the swap above.  */
1953           poly_uint64 diff = init_a2 - init_a1;
1954
1955           /* The new check will start at DR_A1.  Make sure that its access
1956              size encompasses the initial DR_A2.  */
1957           if (maybe_lt (dr_a1->access_size, diff + dr_a2->access_size))
1958             {
1959               dr_a1->access_size = upper_bound (dr_a1->access_size,
1960                                                 diff + dr_a2->access_size);
1961               unsigned int new_align = known_alignment (dr_a1->access_size);
1962               dr_a1->align = MIN (dr_a1->align, new_align);
1963             }
1964           if (dump_enabled_p ())
1965             dump_printf (MSG_NOTE, "merging ranges for %T, %T and %T, %T\n",
1966                          DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
1967                          DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
1968           alias_pair1->flags |= alias_pair2->flags;
1969           last -= 1;
1970         }
1971     }
1972   alias_pairs->truncate (last + 1);
1973
1974   /* Try to restore the original dr_with_seg_len order within each
1975      dr_with_seg_len_pair_t.  If we ended up combining swapped and
1976      unswapped pairs into the same check, we have to invalidate any
1977      RAW, WAR and WAW information for it.  */
1978   if (dump_enabled_p ())
1979     dump_printf (MSG_NOTE, "merged alias checks:\n");
1980   FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
1981     {
1982       unsigned int swap_mask = (DR_ALIAS_SWAPPED | DR_ALIAS_UNSWAPPED);
1983       unsigned int swapped = (alias_pair->flags & swap_mask);
1984       if (swapped == DR_ALIAS_SWAPPED)
1985         std::swap (alias_pair->first, alias_pair->second);
1986       else if (swapped != DR_ALIAS_UNSWAPPED)
1987         alias_pair->flags |= DR_ALIAS_ARBITRARY;
1988       alias_pair->flags &= ~swap_mask;
1989       if (dump_enabled_p ())
1990         dump_alias_pair (alias_pair, "  ");
1991     }
1992 }
1993
1994 /* A subroutine of create_intersect_range_checks, with a subset of the
1995    same arguments.  Try to use IFN_CHECK_RAW_PTRS and IFN_CHECK_WAR_PTRS
1996    to optimize cases in which the references form a simple RAW, WAR or
1997    WAR dependence.  */
1998
1999 static bool
2000 create_ifn_alias_checks (tree *cond_expr,
2001                          const dr_with_seg_len_pair_t &alias_pair)
2002 {
2003   const dr_with_seg_len& dr_a = alias_pair.first;
2004   const dr_with_seg_len& dr_b = alias_pair.second;
2005
2006   /* Check for cases in which:
2007
2008      (a) we have a known RAW, WAR or WAR dependence
2009      (b) the accesses are well-ordered in both the original and new code
2010          (see the comment above the DR_ALIAS_* flags for details); and
2011      (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR.  */
2012   if (alias_pair.flags & ~(DR_ALIAS_RAW | DR_ALIAS_WAR | DR_ALIAS_WAW))
2013     return false;
2014
2015   /* Make sure that both DRs access the same pattern of bytes,
2016      with a constant length and step.  */
2017   poly_uint64 seg_len;
2018   if (!operand_equal_p (dr_a.seg_len, dr_b.seg_len, 0)
2019       || !poly_int_tree_p (dr_a.seg_len, &seg_len)
2020       || maybe_ne (dr_a.access_size, dr_b.access_size)
2021       || !operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0)
2022       || !tree_fits_uhwi_p (DR_STEP (dr_a.dr)))
2023     return false;
2024
2025   unsigned HOST_WIDE_INT bytes = tree_to_uhwi (DR_STEP (dr_a.dr));
2026   tree addr_a = DR_BASE_ADDRESS (dr_a.dr);
2027   tree addr_b = DR_BASE_ADDRESS (dr_b.dr);
2028
2029   /* See whether the target suports what we want to do.  WAW checks are
2030      equivalent to WAR checks here.  */
2031   internal_fn ifn = (alias_pair.flags & DR_ALIAS_RAW
2032                      ? IFN_CHECK_RAW_PTRS
2033                      : IFN_CHECK_WAR_PTRS);
2034   unsigned int align = MIN (dr_a.align, dr_b.align);
2035   poly_uint64 full_length = seg_len + bytes;
2036   if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a),
2037                                            full_length, align))
2038     {
2039       full_length = seg_len + dr_a.access_size;
2040       if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a),
2041                                                full_length, align))
2042         return false;
2043     }
2044
2045   /* Commit to using this form of test.  */
2046   addr_a = fold_build_pointer_plus (addr_a, DR_OFFSET (dr_a.dr));
2047   addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr));
2048
2049   addr_b = fold_build_pointer_plus (addr_b, DR_OFFSET (dr_b.dr));
2050   addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr));
2051
2052   *cond_expr = build_call_expr_internal_loc (UNKNOWN_LOCATION,
2053                                              ifn, boolean_type_node,
2054                                              4, addr_a, addr_b,
2055                                              size_int (full_length),
2056                                              size_int (align));
2057
2058   if (dump_enabled_p ())
2059     {
2060       if (ifn == IFN_CHECK_RAW_PTRS)
2061         dump_printf (MSG_NOTE, "using an IFN_CHECK_RAW_PTRS test\n");
2062       else
2063         dump_printf (MSG_NOTE, "using an IFN_CHECK_WAR_PTRS test\n");
2064     }
2065   return true;
2066 }
2067
2068 /* Try to generate a runtime condition that is true if ALIAS_PAIR is
2069    free of aliases, using a condition based on index values instead
2070    of a condition based on addresses.  Return true on success,
2071    storing the condition in *COND_EXPR.
2072
2073    This can only be done if the two data references in ALIAS_PAIR access
2074    the same array object and the index is the only difference.  For example,
2075    if the two data references are DR_A and DR_B:
2076
2077                        DR_A                           DR_B
2078       data-ref         arr[i]                         arr[j]
2079       base_object      arr                            arr
2080       index            {i_0, +, 1}_loop               {j_0, +, 1}_loop
2081
2082    The addresses and their index are like:
2083
2084         |<- ADDR_A    ->|          |<- ADDR_B    ->|
2085      ------------------------------------------------------->
2086         |   |   |   |   |          |   |   |   |   |
2087      ------------------------------------------------------->
2088         i_0 ...         i_0+4      j_0 ...         j_0+4
2089
2090    We can create expression based on index rather than address:
2091
2092      (unsigned) (i_0 - j_0 + 3) <= 6
2093
2094    i.e. the indices are less than 4 apart.
2095
2096    Note evolution step of index needs to be considered in comparison.  */
2097
2098 static bool
2099 create_intersect_range_checks_index (class loop *loop, tree *cond_expr,
2100                                      const dr_with_seg_len_pair_t &alias_pair)
2101 {
2102   const dr_with_seg_len &dr_a = alias_pair.first;
2103   const dr_with_seg_len &dr_b = alias_pair.second;
2104   if ((alias_pair.flags & DR_ALIAS_MIXED_STEPS)
2105       || integer_zerop (DR_STEP (dr_a.dr))
2106       || integer_zerop (DR_STEP (dr_b.dr))
2107       || DR_NUM_DIMENSIONS (dr_a.dr) != DR_NUM_DIMENSIONS (dr_b.dr))
2108     return false;
2109
2110   poly_uint64 seg_len1, seg_len2;
2111   if (!poly_int_tree_p (dr_a.seg_len, &seg_len1)
2112       || !poly_int_tree_p (dr_b.seg_len, &seg_len2))
2113     return false;
2114
2115   if (!tree_fits_shwi_p (DR_STEP (dr_a.dr)))
2116     return false;
2117
2118   if (!operand_equal_p (DR_BASE_OBJECT (dr_a.dr), DR_BASE_OBJECT (dr_b.dr), 0))
2119     return false;
2120
2121   if (!operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0))
2122     return false;
2123
2124   gcc_assert (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST);
2125
2126   bool neg_step = tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0;
2127   unsigned HOST_WIDE_INT abs_step = tree_to_shwi (DR_STEP (dr_a.dr));
2128   if (neg_step)
2129     {
2130       abs_step = -abs_step;
2131       seg_len1 = (-wi::to_poly_wide (dr_a.seg_len)).force_uhwi ();
2132       seg_len2 = (-wi::to_poly_wide (dr_b.seg_len)).force_uhwi ();
2133     }
2134
2135   /* Infer the number of iterations with which the memory segment is accessed
2136      by DR.  In other words, alias is checked if memory segment accessed by
2137      DR_A in some iterations intersect with memory segment accessed by DR_B
2138      in the same amount iterations.
2139      Note segnment length is a linear function of number of iterations with
2140      DR_STEP as the coefficient.  */
2141   poly_uint64 niter_len1, niter_len2;
2142   if (!can_div_trunc_p (seg_len1 + abs_step - 1, abs_step, &niter_len1)
2143       || !can_div_trunc_p (seg_len2 + abs_step - 1, abs_step, &niter_len2))
2144     return false;
2145
2146   /* Divide each access size by the byte step, rounding up.  */
2147   poly_uint64 niter_access1, niter_access2;
2148   if (!can_div_trunc_p (dr_a.access_size + abs_step - 1,
2149                         abs_step, &niter_access1)
2150       || !can_div_trunc_p (dr_b.access_size + abs_step - 1,
2151                            abs_step, &niter_access2))
2152     return false;
2153
2154   bool waw_or_war_p = (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW)) == 0;
2155
2156   int found = -1;
2157   for (unsigned int i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
2158     {
2159       tree access1 = DR_ACCESS_FN (dr_a.dr, i);
2160       tree access2 = DR_ACCESS_FN (dr_b.dr, i);
2161       /* Two indices must be the same if they are not scev, or not scev wrto
2162          current loop being vecorized.  */
2163       if (TREE_CODE (access1) != POLYNOMIAL_CHREC
2164           || TREE_CODE (access2) != POLYNOMIAL_CHREC
2165           || CHREC_VARIABLE (access1) != (unsigned)loop->num
2166           || CHREC_VARIABLE (access2) != (unsigned)loop->num)
2167         {
2168           if (operand_equal_p (access1, access2, 0))
2169             continue;
2170
2171           return false;
2172         }
2173       if (found >= 0)
2174         return false;
2175       found = i;
2176     }
2177
2178   /* Ought not to happen in practice, since if all accesses are equal then the
2179      alias should be decidable at compile time.  */
2180   if (found < 0)
2181     return false;
2182
2183   /* The two indices must have the same step.  */
2184   tree access1 = DR_ACCESS_FN (dr_a.dr, found);
2185   tree access2 = DR_ACCESS_FN (dr_b.dr, found);
2186   if (!operand_equal_p (CHREC_RIGHT (access1), CHREC_RIGHT (access2), 0))
2187     return false;
2188
2189   tree idx_step = CHREC_RIGHT (access1);
2190   /* Index must have const step, otherwise DR_STEP won't be constant.  */
2191   gcc_assert (TREE_CODE (idx_step) == INTEGER_CST);
2192   /* Index must evaluate in the same direction as DR.  */
2193   gcc_assert (!neg_step || tree_int_cst_sign_bit (idx_step) == 1);
2194
2195   tree min1 = CHREC_LEFT (access1);
2196   tree min2 = CHREC_LEFT (access2);
2197   if (!types_compatible_p (TREE_TYPE (min1), TREE_TYPE (min2)))
2198     return false;
2199
2200   /* Ideally, alias can be checked against loop's control IV, but we
2201      need to prove linear mapping between control IV and reference
2202      index.  Although that should be true, we check against (array)
2203      index of data reference.  Like segment length, index length is
2204      linear function of the number of iterations with index_step as
2205      the coefficient, i.e, niter_len * idx_step.  */
2206   offset_int abs_idx_step = offset_int::from (wi::to_wide (idx_step),
2207                                               SIGNED);
2208   if (neg_step)
2209     abs_idx_step = -abs_idx_step;
2210   poly_offset_int idx_len1 = abs_idx_step * niter_len1;
2211   poly_offset_int idx_len2 = abs_idx_step * niter_len2;
2212   poly_offset_int idx_access1 = abs_idx_step * niter_access1;
2213   poly_offset_int idx_access2 = abs_idx_step * niter_access2;
2214
2215   gcc_assert (known_ge (idx_len1, 0)
2216               && known_ge (idx_len2, 0)
2217               && known_ge (idx_access1, 0)
2218               && known_ge (idx_access2, 0));
2219
2220   /* Each access has the following pattern, with lengths measured
2221      in units of INDEX:
2222
2223           <-- idx_len -->
2224           <--- A: -ve step --->
2225           +-----+-------+-----+-------+-----+
2226           | n-1 | ..... |  0  | ..... | n-1 |
2227           +-----+-------+-----+-------+-----+
2228                         <--- B: +ve step --->
2229                         <-- idx_len -->
2230                         |
2231                        min
2232
2233      where "n" is the number of scalar iterations covered by the segment
2234      and where each access spans idx_access units.
2235
2236      A is the range of bytes accessed when the step is negative,
2237      B is the range when the step is positive.
2238
2239      When checking for general overlap, we need to test whether
2240      the range:
2241
2242        [min1 + low_offset1, min1 + high_offset1 + idx_access1 - 1]
2243
2244      overlaps:
2245
2246        [min2 + low_offset2, min2 + high_offset2 + idx_access2 - 1]
2247
2248      where:
2249
2250         low_offsetN = +ve step ? 0 : -idx_lenN;
2251        high_offsetN = +ve step ? idx_lenN : 0;
2252
2253      This is equivalent to testing whether:
2254
2255        min1 + low_offset1 <= min2 + high_offset2 + idx_access2 - 1
2256        && min2 + low_offset2 <= min1 + high_offset1 + idx_access1 - 1
2257
2258      Converting this into a single test, there is an overlap if:
2259
2260        0 <= min2 - min1 + bias <= limit
2261
2262      where  bias = high_offset2 + idx_access2 - 1 - low_offset1
2263            limit = (high_offset1 - low_offset1 + idx_access1 - 1)
2264                  + (high_offset2 - low_offset2 + idx_access2 - 1)
2265       i.e. limit = idx_len1 + idx_access1 - 1 + idx_len2 + idx_access2 - 1
2266
2267      Combining the tests requires limit to be computable in an unsigned
2268      form of the index type; if it isn't, we fall back to the usual
2269      pointer-based checks.
2270
2271      We can do better if DR_B is a write and if DR_A and DR_B are
2272      well-ordered in both the original and the new code (see the
2273      comment above the DR_ALIAS_* flags for details).  In this case
2274      we know that for each i in [0, n-1], the write performed by
2275      access i of DR_B occurs after access numbers j<=i of DR_A in
2276      both the original and the new code.  Any write or anti
2277      dependencies wrt those DR_A accesses are therefore maintained.
2278
2279      We just need to make sure that each individual write in DR_B does not
2280      overlap any higher-indexed access in DR_A; such DR_A accesses happen
2281      after the DR_B access in the original code but happen before it in
2282      the new code.
2283
2284      We know the steps for both accesses are equal, so by induction, we
2285      just need to test whether the first write of DR_B overlaps a later
2286      access of DR_A.  In other words, we need to move min1 along by
2287      one iteration:
2288
2289        min1' = min1 + idx_step
2290
2291      and use the ranges:
2292
2293        [min1' + low_offset1', min1' + high_offset1' + idx_access1 - 1]
2294
2295      and:
2296
2297        [min2, min2 + idx_access2 - 1]
2298
2299      where:
2300
2301         low_offset1' = +ve step ? 0 : -(idx_len1 - |idx_step|)
2302        high_offset1' = +ve_step ? idx_len1 - |idx_step| : 0.  */
2303   if (waw_or_war_p)
2304     idx_len1 -= abs_idx_step;
2305
2306   poly_offset_int limit = idx_len1 + idx_access1 - 1 + idx_access2 - 1;
2307   if (!waw_or_war_p)
2308     limit += idx_len2;
2309
2310   tree utype = unsigned_type_for (TREE_TYPE (min1));
2311   if (!wi::fits_to_tree_p (limit, utype))
2312     return false;
2313
2314   poly_offset_int low_offset1 = neg_step ? -idx_len1 : 0;
2315   poly_offset_int high_offset2 = neg_step || waw_or_war_p ? 0 : idx_len2;
2316   poly_offset_int bias = high_offset2 + idx_access2 - 1 - low_offset1;
2317   /* Equivalent to adding IDX_STEP to MIN1.  */
2318   if (waw_or_war_p)
2319     bias -= wi::to_offset (idx_step);
2320
2321   tree subject = fold_build2 (MINUS_EXPR, utype,
2322                               fold_convert (utype, min2),
2323                               fold_convert (utype, min1));
2324   subject = fold_build2 (PLUS_EXPR, utype, subject,
2325                          wide_int_to_tree (utype, bias));
2326   tree part_cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject,
2327                                      wide_int_to_tree (utype, limit));
2328   if (*cond_expr)
2329     *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2330                               *cond_expr, part_cond_expr);
2331   else
2332     *cond_expr = part_cond_expr;
2333   if (dump_enabled_p ())
2334     {
2335       if (waw_or_war_p)
2336         dump_printf (MSG_NOTE, "using an index-based WAR/WAW test\n");
2337       else
2338         dump_printf (MSG_NOTE, "using an index-based overlap test\n");
2339     }
2340   return true;
2341 }
2342
2343 /* A subroutine of create_intersect_range_checks, with a subset of the
2344    same arguments.  Try to optimize cases in which the second access
2345    is a write and in which some overlap is valid.  */
2346
2347 static bool
2348 create_waw_or_war_checks (tree *cond_expr,
2349                           const dr_with_seg_len_pair_t &alias_pair)
2350 {
2351   const dr_with_seg_len& dr_a = alias_pair.first;
2352   const dr_with_seg_len& dr_b = alias_pair.second;
2353
2354   /* Check for cases in which:
2355
2356      (a) DR_B is always a write;
2357      (b) the accesses are well-ordered in both the original and new code
2358          (see the comment above the DR_ALIAS_* flags for details); and
2359      (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR.  */
2360   if (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW))
2361     return false;
2362
2363   /* Check for equal (but possibly variable) steps.  */
2364   tree step = DR_STEP (dr_a.dr);
2365   if (!operand_equal_p (step, DR_STEP (dr_b.dr)))
2366     return false;
2367
2368   /* Make sure that we can operate on sizetype without loss of precision.  */
2369   tree addr_type = TREE_TYPE (DR_BASE_ADDRESS (dr_a.dr));
2370   if (TYPE_PRECISION (addr_type) != TYPE_PRECISION (sizetype))
2371     return false;
2372
2373   /* All addresses involved are known to have a common alignment ALIGN.
2374      We can therefore subtract ALIGN from an exclusive endpoint to get
2375      an inclusive endpoint.  In the best (and common) case, ALIGN is the
2376      same as the access sizes of both DRs, and so subtracting ALIGN
2377      cancels out the addition of an access size.  */
2378   unsigned int align = MIN (dr_a.align, dr_b.align);
2379   poly_uint64 last_chunk_a = dr_a.access_size - align;
2380   poly_uint64 last_chunk_b = dr_b.access_size - align;
2381
2382   /* Get a boolean expression that is true when the step is negative.  */
2383   tree indicator = dr_direction_indicator (dr_a.dr);
2384   tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
2385                                fold_convert (ssizetype, indicator),
2386                                ssize_int (0));
2387
2388   /* Get lengths in sizetype.  */
2389   tree seg_len_a
2390     = fold_convert (sizetype, rewrite_to_non_trapping_overflow (dr_a.seg_len));
2391   step = fold_convert (sizetype, rewrite_to_non_trapping_overflow (step));
2392
2393   /* Each access has the following pattern:
2394
2395           <- |seg_len| ->
2396           <--- A: -ve step --->
2397           +-----+-------+-----+-------+-----+
2398           | n-1 | ..... |  0  | ..... | n-1 |
2399           +-----+-------+-----+-------+-----+
2400                         <--- B: +ve step --->
2401                         <- |seg_len| ->
2402                         |
2403                    base address
2404
2405      where "n" is the number of scalar iterations covered by the segment.
2406
2407      A is the range of bytes accessed when the step is negative,
2408      B is the range when the step is positive.
2409
2410      We know that DR_B is a write.  We also know (from checking that
2411      DR_A and DR_B are well-ordered) that for each i in [0, n-1],
2412      the write performed by access i of DR_B occurs after access numbers
2413      j<=i of DR_A in both the original and the new code.  Any write or
2414      anti dependencies wrt those DR_A accesses are therefore maintained.
2415
2416      We just need to make sure that each individual write in DR_B does not
2417      overlap any higher-indexed access in DR_A; such DR_A accesses happen
2418      after the DR_B access in the original code but happen before it in
2419      the new code.
2420
2421      We know the steps for both accesses are equal, so by induction, we
2422      just need to test whether the first write of DR_B overlaps a later
2423      access of DR_A.  In other words, we need to move addr_a along by
2424      one iteration:
2425
2426        addr_a' = addr_a + step
2427
2428      and check whether:
2429
2430        [addr_b, addr_b + last_chunk_b]
2431
2432      overlaps:
2433
2434        [addr_a' + low_offset_a, addr_a' + high_offset_a + last_chunk_a]
2435
2436      where [low_offset_a, high_offset_a] spans accesses [1, n-1].  I.e.:
2437
2438         low_offset_a = +ve step ? 0 : seg_len_a - step
2439        high_offset_a = +ve step ? seg_len_a - step : 0
2440
2441      This is equivalent to testing whether:
2442
2443        addr_a' + low_offset_a <= addr_b + last_chunk_b
2444        && addr_b <= addr_a' + high_offset_a + last_chunk_a
2445
2446      Converting this into a single test, there is an overlap if:
2447
2448        0 <= addr_b + last_chunk_b - addr_a' - low_offset_a <= limit
2449
2450      where limit = high_offset_a - low_offset_a + last_chunk_a + last_chunk_b
2451
2452      If DR_A is performed, limit + |step| - last_chunk_b is known to be
2453      less than the size of the object underlying DR_A.  We also know
2454      that last_chunk_b <= |step|; this is checked elsewhere if it isn't
2455      guaranteed at compile time.  There can therefore be no overflow if
2456      "limit" is calculated in an unsigned type with pointer precision.  */
2457   tree addr_a = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_a.dr),
2458                                          DR_OFFSET (dr_a.dr));
2459   addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr));
2460
2461   tree addr_b = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_b.dr),
2462                                          DR_OFFSET (dr_b.dr));
2463   addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr));
2464
2465   /* Advance ADDR_A by one iteration and adjust the length to compensate.  */
2466   addr_a = fold_build_pointer_plus (addr_a, step);
2467   tree seg_len_a_minus_step = fold_build2 (MINUS_EXPR, sizetype,
2468                                            seg_len_a, step);
2469   if (!CONSTANT_CLASS_P (seg_len_a_minus_step))
2470     seg_len_a_minus_step = build1 (SAVE_EXPR, sizetype, seg_len_a_minus_step);
2471
2472   tree low_offset_a = fold_build3 (COND_EXPR, sizetype, neg_step,
2473                                    seg_len_a_minus_step, size_zero_node);
2474   if (!CONSTANT_CLASS_P (low_offset_a))
2475     low_offset_a = build1 (SAVE_EXPR, sizetype, low_offset_a);
2476
2477   /* We could use COND_EXPR <neg_step, size_zero_node, seg_len_a_minus_step>,
2478      but it's usually more efficient to reuse the LOW_OFFSET_A result.  */
2479   tree high_offset_a = fold_build2 (MINUS_EXPR, sizetype, seg_len_a_minus_step,
2480                                     low_offset_a);
2481
2482   /* The amount added to addr_b - addr_a'.  */
2483   tree bias = fold_build2 (MINUS_EXPR, sizetype,
2484                            size_int (last_chunk_b), low_offset_a);
2485
2486   tree limit = fold_build2 (MINUS_EXPR, sizetype, high_offset_a, low_offset_a);
2487   limit = fold_build2 (PLUS_EXPR, sizetype, limit,
2488                        size_int (last_chunk_a + last_chunk_b));
2489
2490   tree subject = fold_build2 (POINTER_DIFF_EXPR, ssizetype, addr_b, addr_a);
2491   subject = fold_build2 (PLUS_EXPR, sizetype,
2492                          fold_convert (sizetype, subject), bias);
2493
2494   *cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject, limit);
2495   if (dump_enabled_p ())
2496     dump_printf (MSG_NOTE, "using an address-based WAR/WAW test\n");
2497   return true;
2498 }
2499
2500 /* If ALIGN is nonzero, set up *SEQ_MIN_OUT and *SEQ_MAX_OUT so that for
2501    every address ADDR accessed by D:
2502
2503      *SEQ_MIN_OUT <= ADDR (== ADDR & -ALIGN) <= *SEQ_MAX_OUT
2504
2505    In this case, every element accessed by D is aligned to at least
2506    ALIGN bytes.
2507
2508    If ALIGN is zero then instead set *SEG_MAX_OUT so that:
2509
2510      *SEQ_MIN_OUT <= ADDR < *SEQ_MAX_OUT.  */
2511
2512 static void
2513 get_segment_min_max (const dr_with_seg_len &d, tree *seg_min_out,
2514                      tree *seg_max_out, HOST_WIDE_INT align)
2515 {
2516   /* Each access has the following pattern:
2517
2518           <- |seg_len| ->
2519           <--- A: -ve step --->
2520           +-----+-------+-----+-------+-----+
2521           | n-1 | ,.... |  0  | ..... | n-1 |
2522           +-----+-------+-----+-------+-----+
2523                         <--- B: +ve step --->
2524                         <- |seg_len| ->
2525                         |
2526                    base address
2527
2528      where "n" is the number of scalar iterations covered by the segment.
2529      (This should be VF for a particular pair if we know that both steps
2530      are the same, otherwise it will be the full number of scalar loop
2531      iterations.)
2532
2533      A is the range of bytes accessed when the step is negative,
2534      B is the range when the step is positive.
2535
2536      If the access size is "access_size" bytes, the lowest addressed byte is:
2537
2538          base + (step < 0 ? seg_len : 0)   [LB]
2539
2540      and the highest addressed byte is always below:
2541
2542          base + (step < 0 ? 0 : seg_len) + access_size   [UB]
2543
2544      Thus:
2545
2546          LB <= ADDR < UB
2547
2548      If ALIGN is nonzero, all three values are aligned to at least ALIGN
2549      bytes, so:
2550
2551          LB <= ADDR <= UB - ALIGN
2552
2553      where "- ALIGN" folds naturally with the "+ access_size" and often
2554      cancels it out.
2555
2556      We don't try to simplify LB and UB beyond this (e.g. by using
2557      MIN and MAX based on whether seg_len rather than the stride is
2558      negative) because it is possible for the absolute size of the
2559      segment to overflow the range of a ssize_t.
2560
2561      Keeping the pointer_plus outside of the cond_expr should allow
2562      the cond_exprs to be shared with other alias checks.  */
2563   tree indicator = dr_direction_indicator (d.dr);
2564   tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
2565                                fold_convert (ssizetype, indicator),
2566                                ssize_int (0));
2567   tree addr_base = fold_build_pointer_plus (DR_BASE_ADDRESS (d.dr),
2568                                             DR_OFFSET (d.dr));
2569   addr_base = fold_build_pointer_plus (addr_base, DR_INIT (d.dr));
2570   tree seg_len
2571     = fold_convert (sizetype, rewrite_to_non_trapping_overflow (d.seg_len));
2572
2573   tree min_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
2574                                 seg_len, size_zero_node);
2575   tree max_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
2576                                 size_zero_node, seg_len);
2577   max_reach = fold_build2 (PLUS_EXPR, sizetype, max_reach,
2578                            size_int (d.access_size - align));
2579
2580   *seg_min_out = fold_build_pointer_plus (addr_base, min_reach);
2581   *seg_max_out = fold_build_pointer_plus (addr_base, max_reach);
2582 }
2583
2584 /* Generate a runtime condition that is true if ALIAS_PAIR is free of aliases,
2585    storing the condition in *COND_EXPR.  The fallback is to generate a
2586    a test that the two accesses do not overlap:
2587
2588      end_a <= start_b || end_b <= start_a.  */
2589
2590 static void
2591 create_intersect_range_checks (class loop *loop, tree *cond_expr,
2592                                const dr_with_seg_len_pair_t &alias_pair)
2593 {
2594   const dr_with_seg_len& dr_a = alias_pair.first;
2595   const dr_with_seg_len& dr_b = alias_pair.second;
2596   *cond_expr = NULL_TREE;
2597   if (create_intersect_range_checks_index (loop, cond_expr, alias_pair))
2598     return;
2599
2600   if (create_ifn_alias_checks (cond_expr, alias_pair))
2601     return;
2602
2603   if (create_waw_or_war_checks (cond_expr, alias_pair))
2604     return;
2605
2606   unsigned HOST_WIDE_INT min_align;
2607   tree_code cmp_code;
2608   /* We don't have to check DR_ALIAS_MIXED_STEPS here, since both versions
2609      are equivalent.  This is just an optimization heuristic.  */
2610   if (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST
2611       && TREE_CODE (DR_STEP (dr_b.dr)) == INTEGER_CST)
2612     {
2613       /* In this case adding access_size to seg_len is likely to give
2614          a simple X * step, where X is either the number of scalar
2615          iterations or the vectorization factor.  We're better off
2616          keeping that, rather than subtracting an alignment from it.
2617
2618          In this case the maximum values are exclusive and so there is
2619          no alias if the maximum of one segment equals the minimum
2620          of another.  */
2621       min_align = 0;
2622       cmp_code = LE_EXPR;
2623     }
2624   else
2625     {
2626       /* Calculate the minimum alignment shared by all four pointers,
2627          then arrange for this alignment to be subtracted from the
2628          exclusive maximum values to get inclusive maximum values.
2629          This "- min_align" is cumulative with a "+ access_size"
2630          in the calculation of the maximum values.  In the best
2631          (and common) case, the two cancel each other out, leaving
2632          us with an inclusive bound based only on seg_len.  In the
2633          worst case we're simply adding a smaller number than before.
2634
2635          Because the maximum values are inclusive, there is an alias
2636          if the maximum value of one segment is equal to the minimum
2637          value of the other.  */
2638       min_align = MIN (dr_a.align, dr_b.align);
2639       cmp_code = LT_EXPR;
2640     }
2641
2642   tree seg_a_min, seg_a_max, seg_b_min, seg_b_max;
2643   get_segment_min_max (dr_a, &seg_a_min, &seg_a_max, min_align);
2644   get_segment_min_max (dr_b, &seg_b_min, &seg_b_max, min_align);
2645
2646   *cond_expr
2647     = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
2648         fold_build2 (cmp_code, boolean_type_node, seg_a_max, seg_b_min),
2649         fold_build2 (cmp_code, boolean_type_node, seg_b_max, seg_a_min));
2650   if (dump_enabled_p ())
2651     dump_printf (MSG_NOTE, "using an address-based overlap test\n");
2652 }
2653
2654 /* Create a conditional expression that represents the run-time checks for
2655    overlapping of address ranges represented by a list of data references
2656    pairs passed in ALIAS_PAIRS.  Data references are in LOOP.  The returned
2657    COND_EXPR is the conditional expression to be used in the if statement
2658    that controls which version of the loop gets executed at runtime.  */
2659
2660 void
2661 create_runtime_alias_checks (class loop *loop,
2662                              const vec<dr_with_seg_len_pair_t> *alias_pairs,
2663                              tree * cond_expr)
2664 {
2665   tree part_cond_expr;
2666
2667   fold_defer_overflow_warnings ();
2668   for (const dr_with_seg_len_pair_t &alias_pair : alias_pairs)
2669     {
2670       gcc_assert (alias_pair.flags);
2671       if (dump_enabled_p ())
2672         dump_printf (MSG_NOTE,
2673                      "create runtime check for data references %T and %T\n",
2674                      DR_REF (alias_pair.first.dr),
2675                      DR_REF (alias_pair.second.dr));
2676
2677       /* Create condition expression for each pair data references.  */
2678       create_intersect_range_checks (loop, &part_cond_expr, alias_pair);
2679       if (*cond_expr)
2680         *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2681                                   *cond_expr, part_cond_expr);
2682       else
2683         *cond_expr = part_cond_expr;
2684     }
2685   fold_undefer_and_ignore_overflow_warnings ();
2686 }
2687
2688 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
2689    expressions.  */
2690 static bool
2691 dr_equal_offsets_p1 (tree offset1, tree offset2)
2692 {
2693   bool res;
2694
2695   STRIP_NOPS (offset1);
2696   STRIP_NOPS (offset2);
2697
2698   if (offset1 == offset2)
2699     return true;
2700
2701   if (TREE_CODE (offset1) != TREE_CODE (offset2)
2702       || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
2703     return false;
2704
2705   res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
2706                              TREE_OPERAND (offset2, 0));
2707
2708   if (!res || !BINARY_CLASS_P (offset1))
2709     return res;
2710
2711   res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
2712                              TREE_OPERAND (offset2, 1));
2713
2714   return res;
2715 }
2716
2717 /* Check if DRA and DRB have equal offsets.  */
2718 bool
2719 dr_equal_offsets_p (struct data_reference *dra,
2720                     struct data_reference *drb)
2721 {
2722   tree offset1, offset2;
2723
2724   offset1 = DR_OFFSET (dra);
2725   offset2 = DR_OFFSET (drb);
2726
2727   return dr_equal_offsets_p1 (offset1, offset2);
2728 }
2729
2730 /* Returns true if FNA == FNB.  */
2731
2732 static bool
2733 affine_function_equal_p (affine_fn fna, affine_fn fnb)
2734 {
2735   unsigned i, n = fna.length ();
2736
2737   if (n != fnb.length ())
2738     return false;
2739
2740   for (i = 0; i < n; i++)
2741     if (!operand_equal_p (fna[i], fnb[i], 0))
2742       return false;
2743
2744   return true;
2745 }
2746
2747 /* If all the functions in CF are the same, returns one of them,
2748    otherwise returns NULL.  */
2749
2750 static affine_fn
2751 common_affine_function (conflict_function *cf)
2752 {
2753   unsigned i;
2754   affine_fn comm;
2755
2756   if (!CF_NONTRIVIAL_P (cf))
2757     return affine_fn ();
2758
2759   comm = cf->fns[0];
2760
2761   for (i = 1; i < cf->n; i++)
2762     if (!affine_function_equal_p (comm, cf->fns[i]))
2763       return affine_fn ();
2764
2765   return comm;
2766 }
2767
2768 /* Returns the base of the affine function FN.  */
2769
2770 static tree
2771 affine_function_base (affine_fn fn)
2772 {
2773   return fn[0];
2774 }
2775
2776 /* Returns true if FN is a constant.  */
2777
2778 static bool
2779 affine_function_constant_p (affine_fn fn)
2780 {
2781   unsigned i;
2782   tree coef;
2783
2784   for (i = 1; fn.iterate (i, &coef); i++)
2785     if (!integer_zerop (coef))
2786       return false;
2787
2788   return true;
2789 }
2790
2791 /* Returns true if FN is the zero constant function.  */
2792
2793 static bool
2794 affine_function_zero_p (affine_fn fn)
2795 {
2796   return (integer_zerop (affine_function_base (fn))
2797           && affine_function_constant_p (fn));
2798 }
2799
2800 /* Returns a signed integer type with the largest precision from TA
2801    and TB.  */
2802
2803 static tree
2804 signed_type_for_types (tree ta, tree tb)
2805 {
2806   if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
2807     return signed_type_for (ta);
2808   else
2809     return signed_type_for (tb);
2810 }
2811
2812 /* Applies operation OP on affine functions FNA and FNB, and returns the
2813    result.  */
2814
2815 static affine_fn
2816 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
2817 {
2818   unsigned i, n, m;
2819   affine_fn ret;
2820   tree coef;
2821
2822   if (fnb.length () > fna.length ())
2823     {
2824       n = fna.length ();
2825       m = fnb.length ();
2826     }
2827   else
2828     {
2829       n = fnb.length ();
2830       m = fna.length ();
2831     }
2832
2833   ret.create (m);
2834   for (i = 0; i < n; i++)
2835     {
2836       tree type = signed_type_for_types (TREE_TYPE (fna[i]),
2837                                          TREE_TYPE (fnb[i]));
2838       ret.quick_push (fold_build2 (op, type, fna[i], fnb[i]));
2839     }
2840
2841   for (; fna.iterate (i, &coef); i++)
2842     ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2843                                  coef, integer_zero_node));
2844   for (; fnb.iterate (i, &coef); i++)
2845     ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2846                                  integer_zero_node, coef));
2847
2848   return ret;
2849 }
2850
2851 /* Returns the sum of affine functions FNA and FNB.  */
2852
2853 static affine_fn
2854 affine_fn_plus (affine_fn fna, affine_fn fnb)
2855 {
2856   return affine_fn_op (PLUS_EXPR, fna, fnb);
2857 }
2858
2859 /* Returns the difference of affine functions FNA and FNB.  */
2860
2861 static affine_fn
2862 affine_fn_minus (affine_fn fna, affine_fn fnb)
2863 {
2864   return affine_fn_op (MINUS_EXPR, fna, fnb);
2865 }
2866
2867 /* Frees affine function FN.  */
2868
2869 static void
2870 affine_fn_free (affine_fn fn)
2871 {
2872   fn.release ();
2873 }
2874
2875 /* Determine for each subscript in the data dependence relation DDR
2876    the distance.  */
2877
2878 static void
2879 compute_subscript_distance (struct data_dependence_relation *ddr)
2880 {
2881   conflict_function *cf_a, *cf_b;
2882   affine_fn fn_a, fn_b, diff;
2883
2884   if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
2885     {
2886       unsigned int i;
2887
2888       for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2889         {
2890           struct subscript *subscript;
2891
2892           subscript = DDR_SUBSCRIPT (ddr, i);
2893           cf_a = SUB_CONFLICTS_IN_A (subscript);
2894           cf_b = SUB_CONFLICTS_IN_B (subscript);
2895
2896           fn_a = common_affine_function (cf_a);
2897           fn_b = common_affine_function (cf_b);
2898           if (!fn_a.exists () || !fn_b.exists ())
2899             {
2900               SUB_DISTANCE (subscript) = chrec_dont_know;
2901               return;
2902             }
2903           diff = affine_fn_minus (fn_a, fn_b);
2904
2905           if (affine_function_constant_p (diff))
2906             SUB_DISTANCE (subscript) = affine_function_base (diff);
2907           else
2908             SUB_DISTANCE (subscript) = chrec_dont_know;
2909
2910           affine_fn_free (diff);
2911         }
2912     }
2913 }
2914
2915 /* Returns the conflict function for "unknown".  */
2916
2917 static conflict_function *
2918 conflict_fn_not_known (void)
2919 {
2920   conflict_function *fn = XCNEW (conflict_function);
2921   fn->n = NOT_KNOWN;
2922
2923   return fn;
2924 }
2925
2926 /* Returns the conflict function for "independent".  */
2927
2928 static conflict_function *
2929 conflict_fn_no_dependence (void)
2930 {
2931   conflict_function *fn = XCNEW (conflict_function);
2932   fn->n = NO_DEPENDENCE;
2933
2934   return fn;
2935 }
2936
2937 /* Returns true if the address of OBJ is invariant in LOOP.  */
2938
2939 static bool
2940 object_address_invariant_in_loop_p (const class loop *loop, const_tree obj)
2941 {
2942   while (handled_component_p (obj))
2943     {
2944       if (TREE_CODE (obj) == ARRAY_REF)
2945         {
2946           for (int i = 1; i < 4; ++i)
2947             if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, i),
2948                                                         loop->num))
2949               return false;
2950         }
2951       else if (TREE_CODE (obj) == COMPONENT_REF)
2952         {
2953           if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
2954                                                       loop->num))
2955             return false;
2956         }
2957       obj = TREE_OPERAND (obj, 0);
2958     }
2959
2960   if (!INDIRECT_REF_P (obj)
2961       && TREE_CODE (obj) != MEM_REF)
2962     return true;
2963
2964   return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
2965                                                   loop->num);
2966 }
2967
2968 /* Returns false if we can prove that data references A and B do not alias,
2969    true otherwise.  If LOOP_NEST is false no cross-iteration aliases are
2970    considered.  */
2971
2972 bool
2973 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
2974                 class loop *loop_nest)
2975 {
2976   tree addr_a = DR_BASE_OBJECT (a);
2977   tree addr_b = DR_BASE_OBJECT (b);
2978
2979   /* If we are not processing a loop nest but scalar code we
2980      do not need to care about possible cross-iteration dependences
2981      and thus can process the full original reference.  Do so,
2982      similar to how loop invariant motion applies extra offset-based
2983      disambiguation.  */
2984   if (!loop_nest)
2985     {
2986       tree tree_size_a = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (a)));
2987       tree tree_size_b = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (b)));
2988
2989       if (DR_BASE_ADDRESS (a)
2990           && DR_BASE_ADDRESS (b)
2991           && operand_equal_p (DR_BASE_ADDRESS (a), DR_BASE_ADDRESS (b))
2992           && operand_equal_p (DR_OFFSET (a), DR_OFFSET (b))
2993           && poly_int_tree_p (tree_size_a)
2994           && poly_int_tree_p (tree_size_b)
2995           && !ranges_maybe_overlap_p (wi::to_poly_widest (DR_INIT (a)),
2996                                       wi::to_poly_widest (tree_size_a),
2997                                       wi::to_poly_widest (DR_INIT (b)),
2998                                       wi::to_poly_widest (tree_size_b)))
2999         {
3000           gcc_assert (integer_zerop (DR_STEP (a))
3001                       && integer_zerop (DR_STEP (b)));
3002           return false;
3003         }
3004
3005       aff_tree off1, off2;
3006       poly_widest_int size1, size2;
3007       get_inner_reference_aff (DR_REF (a), &off1, &size1);
3008       get_inner_reference_aff (DR_REF (b), &off2, &size2);
3009       aff_combination_scale (&off1, -1);
3010       aff_combination_add (&off2, &off1);
3011       if (aff_comb_cannot_overlap_p (&off2, size1, size2))
3012         return false;
3013     }
3014
3015   if ((TREE_CODE (addr_a) == MEM_REF || TREE_CODE (addr_a) == TARGET_MEM_REF)
3016       && (TREE_CODE (addr_b) == MEM_REF || TREE_CODE (addr_b) == TARGET_MEM_REF)
3017       /* For cross-iteration dependences the cliques must be valid for the
3018          whole loop, not just individual iterations.  */
3019       && (!loop_nest
3020           || MR_DEPENDENCE_CLIQUE (addr_a) == 1
3021           || MR_DEPENDENCE_CLIQUE (addr_a) == loop_nest->owned_clique)
3022       && MR_DEPENDENCE_CLIQUE (addr_a) == MR_DEPENDENCE_CLIQUE (addr_b)
3023       && MR_DEPENDENCE_BASE (addr_a) != MR_DEPENDENCE_BASE (addr_b))
3024     return false;
3025
3026   /* If we had an evolution in a pointer-based MEM_REF BASE_OBJECT we
3027      do not know the size of the base-object.  So we cannot do any
3028      offset/overlap based analysis but have to rely on points-to
3029      information only.  */
3030   if (TREE_CODE (addr_a) == MEM_REF
3031       && (DR_UNCONSTRAINED_BASE (a)
3032           || TREE_CODE (TREE_OPERAND (addr_a, 0)) == SSA_NAME))
3033     {
3034       /* For true dependences we can apply TBAA.  */
3035       if (flag_strict_aliasing
3036           && DR_IS_WRITE (a) && DR_IS_READ (b)
3037           && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
3038                                      get_alias_set (DR_REF (b))))
3039         return false;
3040       if (TREE_CODE (addr_b) == MEM_REF)
3041         return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
3042                                        TREE_OPERAND (addr_b, 0));
3043       else
3044         return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
3045                                        build_fold_addr_expr (addr_b));
3046     }
3047   else if (TREE_CODE (addr_b) == MEM_REF
3048            && (DR_UNCONSTRAINED_BASE (b)
3049                || TREE_CODE (TREE_OPERAND (addr_b, 0)) == SSA_NAME))
3050     {
3051       /* For true dependences we can apply TBAA.  */
3052       if (flag_strict_aliasing
3053           && DR_IS_WRITE (a) && DR_IS_READ (b)
3054           && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
3055                                      get_alias_set (DR_REF (b))))
3056         return false;
3057       if (TREE_CODE (addr_a) == MEM_REF)
3058         return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
3059                                        TREE_OPERAND (addr_b, 0));
3060       else
3061         return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
3062                                        TREE_OPERAND (addr_b, 0));
3063     }
3064
3065   /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
3066      that is being subsetted in the loop nest.  */
3067   if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
3068     return refs_output_dependent_p (addr_a, addr_b);
3069   else if (DR_IS_READ (a) && DR_IS_WRITE (b))
3070     return refs_anti_dependent_p (addr_a, addr_b);
3071   return refs_may_alias_p (addr_a, addr_b);
3072 }
3073
3074 /* REF_A and REF_B both satisfy access_fn_component_p.  Return true
3075    if it is meaningful to compare their associated access functions
3076    when checking for dependencies.  */
3077
3078 static bool
3079 access_fn_components_comparable_p (tree ref_a, tree ref_b)
3080 {
3081   /* Allow pairs of component refs from the following sets:
3082
3083        { REALPART_EXPR, IMAGPART_EXPR }
3084        { COMPONENT_REF }
3085        { ARRAY_REF }.  */
3086   tree_code code_a = TREE_CODE (ref_a);
3087   tree_code code_b = TREE_CODE (ref_b);
3088   if (code_a == IMAGPART_EXPR)
3089     code_a = REALPART_EXPR;
3090   if (code_b == IMAGPART_EXPR)
3091     code_b = REALPART_EXPR;
3092   if (code_a != code_b)
3093     return false;
3094
3095   if (TREE_CODE (ref_a) == COMPONENT_REF)
3096     /* ??? We cannot simply use the type of operand #0 of the refs here as
3097        the Fortran compiler smuggles type punning into COMPONENT_REFs.
3098        Use the DECL_CONTEXT of the FIELD_DECLs instead.  */
3099     return (DECL_CONTEXT (TREE_OPERAND (ref_a, 1))
3100             == DECL_CONTEXT (TREE_OPERAND (ref_b, 1)));
3101
3102   return types_compatible_p (TREE_TYPE (TREE_OPERAND (ref_a, 0)),
3103                              TREE_TYPE (TREE_OPERAND (ref_b, 0)));
3104 }
3105
3106 /* Initialize a data dependence relation RES in LOOP_NEST.  USE_ALT_INDICES
3107    is true when the main indices of A and B were not comparable so we try again
3108    with alternate indices computed on an indirect reference.  */
3109
3110 struct data_dependence_relation *
3111 initialize_data_dependence_relation (struct data_dependence_relation *res,
3112                                      vec<loop_p> loop_nest,
3113                                      bool use_alt_indices)
3114 {
3115   struct data_reference *a = DDR_A (res);
3116   struct data_reference *b = DDR_B (res);
3117   unsigned int i;
3118
3119   struct indices *indices_a = &a->indices;
3120   struct indices *indices_b = &b->indices;
3121   if (use_alt_indices)
3122     {
3123       if (TREE_CODE (DR_REF (a)) != MEM_REF)
3124         indices_a = &a->alt_indices;
3125       if (TREE_CODE (DR_REF (b)) != MEM_REF)
3126         indices_b = &b->alt_indices;
3127     }
3128   unsigned int num_dimensions_a = indices_a->access_fns.length ();
3129   unsigned int num_dimensions_b = indices_b->access_fns.length ();
3130   if (num_dimensions_a == 0 || num_dimensions_b == 0)
3131     {
3132       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3133       return res;
3134     }
3135
3136   /* For unconstrained bases, the root (highest-indexed) subscript
3137      describes a variation in the base of the original DR_REF rather
3138      than a component access.  We have no type that accurately describes
3139      the new DR_BASE_OBJECT (whose TREE_TYPE describes the type *after*
3140      applying this subscript) so limit the search to the last real
3141      component access.
3142
3143      E.g. for:
3144
3145         void
3146         f (int a[][8], int b[][8])
3147         {
3148           for (int i = 0; i < 8; ++i)
3149             a[i * 2][0] = b[i][0];
3150         }
3151
3152      the a and b accesses have a single ARRAY_REF component reference [0]
3153      but have two subscripts.  */
3154   if (indices_a->unconstrained_base)
3155     num_dimensions_a -= 1;
3156   if (indices_b->unconstrained_base)
3157     num_dimensions_b -= 1;
3158
3159   /* These structures describe sequences of component references in
3160      DR_REF (A) and DR_REF (B).  Each component reference is tied to a
3161      specific access function.  */
3162   struct {
3163     /* The sequence starts at DR_ACCESS_FN (A, START_A) of A and
3164        DR_ACCESS_FN (B, START_B) of B (inclusive) and extends to higher
3165        indices.  In C notation, these are the indices of the rightmost
3166        component references; e.g. for a sequence .b.c.d, the start
3167        index is for .d.  */
3168     unsigned int start_a;
3169     unsigned int start_b;
3170
3171     /* The sequence contains LENGTH consecutive access functions from
3172        each DR.  */
3173     unsigned int length;
3174
3175     /* The enclosing objects for the A and B sequences respectively,
3176        i.e. the objects to which DR_ACCESS_FN (A, START_A + LENGTH - 1)
3177        and DR_ACCESS_FN (B, START_B + LENGTH - 1) are applied.  */
3178     tree object_a;
3179     tree object_b;
3180   } full_seq = {}, struct_seq = {};
3181
3182   /* Before each iteration of the loop:
3183
3184      - REF_A is what you get after applying DR_ACCESS_FN (A, INDEX_A) and
3185      - REF_B is what you get after applying DR_ACCESS_FN (B, INDEX_B).  */
3186   unsigned int index_a = 0;
3187   unsigned int index_b = 0;
3188   tree ref_a = DR_REF (a);
3189   tree ref_b = DR_REF (b);
3190
3191   /* Now walk the component references from the final DR_REFs back up to
3192      the enclosing base objects.  Each component reference corresponds
3193      to one access function in the DR, with access function 0 being for
3194      the final DR_REF and the highest-indexed access function being the
3195      one that is applied to the base of the DR.
3196
3197      Look for a sequence of component references whose access functions
3198      are comparable (see access_fn_components_comparable_p).  If more
3199      than one such sequence exists, pick the one nearest the base
3200      (which is the leftmost sequence in C notation).  Store this sequence
3201      in FULL_SEQ.
3202
3203      For example, if we have:
3204
3205         struct foo { struct bar s; ... } (*a)[10], (*b)[10];
3206
3207         A: a[0][i].s.c.d
3208         B: __real b[0][i].s.e[i].f
3209
3210      (where d is the same type as the real component of f) then the access
3211      functions would be:
3212
3213                          0   1   2   3
3214         A:              .d  .c  .s [i]
3215
3216                  0   1   2   3   4   5
3217         B:  __real  .f [i]  .e  .s [i]
3218
3219      The A0/B2 column isn't comparable, since .d is a COMPONENT_REF
3220      and [i] is an ARRAY_REF.  However, the A1/B3 column contains two
3221      COMPONENT_REF accesses for struct bar, so is comparable.  Likewise
3222      the A2/B4 column contains two COMPONENT_REF accesses for struct foo,
3223      so is comparable.  The A3/B5 column contains two ARRAY_REFs that
3224      index foo[10] arrays, so is again comparable.  The sequence is
3225      therefore:
3226
3227         A: [1, 3]  (i.e. [i].s.c)
3228         B: [3, 5]  (i.e. [i].s.e)
3229
3230      Also look for sequences of component references whose access
3231      functions are comparable and whose enclosing objects have the same
3232      RECORD_TYPE.  Store this sequence in STRUCT_SEQ.  In the above
3233      example, STRUCT_SEQ would be:
3234
3235         A: [1, 2]  (i.e. s.c)
3236         B: [3, 4]  (i.e. s.e)  */
3237   while (index_a < num_dimensions_a && index_b < num_dimensions_b)
3238     {
3239       /* The alternate indices form always has a single dimension
3240          with unconstrained base.  */
3241       gcc_assert (!use_alt_indices);
3242
3243       /* REF_A and REF_B must be one of the component access types
3244          allowed by dr_analyze_indices.  */
3245       gcc_checking_assert (access_fn_component_p (ref_a));
3246       gcc_checking_assert (access_fn_component_p (ref_b));
3247
3248       /* Get the immediately-enclosing objects for REF_A and REF_B,
3249          i.e. the references *before* applying DR_ACCESS_FN (A, INDEX_A)
3250          and DR_ACCESS_FN (B, INDEX_B).  */
3251       tree object_a = TREE_OPERAND (ref_a, 0);
3252       tree object_b = TREE_OPERAND (ref_b, 0);
3253
3254       tree type_a = TREE_TYPE (object_a);
3255       tree type_b = TREE_TYPE (object_b);
3256       if (access_fn_components_comparable_p (ref_a, ref_b))
3257         {
3258           /* This pair of component accesses is comparable for dependence
3259              analysis, so we can include DR_ACCESS_FN (A, INDEX_A) and
3260              DR_ACCESS_FN (B, INDEX_B) in the sequence.  */
3261           if (full_seq.start_a + full_seq.length != index_a
3262               || full_seq.start_b + full_seq.length != index_b)
3263             {
3264               /* The accesses don't extend the current sequence,
3265                  so start a new one here.  */
3266               full_seq.start_a = index_a;
3267               full_seq.start_b = index_b;
3268               full_seq.length = 0;
3269             }
3270
3271           /* Add this pair of references to the sequence.  */
3272           full_seq.length += 1;
3273           full_seq.object_a = object_a;
3274           full_seq.object_b = object_b;
3275
3276           /* If the enclosing objects are structures (and thus have the
3277              same RECORD_TYPE), record the new sequence in STRUCT_SEQ.  */
3278           if (TREE_CODE (type_a) == RECORD_TYPE)
3279             struct_seq = full_seq;
3280
3281           /* Move to the next containing reference for both A and B.  */
3282           ref_a = object_a;
3283           ref_b = object_b;
3284           index_a += 1;
3285           index_b += 1;
3286           continue;
3287         }
3288
3289       /* Try to approach equal type sizes.  */
3290       if (!COMPLETE_TYPE_P (type_a)
3291           || !COMPLETE_TYPE_P (type_b)
3292           || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_a))
3293           || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_b)))
3294         break;
3295
3296       unsigned HOST_WIDE_INT size_a = tree_to_uhwi (TYPE_SIZE_UNIT (type_a));
3297       unsigned HOST_WIDE_INT size_b = tree_to_uhwi (TYPE_SIZE_UNIT (type_b));
3298       if (size_a <= size_b)
3299         {
3300           index_a += 1;
3301           ref_a = object_a;
3302         }
3303       if (size_b <= size_a)
3304         {
3305           index_b += 1;
3306           ref_b = object_b;
3307         }
3308     }
3309
3310   /* See whether FULL_SEQ ends at the base and whether the two bases
3311      are equal.  We do not care about TBAA or alignment info so we can
3312      use OEP_ADDRESS_OF to avoid false negatives.  */
3313   tree base_a = indices_a->base_object;
3314   tree base_b = indices_b->base_object;
3315   bool same_base_p = (full_seq.start_a + full_seq.length == num_dimensions_a
3316                       && full_seq.start_b + full_seq.length == num_dimensions_b
3317                       && (indices_a->unconstrained_base
3318                           == indices_b->unconstrained_base)
3319                       && operand_equal_p (base_a, base_b, OEP_ADDRESS_OF)
3320                       && (types_compatible_p (TREE_TYPE (base_a),
3321                                               TREE_TYPE (base_b))
3322                           || (!base_supports_access_fn_components_p (base_a)
3323                               && !base_supports_access_fn_components_p (base_b)
3324                               && operand_equal_p
3325                                    (TYPE_SIZE (TREE_TYPE (base_a)),
3326                                     TYPE_SIZE (TREE_TYPE (base_b)), 0)))
3327                       && (!loop_nest.exists ()
3328                           || (object_address_invariant_in_loop_p
3329                               (loop_nest[0], base_a))));
3330
3331   /* If the bases are the same, we can include the base variation too.
3332      E.g. the b accesses in:
3333
3334        for (int i = 0; i < n; ++i)
3335          b[i + 4][0] = b[i][0];
3336
3337      have a definite dependence distance of 4, while for:
3338
3339        for (int i = 0; i < n; ++i)
3340          a[i + 4][0] = b[i][0];
3341
3342      the dependence distance depends on the gap between a and b.
3343
3344      If the bases are different then we can only rely on the sequence
3345      rooted at a structure access, since arrays are allowed to overlap
3346      arbitrarily and change shape arbitrarily.  E.g. we treat this as
3347      valid code:
3348
3349        int a[256];
3350        ...
3351        ((int (*)[4][3]) &a[1])[i][0] += ((int (*)[4][3]) &a[2])[i][0];
3352
3353      where two lvalues with the same int[4][3] type overlap, and where
3354      both lvalues are distinct from the object's declared type.  */
3355   if (same_base_p)
3356     {
3357       if (indices_a->unconstrained_base)
3358         full_seq.length += 1;
3359     }
3360   else
3361     full_seq = struct_seq;
3362
3363   /* Punt if we didn't find a suitable sequence.  */
3364   if (full_seq.length == 0)
3365     {
3366       if (use_alt_indices
3367           || (TREE_CODE (DR_REF (a)) == MEM_REF
3368               && TREE_CODE (DR_REF (b)) == MEM_REF)
3369           || may_be_nonaddressable_p (DR_REF (a))
3370           || may_be_nonaddressable_p (DR_REF (b)))
3371         {
3372           /* Fully exhausted possibilities.  */
3373           DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3374           return res;
3375         }
3376
3377       /* Try evaluating both DRs as dereferences of pointers.  */
3378       if (!a->alt_indices.base_object
3379           && TREE_CODE (DR_REF (a)) != MEM_REF)
3380         {
3381           tree alt_ref = build2 (MEM_REF, TREE_TYPE (DR_REF (a)),
3382                                  build1 (ADDR_EXPR, ptr_type_node, DR_REF (a)),
3383                                  build_int_cst
3384                                    (reference_alias_ptr_type (DR_REF (a)), 0));
3385           dr_analyze_indices (&a->alt_indices, alt_ref,
3386                               loop_preheader_edge (loop_nest[0]),
3387                               loop_containing_stmt (DR_STMT (a)));
3388         }
3389       if (!b->alt_indices.base_object
3390           && TREE_CODE (DR_REF (b)) != MEM_REF)
3391         {
3392           tree alt_ref = build2 (MEM_REF, TREE_TYPE (DR_REF (b)),
3393                                  build1 (ADDR_EXPR, ptr_type_node, DR_REF (b)),
3394                                  build_int_cst
3395                                    (reference_alias_ptr_type (DR_REF (b)), 0));
3396           dr_analyze_indices (&b->alt_indices, alt_ref,
3397                               loop_preheader_edge (loop_nest[0]),
3398                               loop_containing_stmt (DR_STMT (b)));
3399         }
3400       return initialize_data_dependence_relation (res, loop_nest, true);
3401     }
3402
3403   if (!same_base_p)
3404     {
3405       /* Partial overlap is possible for different bases when strict aliasing
3406          is not in effect.  It's also possible if either base involves a union
3407          access; e.g. for:
3408
3409            struct s1 { int a[2]; };
3410            struct s2 { struct s1 b; int c; };
3411            struct s3 { int d; struct s1 e; };
3412            union u { struct s2 f; struct s3 g; } *p, *q;
3413
3414          the s1 at "p->f.b" (base "p->f") partially overlaps the s1 at
3415          "p->g.e" (base "p->g") and might partially overlap the s1 at
3416          "q->g.e" (base "q->g").  */
3417       if (!flag_strict_aliasing
3418           || ref_contains_union_access_p (full_seq.object_a)
3419           || ref_contains_union_access_p (full_seq.object_b))
3420         {
3421           DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3422           return res;
3423         }
3424
3425       DDR_COULD_BE_INDEPENDENT_P (res) = true;
3426       if (!loop_nest.exists ()
3427           || (object_address_invariant_in_loop_p (loop_nest[0],
3428                                                   full_seq.object_a)
3429               && object_address_invariant_in_loop_p (loop_nest[0],
3430                                                      full_seq.object_b)))
3431         {
3432           DDR_OBJECT_A (res) = full_seq.object_a;
3433           DDR_OBJECT_B (res) = full_seq.object_b;
3434         }
3435     }
3436
3437   DDR_AFFINE_P (res) = true;
3438   DDR_ARE_DEPENDENT (res) = NULL_TREE;
3439   DDR_SUBSCRIPTS (res).create (full_seq.length);
3440   DDR_LOOP_NEST (res) = loop_nest;
3441   DDR_SELF_REFERENCE (res) = false;
3442
3443   for (i = 0; i < full_seq.length; ++i)
3444     {
3445       struct subscript *subscript;
3446
3447       subscript = XNEW (struct subscript);
3448       SUB_ACCESS_FN (subscript, 0) = indices_a->access_fns[full_seq.start_a + i];
3449       SUB_ACCESS_FN (subscript, 1) = indices_b->access_fns[full_seq.start_b + i];
3450       SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
3451       SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
3452       SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
3453       SUB_DISTANCE (subscript) = chrec_dont_know;
3454       DDR_SUBSCRIPTS (res).safe_push (subscript);
3455     }
3456
3457   return res;
3458 }
3459
3460 /* Initialize a data dependence relation between data accesses A and
3461    B.  NB_LOOPS is the number of loops surrounding the references: the
3462    size of the classic distance/direction vectors.  */
3463
3464 struct data_dependence_relation *
3465 initialize_data_dependence_relation (struct data_reference *a,
3466                                      struct data_reference *b,
3467                                      vec<loop_p> loop_nest)
3468 {
3469   data_dependence_relation *res = XCNEW (struct data_dependence_relation);
3470   DDR_A (res) = a;
3471   DDR_B (res) = b;
3472   DDR_LOOP_NEST (res).create (0);
3473   DDR_SUBSCRIPTS (res).create (0);
3474   DDR_DIR_VECTS (res).create (0);
3475   DDR_DIST_VECTS (res).create (0);
3476
3477   if (a == NULL || b == NULL)
3478     {
3479       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3480       return res;
3481     }
3482
3483   /* If the data references do not alias, then they are independent.  */
3484   if (!dr_may_alias_p (a, b, loop_nest.exists () ? loop_nest[0] : NULL))
3485     {
3486       DDR_ARE_DEPENDENT (res) = chrec_known;
3487       return res;
3488     }
3489
3490   return initialize_data_dependence_relation (res, loop_nest, false);
3491 }
3492
3493
3494 /* Frees memory used by the conflict function F.  */
3495
3496 static void
3497 free_conflict_function (conflict_function *f)
3498 {
3499   unsigned i;
3500
3501   if (CF_NONTRIVIAL_P (f))
3502     {
3503       for (i = 0; i < f->n; i++)
3504         affine_fn_free (f->fns[i]);
3505     }
3506   free (f);
3507 }
3508
3509 /* Frees memory used by SUBSCRIPTS.  */
3510
3511 static void
3512 free_subscripts (vec<subscript_p> subscripts)
3513 {
3514   for (subscript_p s : subscripts)
3515     {
3516       free_conflict_function (s->conflicting_iterations_in_a);
3517       free_conflict_function (s->conflicting_iterations_in_b);
3518       free (s);
3519     }
3520   subscripts.release ();
3521 }
3522
3523 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
3524    description.  */
3525
3526 static inline void
3527 finalize_ddr_dependent (struct data_dependence_relation *ddr,
3528                         tree chrec)
3529 {
3530   DDR_ARE_DEPENDENT (ddr) = chrec;
3531   free_subscripts (DDR_SUBSCRIPTS (ddr));
3532   DDR_SUBSCRIPTS (ddr).create (0);
3533 }
3534
3535 /* The dependence relation DDR cannot be represented by a distance
3536    vector.  */
3537
3538 static inline void
3539 non_affine_dependence_relation (struct data_dependence_relation *ddr)
3540 {
3541   if (dump_file && (dump_flags & TDF_DETAILS))
3542     fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
3543
3544   DDR_AFFINE_P (ddr) = false;
3545 }
3546
3547 \f
3548
3549 /* This section contains the classic Banerjee tests.  */
3550
3551 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
3552    variables, i.e., if the ZIV (Zero Index Variable) test is true.  */
3553
3554 static inline bool
3555 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
3556 {
3557   return (evolution_function_is_constant_p (chrec_a)
3558           && evolution_function_is_constant_p (chrec_b));
3559 }
3560
3561 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
3562    variable, i.e., if the SIV (Single Index Variable) test is true.  */
3563
3564 static bool
3565 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
3566 {
3567   if ((evolution_function_is_constant_p (chrec_a)
3568        && evolution_function_is_univariate_p (chrec_b))
3569       || (evolution_function_is_constant_p (chrec_b)
3570           && evolution_function_is_univariate_p (chrec_a)))
3571     return true;
3572
3573   if (evolution_function_is_univariate_p (chrec_a)
3574       && evolution_function_is_univariate_p (chrec_b))
3575     {
3576       switch (TREE_CODE (chrec_a))
3577         {
3578         case POLYNOMIAL_CHREC:
3579           switch (TREE_CODE (chrec_b))
3580             {
3581             case POLYNOMIAL_CHREC:
3582               if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
3583                 return false;
3584               /* FALLTHRU */
3585
3586             default:
3587               return true;
3588             }
3589
3590         default:
3591           return true;
3592         }
3593     }
3594
3595   return false;
3596 }
3597
3598 /* Creates a conflict function with N dimensions.  The affine functions
3599    in each dimension follow.  */
3600
3601 static conflict_function *
3602 conflict_fn (unsigned n, ...)
3603 {
3604   unsigned i;
3605   conflict_function *ret = XCNEW (conflict_function);
3606   va_list ap;
3607
3608   gcc_assert (n > 0 && n <= MAX_DIM);
3609   va_start (ap, n);
3610
3611   ret->n = n;
3612   for (i = 0; i < n; i++)
3613     ret->fns[i] = va_arg (ap, affine_fn);
3614   va_end (ap);
3615
3616   return ret;
3617 }
3618
3619 /* Returns constant affine function with value CST.  */
3620
3621 static affine_fn
3622 affine_fn_cst (tree cst)
3623 {
3624   affine_fn fn;
3625   fn.create (1);
3626   fn.quick_push (cst);
3627   return fn;
3628 }
3629
3630 /* Returns affine function with single variable, CST + COEF * x_DIM.  */
3631
3632 static affine_fn
3633 affine_fn_univar (tree cst, unsigned dim, tree coef)
3634 {
3635   affine_fn fn;
3636   fn.create (dim + 1);
3637   unsigned i;
3638
3639   gcc_assert (dim > 0);
3640   fn.quick_push (cst);
3641   for (i = 1; i < dim; i++)
3642     fn.quick_push (integer_zero_node);
3643   fn.quick_push (coef);
3644   return fn;
3645 }
3646
3647 /* Analyze a ZIV (Zero Index Variable) subscript.  *OVERLAPS_A and
3648    *OVERLAPS_B are initialized to the functions that describe the
3649    relation between the elements accessed twice by CHREC_A and
3650    CHREC_B.  For k >= 0, the following property is verified:
3651
3652    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
3653
3654 static void
3655 analyze_ziv_subscript (tree chrec_a,
3656                        tree chrec_b,
3657                        conflict_function **overlaps_a,
3658                        conflict_function **overlaps_b,
3659                        tree *last_conflicts)
3660 {
3661   tree type, difference;
3662   dependence_stats.num_ziv++;
3663
3664   if (dump_file && (dump_flags & TDF_DETAILS))
3665     fprintf (dump_file, "(analyze_ziv_subscript \n");
3666
3667   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
3668   chrec_a = chrec_convert (type, chrec_a, NULL);
3669   chrec_b = chrec_convert (type, chrec_b, NULL);
3670   difference = chrec_fold_minus (type, chrec_a, chrec_b);
3671
3672   switch (TREE_CODE (difference))
3673     {
3674     case INTEGER_CST:
3675       if (integer_zerop (difference))
3676         {
3677           /* The difference is equal to zero: the accessed index
3678              overlaps for each iteration in the loop.  */
3679           *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3680           *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3681           *last_conflicts = chrec_dont_know;
3682           dependence_stats.num_ziv_dependent++;
3683         }
3684       else
3685         {
3686           /* The accesses do not overlap.  */
3687           *overlaps_a = conflict_fn_no_dependence ();
3688           *overlaps_b = conflict_fn_no_dependence ();
3689           *last_conflicts = integer_zero_node;
3690           dependence_stats.num_ziv_independent++;
3691         }
3692       break;
3693
3694     default:
3695       /* We're not sure whether the indexes overlap.  For the moment,
3696          conservatively answer "don't know".  */
3697       if (dump_file && (dump_flags & TDF_DETAILS))
3698         fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
3699
3700       *overlaps_a = conflict_fn_not_known ();
3701       *overlaps_b = conflict_fn_not_known ();
3702       *last_conflicts = chrec_dont_know;
3703       dependence_stats.num_ziv_unimplemented++;
3704       break;
3705     }
3706
3707   if (dump_file && (dump_flags & TDF_DETAILS))
3708     fprintf (dump_file, ")\n");
3709 }
3710
3711 /* Similar to max_stmt_executions_int, but returns the bound as a tree,
3712    and only if it fits to the int type.  If this is not the case, or the
3713    bound  on the number of iterations of LOOP could not be derived, returns
3714    chrec_dont_know.  */
3715
3716 static tree
3717 max_stmt_executions_tree (class loop *loop)
3718 {
3719   widest_int nit;
3720
3721   if (!max_stmt_executions (loop, &nit))
3722     return chrec_dont_know;
3723
3724   if (!wi::fits_to_tree_p (nit, unsigned_type_node))
3725     return chrec_dont_know;
3726
3727   return wide_int_to_tree (unsigned_type_node, nit);
3728 }
3729
3730 /* Determine whether the CHREC is always positive/negative.  If the expression
3731    cannot be statically analyzed, return false, otherwise set the answer into
3732    VALUE.  */
3733
3734 static bool
3735 chrec_is_positive (tree chrec, bool *value)
3736 {
3737   bool value0, value1, value2;
3738   tree end_value, nb_iter;
3739
3740   switch (TREE_CODE (chrec))
3741     {
3742     case POLYNOMIAL_CHREC:
3743       if (!chrec_is_positive (CHREC_LEFT (chrec), &value0)
3744           || !chrec_is_positive (CHREC_RIGHT (chrec), &value1))
3745         return false;
3746
3747       /* FIXME -- overflows.  */
3748       if (value0 == value1)
3749         {
3750           *value = value0;
3751           return true;
3752         }
3753
3754       /* Otherwise the chrec is under the form: "{-197, +, 2}_1",
3755          and the proof consists in showing that the sign never
3756          changes during the execution of the loop, from 0 to
3757          loop->nb_iterations.  */
3758       if (!evolution_function_is_affine_p (chrec))
3759         return false;
3760
3761       nb_iter = number_of_latch_executions (get_chrec_loop (chrec));
3762       if (chrec_contains_undetermined (nb_iter))
3763         return false;
3764
3765 #if 0
3766       /* TODO -- If the test is after the exit, we may decrease the number of
3767          iterations by one.  */
3768       if (after_exit)
3769         nb_iter = chrec_fold_minus (type, nb_iter, build_int_cst (type, 1));
3770 #endif
3771
3772       end_value = chrec_apply (CHREC_VARIABLE (chrec), chrec, nb_iter);
3773
3774       if (!chrec_is_positive (end_value, &value2))
3775         return false;
3776
3777       *value = value0;
3778       return value0 == value1;
3779
3780     case INTEGER_CST:
3781       switch (tree_int_cst_sgn (chrec))
3782         {
3783         case -1:
3784           *value = false;
3785           break;
3786         case 1:
3787           *value = true;
3788           break;
3789         default:
3790           return false;
3791         }
3792       return true;
3793
3794     default:
3795       return false;
3796     }
3797 }
3798
3799
3800 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
3801    constant, and CHREC_B is an affine function.  *OVERLAPS_A and
3802    *OVERLAPS_B are initialized to the functions that describe the
3803    relation between the elements accessed twice by CHREC_A and
3804    CHREC_B.  For k >= 0, the following property is verified:
3805
3806    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
3807
3808 static void
3809 analyze_siv_subscript_cst_affine (tree chrec_a,
3810                                   tree chrec_b,
3811                                   conflict_function **overlaps_a,
3812                                   conflict_function **overlaps_b,
3813                                   tree *last_conflicts)
3814 {
3815   bool value0, value1, value2;
3816   tree type, difference, tmp;
3817
3818   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
3819   chrec_a = chrec_convert (type, chrec_a, NULL);
3820   chrec_b = chrec_convert (type, chrec_b, NULL);
3821   difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
3822
3823   /* Special case overlap in the first iteration.  */
3824   if (integer_zerop (difference))
3825     {
3826       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3827       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3828       *last_conflicts = integer_one_node;
3829       return;
3830     }
3831
3832   if (!chrec_is_positive (initial_condition (difference), &value0))
3833     {
3834       if (dump_file && (dump_flags & TDF_DETAILS))
3835         fprintf (dump_file, "siv test failed: chrec is not positive.\n");
3836
3837       dependence_stats.num_siv_unimplemented++;
3838       *overlaps_a = conflict_fn_not_known ();
3839       *overlaps_b = conflict_fn_not_known ();
3840       *last_conflicts = chrec_dont_know;
3841       return;
3842     }
3843   else
3844     {
3845       if (value0 == false)
3846         {
3847           if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
3848               || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
3849             {
3850               if (dump_file && (dump_flags & TDF_DETAILS))
3851                 fprintf (dump_file, "siv test failed: chrec not positive.\n");
3852
3853               *overlaps_a = conflict_fn_not_known ();
3854               *overlaps_b = conflict_fn_not_known ();
3855               *last_conflicts = chrec_dont_know;
3856               dependence_stats.num_siv_unimplemented++;
3857               return;
3858             }
3859           else
3860             {
3861               if (value1 == true)
3862                 {
3863                   /* Example:
3864                      chrec_a = 12
3865                      chrec_b = {10, +, 1}
3866                   */
3867
3868                   if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
3869                     {
3870                       HOST_WIDE_INT numiter;
3871                       class loop *loop = get_chrec_loop (chrec_b);
3872
3873                       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3874                       tmp = fold_build2 (EXACT_DIV_EXPR, type,
3875                                          fold_build1 (ABS_EXPR, type, difference),
3876                                          CHREC_RIGHT (chrec_b));
3877                       *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
3878                       *last_conflicts = integer_one_node;
3879
3880
3881                       /* Perform weak-zero siv test to see if overlap is
3882                          outside the loop bounds.  */
3883                       numiter = max_stmt_executions_int (loop);
3884
3885                       if (numiter >= 0
3886                           && compare_tree_int (tmp, numiter) > 0)
3887                         {
3888                           free_conflict_function (*overlaps_a);
3889                           free_conflict_function (*overlaps_b);
3890                           *overlaps_a = conflict_fn_no_dependence ();
3891                           *overlaps_b = conflict_fn_no_dependence ();
3892                           *last_conflicts = integer_zero_node;
3893                           dependence_stats.num_siv_independent++;
3894                           return;
3895                         }
3896                       dependence_stats.num_siv_dependent++;
3897                       return;
3898                     }
3899
3900                   /* When the step does not divide the difference, there are
3901                      no overlaps.  */
3902                   else
3903                     {
3904                       *overlaps_a = conflict_fn_no_dependence ();
3905                       *overlaps_b = conflict_fn_no_dependence ();
3906                       *last_conflicts = integer_zero_node;
3907                       dependence_stats.num_siv_independent++;
3908                       return;
3909                     }
3910                 }
3911
3912               else
3913                 {
3914                   /* Example:
3915                      chrec_a = 12
3916                      chrec_b = {10, +, -1}
3917
3918                      In this case, chrec_a will not overlap with chrec_b.  */
3919                   *overlaps_a = conflict_fn_no_dependence ();
3920                   *overlaps_b = conflict_fn_no_dependence ();
3921                   *last_conflicts = integer_zero_node;
3922                   dependence_stats.num_siv_independent++;
3923                   return;
3924                 }
3925             }
3926         }
3927       else
3928         {
3929           if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
3930               || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
3931             {
3932               if (dump_file && (dump_flags & TDF_DETAILS))
3933                 fprintf (dump_file, "siv test failed: chrec not positive.\n");
3934
3935               *overlaps_a = conflict_fn_not_known ();
3936               *overlaps_b = conflict_fn_not_known ();
3937               *last_conflicts = chrec_dont_know;
3938               dependence_stats.num_siv_unimplemented++;
3939               return;
3940             }
3941           else
3942             {
3943               if (value2 == false)
3944                 {
3945                   /* Example:
3946                      chrec_a = 3
3947                      chrec_b = {10, +, -1}
3948                   */
3949                   if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
3950                     {
3951                       HOST_WIDE_INT numiter;
3952                       class loop *loop = get_chrec_loop (chrec_b);
3953
3954                       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3955                       tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
3956                                          CHREC_RIGHT (chrec_b));
3957                       *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
3958                       *last_conflicts = integer_one_node;
3959
3960                       /* Perform weak-zero siv test to see if overlap is
3961                          outside the loop bounds.  */
3962                       numiter = max_stmt_executions_int (loop);
3963
3964                       if (numiter >= 0
3965                           && compare_tree_int (tmp, numiter) > 0)
3966                         {
3967                           free_conflict_function (*overlaps_a);
3968                           free_conflict_function (*overlaps_b);
3969                           *overlaps_a = conflict_fn_no_dependence ();
3970                           *overlaps_b = conflict_fn_no_dependence ();
3971                           *last_conflicts = integer_zero_node;
3972                           dependence_stats.num_siv_independent++;
3973                           return;
3974                         }
3975                       dependence_stats.num_siv_dependent++;
3976                       return;
3977                     }
3978
3979                   /* When the step does not divide the difference, there
3980                      are no overlaps.  */
3981                   else
3982                     {
3983                       *overlaps_a = conflict_fn_no_dependence ();
3984                       *overlaps_b = conflict_fn_no_dependence ();
3985                       *last_conflicts = integer_zero_node;
3986                       dependence_stats.num_siv_independent++;
3987                       return;
3988                     }
3989                 }
3990               else
3991                 {
3992                   /* Example:
3993                      chrec_a = 3
3994                      chrec_b = {4, +, 1}
3995
3996                      In this case, chrec_a will not overlap with chrec_b.  */
3997                   *overlaps_a = conflict_fn_no_dependence ();
3998                   *overlaps_b = conflict_fn_no_dependence ();
3999                   *last_conflicts = integer_zero_node;
4000                   dependence_stats.num_siv_independent++;
4001                   return;
4002                 }
4003             }
4004         }
4005     }
4006 }
4007
4008 /* Helper recursive function for initializing the matrix A.  Returns
4009    the initial value of CHREC.  */
4010
4011 static tree
4012 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
4013 {
4014   gcc_assert (chrec);
4015
4016   switch (TREE_CODE (chrec))
4017     {
4018     case POLYNOMIAL_CHREC:
4019       HOST_WIDE_INT chrec_right;
4020       if (!cst_and_fits_in_hwi (CHREC_RIGHT (chrec)))
4021         return chrec_dont_know;
4022       chrec_right = int_cst_value (CHREC_RIGHT (chrec));
4023       /* We want to be able to negate without overflow.  */
4024       if (chrec_right == HOST_WIDE_INT_MIN)
4025         return chrec_dont_know;
4026       A[index][0] = mult * chrec_right;
4027       return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
4028
4029     case PLUS_EXPR:
4030     case MULT_EXPR:
4031     case MINUS_EXPR:
4032       {
4033         tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
4034         tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
4035
4036         return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
4037       }
4038
4039     CASE_CONVERT:
4040       {
4041         tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
4042         return chrec_convert (chrec_type (chrec), op, NULL);
4043       }
4044
4045     case BIT_NOT_EXPR:
4046       {
4047         /* Handle ~X as -1 - X.  */
4048         tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
4049         return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
4050                               build_int_cst (TREE_TYPE (chrec), -1), op);
4051       }
4052
4053     case INTEGER_CST:
4054       return chrec;
4055
4056     default:
4057       gcc_unreachable ();
4058       return NULL_TREE;
4059     }
4060 }
4061
4062 #define FLOOR_DIV(x,y) ((x) / (y))
4063
4064 /* Solves the special case of the Diophantine equation:
4065    | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
4066
4067    Computes the descriptions OVERLAPS_A and OVERLAPS_B.  NITER is the
4068    number of iterations that loops X and Y run.  The overlaps will be
4069    constructed as evolutions in dimension DIM.  */
4070
4071 static void
4072 compute_overlap_steps_for_affine_univar (HOST_WIDE_INT niter,
4073                                          HOST_WIDE_INT step_a,
4074                                          HOST_WIDE_INT step_b,
4075                                          affine_fn *overlaps_a,
4076                                          affine_fn *overlaps_b,
4077                                          tree *last_conflicts, int dim)
4078 {
4079   if (((step_a > 0 && step_b > 0)
4080        || (step_a < 0 && step_b < 0)))
4081     {
4082       HOST_WIDE_INT step_overlaps_a, step_overlaps_b;
4083       HOST_WIDE_INT gcd_steps_a_b, last_conflict, tau2;
4084
4085       gcd_steps_a_b = gcd (step_a, step_b);
4086       step_overlaps_a = step_b / gcd_steps_a_b;
4087       step_overlaps_b = step_a / gcd_steps_a_b;
4088
4089       if (niter > 0)
4090         {
4091           tau2 = FLOOR_DIV (niter, step_overlaps_a);
4092           tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
4093           last_conflict = tau2;
4094           *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
4095         }
4096       else
4097         *last_conflicts = chrec_dont_know;
4098
4099       *overlaps_a = affine_fn_univar (integer_zero_node, dim,
4100                                       build_int_cst (NULL_TREE,
4101                                                      step_overlaps_a));
4102       *overlaps_b = affine_fn_univar (integer_zero_node, dim,
4103                                       build_int_cst (NULL_TREE,
4104                                                      step_overlaps_b));
4105     }
4106
4107   else
4108     {
4109       *overlaps_a = affine_fn_cst (integer_zero_node);
4110       *overlaps_b = affine_fn_cst (integer_zero_node);
4111       *last_conflicts = integer_zero_node;
4112     }
4113 }
4114
4115 /* Solves the special case of a Diophantine equation where CHREC_A is
4116    an affine bivariate function, and CHREC_B is an affine univariate
4117    function.  For example,
4118
4119    | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
4120
4121    has the following overlapping functions:
4122
4123    | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
4124    | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
4125    | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
4126
4127    FORNOW: This is a specialized implementation for a case occurring in
4128    a common benchmark.  Implement the general algorithm.  */
4129
4130 static void
4131 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
4132                                       conflict_function **overlaps_a,
4133                                       conflict_function **overlaps_b,
4134                                       tree *last_conflicts)
4135 {
4136   bool xz_p, yz_p, xyz_p;
4137   HOST_WIDE_INT step_x, step_y, step_z;
4138   HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
4139   affine_fn overlaps_a_xz, overlaps_b_xz;
4140   affine_fn overlaps_a_yz, overlaps_b_yz;
4141   affine_fn overlaps_a_xyz, overlaps_b_xyz;
4142   affine_fn ova1, ova2, ovb;
4143   tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
4144
4145   step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
4146   step_y = int_cst_value (CHREC_RIGHT (chrec_a));
4147   step_z = int_cst_value (CHREC_RIGHT (chrec_b));
4148
4149   niter_x = max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)));
4150   niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a));
4151   niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b));
4152
4153   if (niter_x < 0 || niter_y < 0 || niter_z < 0)
4154     {
4155       if (dump_file && (dump_flags & TDF_DETAILS))
4156         fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
4157
4158       *overlaps_a = conflict_fn_not_known ();
4159       *overlaps_b = conflict_fn_not_known ();
4160       *last_conflicts = chrec_dont_know;
4161       return;
4162     }
4163
4164   niter = MIN (niter_x, niter_z);
4165   compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
4166                                            &overlaps_a_xz,
4167                                            &overlaps_b_xz,
4168                                            &last_conflicts_xz, 1);
4169   niter = MIN (niter_y, niter_z);
4170   compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
4171                                            &overlaps_a_yz,
4172                                            &overlaps_b_yz,
4173                                            &last_conflicts_yz, 2);
4174   niter = MIN (niter_x, niter_z);
4175   niter = MIN (niter_y, niter);
4176   compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
4177                                            &overlaps_a_xyz,
4178                                            &overlaps_b_xyz,
4179                                            &last_conflicts_xyz, 3);
4180
4181   xz_p = !integer_zerop (last_conflicts_xz);
4182   yz_p = !integer_zerop (last_conflicts_yz);
4183   xyz_p = !integer_zerop (last_conflicts_xyz);
4184
4185   if (xz_p || yz_p || xyz_p)
4186     {
4187       ova1 = affine_fn_cst (integer_zero_node);
4188       ova2 = affine_fn_cst (integer_zero_node);
4189       ovb = affine_fn_cst (integer_zero_node);
4190       if (xz_p)
4191         {
4192           affine_fn t0 = ova1;
4193           affine_fn t2 = ovb;
4194
4195           ova1 = affine_fn_plus (ova1, overlaps_a_xz);
4196           ovb = affine_fn_plus (ovb, overlaps_b_xz);
4197           affine_fn_free (t0);
4198           affine_fn_free (t2);
4199           *last_conflicts = last_conflicts_xz;
4200         }
4201       if (yz_p)
4202         {
4203           affine_fn t0 = ova2;
4204           affine_fn t2 = ovb;
4205
4206           ova2 = affine_fn_plus (ova2, overlaps_a_yz);
4207           ovb = affine_fn_plus (ovb, overlaps_b_yz);
4208           affine_fn_free (t0);
4209           affine_fn_free (t2);
4210           *last_conflicts = last_conflicts_yz;
4211         }
4212       if (xyz_p)
4213         {
4214           affine_fn t0 = ova1;
4215           affine_fn t2 = ova2;
4216           affine_fn t4 = ovb;
4217
4218           ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
4219           ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
4220           ovb = affine_fn_plus (ovb, overlaps_b_xyz);
4221           affine_fn_free (t0);
4222           affine_fn_free (t2);
4223           affine_fn_free (t4);
4224           *last_conflicts = last_conflicts_xyz;
4225         }
4226       *overlaps_a = conflict_fn (2, ova1, ova2);
4227       *overlaps_b = conflict_fn (1, ovb);
4228     }
4229   else
4230     {
4231       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4232       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4233       *last_conflicts = integer_zero_node;
4234     }
4235
4236   affine_fn_free (overlaps_a_xz);
4237   affine_fn_free (overlaps_b_xz);
4238   affine_fn_free (overlaps_a_yz);
4239   affine_fn_free (overlaps_b_yz);
4240   affine_fn_free (overlaps_a_xyz);
4241   affine_fn_free (overlaps_b_xyz);
4242 }
4243
4244 /* Copy the elements of vector VEC1 with length SIZE to VEC2.  */
4245
4246 static void
4247 lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
4248                     int size)
4249 {
4250   memcpy (vec2, vec1, size * sizeof (*vec1));
4251 }
4252
4253 /* Copy the elements of M x N matrix MAT1 to MAT2.  */
4254
4255 static void
4256 lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
4257                     int m, int n)
4258 {
4259   int i;
4260
4261   for (i = 0; i < m; i++)
4262     lambda_vector_copy (mat1[i], mat2[i], n);
4263 }
4264
4265 /* Store the N x N identity matrix in MAT.  */
4266
4267 static void
4268 lambda_matrix_id (lambda_matrix mat, int size)
4269 {
4270   int i, j;
4271
4272   for (i = 0; i < size; i++)
4273     for (j = 0; j < size; j++)
4274       mat[i][j] = (i == j) ? 1 : 0;
4275 }
4276
4277 /* Return the index of the first nonzero element of vector VEC1 between
4278    START and N.  We must have START <= N.
4279    Returns N if VEC1 is the zero vector.  */
4280
4281 static int
4282 lambda_vector_first_nz (lambda_vector vec1, int n, int start)
4283 {
4284   int j = start;
4285   while (j < n && vec1[j] == 0)
4286     j++;
4287   return j;
4288 }
4289
4290 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
4291    R2 = R2 + CONST1 * R1.  */
4292
4293 static bool
4294 lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2,
4295                        lambda_int const1)
4296 {
4297   int i;
4298
4299   if (const1 == 0)
4300     return true;
4301
4302   for (i = 0; i < n; i++)
4303     {
4304       bool ovf;
4305       lambda_int tem = mul_hwi (mat[r1][i], const1, &ovf);
4306       if (ovf)
4307         return false;
4308       lambda_int tem2 = add_hwi (mat[r2][i], tem, &ovf);
4309       if (ovf || tem2 == HOST_WIDE_INT_MIN)
4310         return false;
4311       mat[r2][i] = tem2;
4312     }
4313
4314   return true;
4315 }
4316
4317 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
4318    and store the result in VEC2.  */
4319
4320 static void
4321 lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
4322                           int size, lambda_int const1)
4323 {
4324   int i;
4325
4326   if (const1 == 0)
4327     lambda_vector_clear (vec2, size);
4328   else
4329     for (i = 0; i < size; i++)
4330       vec2[i] = const1 * vec1[i];
4331 }
4332
4333 /* Negate vector VEC1 with length SIZE and store it in VEC2.  */
4334
4335 static void
4336 lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
4337                       int size)
4338 {
4339   lambda_vector_mult_const (vec1, vec2, size, -1);
4340 }
4341
4342 /* Negate row R1 of matrix MAT which has N columns.  */
4343
4344 static void
4345 lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
4346 {
4347   lambda_vector_negate (mat[r1], mat[r1], n);
4348 }
4349
4350 /* Return true if two vectors are equal.  */
4351
4352 static bool
4353 lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
4354 {
4355   int i;
4356   for (i = 0; i < size; i++)
4357     if (vec1[i] != vec2[i])
4358       return false;
4359   return true;
4360 }
4361
4362 /* Given an M x N integer matrix A, this function determines an M x
4363    M unimodular matrix U, and an M x N echelon matrix S such that
4364    "U.A = S".  This decomposition is also known as "right Hermite".
4365
4366    Ref: Algorithm 2.1 page 33 in "Loop Transformations for
4367    Restructuring Compilers" Utpal Banerjee.  */
4368
4369 static bool
4370 lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
4371                              lambda_matrix S, lambda_matrix U)
4372 {
4373   int i, j, i0 = 0;
4374
4375   lambda_matrix_copy (A, S, m, n);
4376   lambda_matrix_id (U, m);
4377
4378   for (j = 0; j < n; j++)
4379     {
4380       if (lambda_vector_first_nz (S[j], m, i0) < m)
4381         {
4382           ++i0;
4383           for (i = m - 1; i >= i0; i--)
4384             {
4385               while (S[i][j] != 0)
4386                 {
4387                   lambda_int factor, a, b;
4388
4389                   a = S[i-1][j];
4390                   b = S[i][j];
4391                   gcc_assert (a != HOST_WIDE_INT_MIN);
4392                   factor = a / b;
4393
4394                   if (!lambda_matrix_row_add (S, n, i, i-1, -factor))
4395                     return false;
4396                   std::swap (S[i], S[i-1]);
4397
4398                   if (!lambda_matrix_row_add (U, m, i, i-1, -factor))
4399                     return false;
4400                   std::swap (U[i], U[i-1]);
4401                 }
4402             }
4403         }
4404     }
4405
4406   return true;
4407 }
4408
4409 /* Determines the overlapping elements due to accesses CHREC_A and
4410    CHREC_B, that are affine functions.  This function cannot handle
4411    symbolic evolution functions, ie. when initial conditions are
4412    parameters, because it uses lambda matrices of integers.  */
4413
4414 static void
4415 analyze_subscript_affine_affine (tree chrec_a,
4416                                  tree chrec_b,
4417                                  conflict_function **overlaps_a,
4418                                  conflict_function **overlaps_b,
4419                                  tree *last_conflicts)
4420 {
4421   unsigned nb_vars_a, nb_vars_b, dim;
4422   lambda_int gamma, gcd_alpha_beta;
4423   lambda_matrix A, U, S;
4424   struct obstack scratch_obstack;
4425
4426   if (eq_evolutions_p (chrec_a, chrec_b))
4427     {
4428       /* The accessed index overlaps for each iteration in the
4429          loop.  */
4430       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4431       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4432       *last_conflicts = chrec_dont_know;
4433       return;
4434     }
4435   if (dump_file && (dump_flags & TDF_DETAILS))
4436     fprintf (dump_file, "(analyze_subscript_affine_affine \n");
4437
4438   /* For determining the initial intersection, we have to solve a
4439      Diophantine equation.  This is the most time consuming part.
4440
4441      For answering to the question: "Is there a dependence?" we have
4442      to prove that there exists a solution to the Diophantine
4443      equation, and that the solution is in the iteration domain,
4444      i.e. the solution is positive or zero, and that the solution
4445      happens before the upper bound loop.nb_iterations.  Otherwise
4446      there is no dependence.  This function outputs a description of
4447      the iterations that hold the intersections.  */
4448
4449   nb_vars_a = nb_vars_in_chrec (chrec_a);
4450   nb_vars_b = nb_vars_in_chrec (chrec_b);
4451
4452   gcc_obstack_init (&scratch_obstack);
4453
4454   dim = nb_vars_a + nb_vars_b;
4455   U = lambda_matrix_new (dim, dim, &scratch_obstack);
4456   A = lambda_matrix_new (dim, 1, &scratch_obstack);
4457   S = lambda_matrix_new (dim, 1, &scratch_obstack);
4458
4459   tree init_a = initialize_matrix_A (A, chrec_a, 0, 1);
4460   tree init_b = initialize_matrix_A (A, chrec_b, nb_vars_a, -1);
4461   if (init_a == chrec_dont_know
4462       || init_b == chrec_dont_know)
4463     {
4464       if (dump_file && (dump_flags & TDF_DETAILS))
4465         fprintf (dump_file, "affine-affine test failed: "
4466                  "representation issue.\n");
4467       *overlaps_a = conflict_fn_not_known ();
4468       *overlaps_b = conflict_fn_not_known ();
4469       *last_conflicts = chrec_dont_know;
4470       goto end_analyze_subs_aa;
4471     }
4472   gamma = int_cst_value (init_b) - int_cst_value (init_a);
4473
4474   /* Don't do all the hard work of solving the Diophantine equation
4475      when we already know the solution: for example,
4476      | {3, +, 1}_1
4477      | {3, +, 4}_2
4478      | gamma = 3 - 3 = 0.
4479      Then the first overlap occurs during the first iterations:
4480      | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
4481   */
4482   if (gamma == 0)
4483     {
4484       if (nb_vars_a == 1 && nb_vars_b == 1)
4485         {
4486           HOST_WIDE_INT step_a, step_b;
4487           HOST_WIDE_INT niter, niter_a, niter_b;
4488           affine_fn ova, ovb;
4489
4490           niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a));
4491           niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b));
4492           niter = MIN (niter_a, niter_b);
4493           step_a = int_cst_value (CHREC_RIGHT (chrec_a));
4494           step_b = int_cst_value (CHREC_RIGHT (chrec_b));
4495
4496           compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
4497                                                    &ova, &ovb,
4498                                                    last_conflicts, 1);
4499           *overlaps_a = conflict_fn (1, ova);
4500           *overlaps_b = conflict_fn (1, ovb);
4501         }
4502
4503       else if (nb_vars_a == 2 && nb_vars_b == 1)
4504         compute_overlap_steps_for_affine_1_2
4505           (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
4506
4507       else if (nb_vars_a == 1 && nb_vars_b == 2)
4508         compute_overlap_steps_for_affine_1_2
4509           (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
4510
4511       else
4512         {
4513           if (dump_file && (dump_flags & TDF_DETAILS))
4514             fprintf (dump_file, "affine-affine test failed: too many variables.\n");
4515           *overlaps_a = conflict_fn_not_known ();
4516           *overlaps_b = conflict_fn_not_known ();
4517           *last_conflicts = chrec_dont_know;
4518         }
4519       goto end_analyze_subs_aa;
4520     }
4521
4522   /* U.A = S */
4523   if (!lambda_matrix_right_hermite (A, dim, 1, S, U))
4524     {
4525       *overlaps_a = conflict_fn_not_known ();
4526       *overlaps_b = conflict_fn_not_known ();
4527       *last_conflicts = chrec_dont_know;
4528       goto end_analyze_subs_aa;
4529     }
4530
4531   if (S[0][0] < 0)
4532     {
4533       S[0][0] *= -1;
4534       lambda_matrix_row_negate (U, dim, 0);
4535     }
4536   gcd_alpha_beta = S[0][0];
4537
4538   /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
4539      but that is a quite strange case.  Instead of ICEing, answer
4540      don't know.  */
4541   if (gcd_alpha_beta == 0)
4542     {
4543       *overlaps_a = conflict_fn_not_known ();
4544       *overlaps_b = conflict_fn_not_known ();
4545       *last_conflicts = chrec_dont_know;
4546       goto end_analyze_subs_aa;
4547     }
4548
4549   /* The classic "gcd-test".  */
4550   if (!int_divides_p (gcd_alpha_beta, gamma))
4551     {
4552       /* The "gcd-test" has determined that there is no integer
4553          solution, i.e. there is no dependence.  */
4554       *overlaps_a = conflict_fn_no_dependence ();
4555       *overlaps_b = conflict_fn_no_dependence ();
4556       *last_conflicts = integer_zero_node;
4557     }
4558
4559   /* Both access functions are univariate.  This includes SIV and MIV cases.  */
4560   else if (nb_vars_a == 1 && nb_vars_b == 1)
4561     {
4562       /* Both functions should have the same evolution sign.  */
4563       if (((A[0][0] > 0 && -A[1][0] > 0)
4564            || (A[0][0] < 0 && -A[1][0] < 0)))
4565         {
4566           /* The solutions are given by:
4567              |
4568              | [GAMMA/GCD_ALPHA_BETA  t].[u11 u12]  = [x0]
4569              |                           [u21 u22]    [y0]
4570
4571              For a given integer t.  Using the following variables,
4572
4573              | i0 = u11 * gamma / gcd_alpha_beta
4574              | j0 = u12 * gamma / gcd_alpha_beta
4575              | i1 = u21
4576              | j1 = u22
4577
4578              the solutions are:
4579
4580              | x0 = i0 + i1 * t,
4581              | y0 = j0 + j1 * t.  */
4582           HOST_WIDE_INT i0, j0, i1, j1;
4583
4584           i0 = U[0][0] * gamma / gcd_alpha_beta;
4585           j0 = U[0][1] * gamma / gcd_alpha_beta;
4586           i1 = U[1][0];
4587           j1 = U[1][1];
4588
4589           if ((i1 == 0 && i0 < 0)
4590               || (j1 == 0 && j0 < 0))
4591             {
4592               /* There is no solution.
4593                  FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
4594                  falls in here, but for the moment we don't look at the
4595                  upper bound of the iteration domain.  */
4596               *overlaps_a = conflict_fn_no_dependence ();
4597               *overlaps_b = conflict_fn_no_dependence ();
4598               *last_conflicts = integer_zero_node;
4599               goto end_analyze_subs_aa;
4600             }
4601
4602           if (i1 > 0 && j1 > 0)
4603             {
4604               HOST_WIDE_INT niter_a
4605                 = max_stmt_executions_int (get_chrec_loop (chrec_a));
4606               HOST_WIDE_INT niter_b
4607                 = max_stmt_executions_int (get_chrec_loop (chrec_b));
4608               HOST_WIDE_INT niter = MIN (niter_a, niter_b);
4609
4610               /* (X0, Y0) is a solution of the Diophantine equation:
4611                  "chrec_a (X0) = chrec_b (Y0)".  */
4612               HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
4613                                         CEIL (-j0, j1));
4614               HOST_WIDE_INT x0 = i1 * tau1 + i0;
4615               HOST_WIDE_INT y0 = j1 * tau1 + j0;
4616
4617               /* (X1, Y1) is the smallest positive solution of the eq
4618                  "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
4619                  first conflict occurs.  */
4620               HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
4621               HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
4622               HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
4623
4624               if (niter > 0)
4625                 {
4626                   /* If the overlap occurs outside of the bounds of the
4627                      loop, there is no dependence.  */
4628                   if (x1 >= niter_a || y1 >= niter_b)
4629                     {
4630                       *overlaps_a = conflict_fn_no_dependence ();
4631                       *overlaps_b = conflict_fn_no_dependence ();
4632                       *last_conflicts = integer_zero_node;
4633                       goto end_analyze_subs_aa;
4634                     }
4635
4636                   /* max stmt executions can get quite large, avoid
4637                      overflows by using wide ints here.  */
4638                   widest_int tau2
4639                     = wi::smin (wi::sdiv_floor (wi::sub (niter_a, i0), i1),
4640                                 wi::sdiv_floor (wi::sub (niter_b, j0), j1));
4641                   widest_int last_conflict = wi::sub (tau2, (x1 - i0)/i1);
4642                   if (wi::min_precision (last_conflict, SIGNED)
4643                       <= TYPE_PRECISION (integer_type_node))
4644                     *last_conflicts
4645                        = build_int_cst (integer_type_node,
4646                                         last_conflict.to_shwi ());
4647                   else
4648                     *last_conflicts = chrec_dont_know;
4649                 }
4650               else
4651                 *last_conflicts = chrec_dont_know;
4652
4653               *overlaps_a
4654                 = conflict_fn (1,
4655                                affine_fn_univar (build_int_cst (NULL_TREE, x1),
4656                                                  1,
4657                                                  build_int_cst (NULL_TREE, i1)));
4658               *overlaps_b
4659                 = conflict_fn (1,
4660                                affine_fn_univar (build_int_cst (NULL_TREE, y1),
4661                                                  1,
4662                                                  build_int_cst (NULL_TREE, j1)));
4663             }
4664           else
4665             {
4666               /* FIXME: For the moment, the upper bound of the
4667                  iteration domain for i and j is not checked.  */
4668               if (dump_file && (dump_flags & TDF_DETAILS))
4669                 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
4670               *overlaps_a = conflict_fn_not_known ();
4671               *overlaps_b = conflict_fn_not_known ();
4672               *last_conflicts = chrec_dont_know;
4673             }
4674         }
4675       else
4676         {
4677           if (dump_file && (dump_flags & TDF_DETAILS))
4678             fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
4679           *overlaps_a = conflict_fn_not_known ();
4680           *overlaps_b = conflict_fn_not_known ();
4681           *last_conflicts = chrec_dont_know;
4682         }
4683     }
4684   else
4685     {
4686       if (dump_file && (dump_flags & TDF_DETAILS))
4687         fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
4688       *overlaps_a = conflict_fn_not_known ();
4689       *overlaps_b = conflict_fn_not_known ();
4690       *last_conflicts = chrec_dont_know;
4691     }
4692
4693 end_analyze_subs_aa:
4694   obstack_free (&scratch_obstack, NULL);
4695   if (dump_file && (dump_flags & TDF_DETAILS))
4696     {
4697       fprintf (dump_file, "  (overlaps_a = ");
4698       dump_conflict_function (dump_file, *overlaps_a);
4699       fprintf (dump_file, ")\n  (overlaps_b = ");
4700       dump_conflict_function (dump_file, *overlaps_b);
4701       fprintf (dump_file, "))\n");
4702     }
4703 }
4704
4705 /* Returns true when analyze_subscript_affine_affine can be used for
4706    determining the dependence relation between chrec_a and chrec_b,
4707    that contain symbols.  This function modifies chrec_a and chrec_b
4708    such that the analysis result is the same, and such that they don't
4709    contain symbols, and then can safely be passed to the analyzer.
4710
4711    Example: The analysis of the following tuples of evolutions produce
4712    the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
4713    vs. {0, +, 1}_1
4714
4715    {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
4716    {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
4717 */
4718
4719 static bool
4720 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
4721 {
4722   tree diff, type, left_a, left_b, right_b;
4723
4724   if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
4725       || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
4726     /* FIXME: For the moment not handled.  Might be refined later.  */
4727     return false;
4728
4729   type = chrec_type (*chrec_a);
4730   left_a = CHREC_LEFT (*chrec_a);
4731   left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
4732   diff = chrec_fold_minus (type, left_a, left_b);
4733
4734   if (!evolution_function_is_constant_p (diff))
4735     return false;
4736
4737   if (dump_file && (dump_flags & TDF_DETAILS))
4738     fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
4739
4740   *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
4741                                      diff, CHREC_RIGHT (*chrec_a));
4742   right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
4743   *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
4744                                      build_int_cst (type, 0),
4745                                      right_b);
4746   return true;
4747 }
4748
4749 /* Analyze a SIV (Single Index Variable) subscript.  *OVERLAPS_A and
4750    *OVERLAPS_B are initialized to the functions that describe the
4751    relation between the elements accessed twice by CHREC_A and
4752    CHREC_B.  For k >= 0, the following property is verified:
4753
4754    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
4755
4756 static void
4757 analyze_siv_subscript (tree chrec_a,
4758                        tree chrec_b,
4759                        conflict_function **overlaps_a,
4760                        conflict_function **overlaps_b,
4761                        tree *last_conflicts,
4762                        int loop_nest_num)
4763 {
4764   dependence_stats.num_siv++;
4765
4766   if (dump_file && (dump_flags & TDF_DETAILS))
4767     fprintf (dump_file, "(analyze_siv_subscript \n");
4768
4769   if (evolution_function_is_constant_p (chrec_a)
4770       && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
4771     analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
4772                                       overlaps_a, overlaps_b, last_conflicts);
4773
4774   else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
4775            && evolution_function_is_constant_p (chrec_b))
4776     analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
4777                                       overlaps_b, overlaps_a, last_conflicts);
4778
4779   else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
4780            && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
4781     {
4782       if (!chrec_contains_symbols (chrec_a)
4783           && !chrec_contains_symbols (chrec_b))
4784         {
4785           analyze_subscript_affine_affine (chrec_a, chrec_b,
4786                                            overlaps_a, overlaps_b,
4787                                            last_conflicts);
4788
4789           if (CF_NOT_KNOWN_P (*overlaps_a)
4790               || CF_NOT_KNOWN_P (*overlaps_b))
4791             dependence_stats.num_siv_unimplemented++;
4792           else if (CF_NO_DEPENDENCE_P (*overlaps_a)
4793                    || CF_NO_DEPENDENCE_P (*overlaps_b))
4794             dependence_stats.num_siv_independent++;
4795           else
4796             dependence_stats.num_siv_dependent++;
4797         }
4798       else if (can_use_analyze_subscript_affine_affine (&chrec_a,
4799                                                         &chrec_b))
4800         {
4801           analyze_subscript_affine_affine (chrec_a, chrec_b,
4802                                            overlaps_a, overlaps_b,
4803                                            last_conflicts);
4804
4805           if (CF_NOT_KNOWN_P (*overlaps_a)
4806               || CF_NOT_KNOWN_P (*overlaps_b))
4807             dependence_stats.num_siv_unimplemented++;
4808           else if (CF_NO_DEPENDENCE_P (*overlaps_a)
4809                    || CF_NO_DEPENDENCE_P (*overlaps_b))
4810             dependence_stats.num_siv_independent++;
4811           else
4812             dependence_stats.num_siv_dependent++;
4813         }
4814       else
4815         goto siv_subscript_dontknow;
4816     }
4817
4818   else
4819     {
4820     siv_subscript_dontknow:;
4821       if (dump_file && (dump_flags & TDF_DETAILS))
4822         fprintf (dump_file, "  siv test failed: unimplemented");
4823       *overlaps_a = conflict_fn_not_known ();
4824       *overlaps_b = conflict_fn_not_known ();
4825       *last_conflicts = chrec_dont_know;
4826       dependence_stats.num_siv_unimplemented++;
4827     }
4828
4829   if (dump_file && (dump_flags & TDF_DETAILS))
4830     fprintf (dump_file, ")\n");
4831 }
4832
4833 /* Returns false if we can prove that the greatest common divisor of the steps
4834    of CHREC does not divide CST, false otherwise.  */
4835
4836 static bool
4837 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
4838 {
4839   HOST_WIDE_INT cd = 0, val;
4840   tree step;
4841
4842   if (!tree_fits_shwi_p (cst))
4843     return true;
4844   val = tree_to_shwi (cst);
4845
4846   while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
4847     {
4848       step = CHREC_RIGHT (chrec);
4849       if (!tree_fits_shwi_p (step))
4850         return true;
4851       cd = gcd (cd, tree_to_shwi (step));
4852       chrec = CHREC_LEFT (chrec);
4853     }
4854
4855   return val % cd == 0;
4856 }
4857
4858 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
4859    LOOP_NEST.  *OVERLAPS_A and *OVERLAPS_B are initialized to the
4860    functions that describe the relation between the elements accessed
4861    twice by CHREC_A and CHREC_B.  For k >= 0, the following property
4862    is verified:
4863
4864    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
4865
4866 static void
4867 analyze_miv_subscript (tree chrec_a,
4868                        tree chrec_b,
4869                        conflict_function **overlaps_a,
4870                        conflict_function **overlaps_b,
4871                        tree *last_conflicts,
4872                        class loop *loop_nest)
4873 {
4874   tree type, difference;
4875
4876   dependence_stats.num_miv++;
4877   if (dump_file && (dump_flags & TDF_DETAILS))
4878     fprintf (dump_file, "(analyze_miv_subscript \n");
4879
4880   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
4881   chrec_a = chrec_convert (type, chrec_a, NULL);
4882   chrec_b = chrec_convert (type, chrec_b, NULL);
4883   difference = chrec_fold_minus (type, chrec_a, chrec_b);
4884
4885   if (eq_evolutions_p (chrec_a, chrec_b))
4886     {
4887       /* Access functions are the same: all the elements are accessed
4888          in the same order.  */
4889       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4890       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4891       *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
4892       dependence_stats.num_miv_dependent++;
4893     }
4894
4895   else if (evolution_function_is_constant_p (difference)
4896            && evolution_function_is_affine_multivariate_p (chrec_a,
4897                                                            loop_nest->num)
4898            && !gcd_of_steps_may_divide_p (chrec_a, difference))
4899     {
4900       /* testsuite/.../ssa-chrec-33.c
4901          {{21, +, 2}_1, +, -2}_2  vs.  {{20, +, 2}_1, +, -2}_2
4902
4903          The difference is 1, and all the evolution steps are multiples
4904          of 2, consequently there are no overlapping elements.  */
4905       *overlaps_a = conflict_fn_no_dependence ();
4906       *overlaps_b = conflict_fn_no_dependence ();
4907       *last_conflicts = integer_zero_node;
4908       dependence_stats.num_miv_independent++;
4909     }
4910
4911   else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest->num)
4912            && !chrec_contains_symbols (chrec_a, loop_nest)
4913            && evolution_function_is_affine_in_loop (chrec_b, loop_nest->num)
4914            && !chrec_contains_symbols (chrec_b, loop_nest))
4915     {
4916       /* testsuite/.../ssa-chrec-35.c
4917          {0, +, 1}_2  vs.  {0, +, 1}_3
4918          the overlapping elements are respectively located at iterations:
4919          {0, +, 1}_x and {0, +, 1}_x,
4920          in other words, we have the equality:
4921          {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
4922
4923          Other examples:
4924          {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
4925          {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
4926
4927          {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
4928          {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
4929       */
4930       analyze_subscript_affine_affine (chrec_a, chrec_b,
4931                                        overlaps_a, overlaps_b, last_conflicts);
4932
4933       if (CF_NOT_KNOWN_P (*overlaps_a)
4934           || CF_NOT_KNOWN_P (*overlaps_b))
4935         dependence_stats.num_miv_unimplemented++;
4936       else if (CF_NO_DEPENDENCE_P (*overlaps_a)
4937                || CF_NO_DEPENDENCE_P (*overlaps_b))
4938         dependence_stats.num_miv_independent++;
4939       else
4940         dependence_stats.num_miv_dependent++;
4941     }
4942
4943   else
4944     {
4945       /* When the analysis is too difficult, answer "don't know".  */
4946       if (dump_file && (dump_flags & TDF_DETAILS))
4947         fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
4948
4949       *overlaps_a = conflict_fn_not_known ();
4950       *overlaps_b = conflict_fn_not_known ();
4951       *last_conflicts = chrec_dont_know;
4952       dependence_stats.num_miv_unimplemented++;
4953     }
4954
4955   if (dump_file && (dump_flags & TDF_DETAILS))
4956     fprintf (dump_file, ")\n");
4957 }
4958
4959 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
4960    with respect to LOOP_NEST.  OVERLAP_ITERATIONS_A and
4961    OVERLAP_ITERATIONS_B are initialized with two functions that
4962    describe the iterations that contain conflicting elements.
4963
4964    Remark: For an integer k >= 0, the following equality is true:
4965
4966    CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
4967 */
4968
4969 static void
4970 analyze_overlapping_iterations (tree chrec_a,
4971                                 tree chrec_b,
4972                                 conflict_function **overlap_iterations_a,
4973                                 conflict_function **overlap_iterations_b,
4974                                 tree *last_conflicts, class loop *loop_nest)
4975 {
4976   unsigned int lnn = loop_nest->num;
4977
4978   dependence_stats.num_subscript_tests++;
4979
4980   if (dump_file && (dump_flags & TDF_DETAILS))
4981     {
4982       fprintf (dump_file, "(analyze_overlapping_iterations \n");
4983       fprintf (dump_file, "  (chrec_a = ");
4984       print_generic_expr (dump_file, chrec_a);
4985       fprintf (dump_file, ")\n  (chrec_b = ");
4986       print_generic_expr (dump_file, chrec_b);
4987       fprintf (dump_file, ")\n");
4988     }
4989
4990   if (chrec_a == NULL_TREE
4991       || chrec_b == NULL_TREE
4992       || chrec_contains_undetermined (chrec_a)
4993       || chrec_contains_undetermined (chrec_b))
4994     {
4995       dependence_stats.num_subscript_undetermined++;
4996
4997       *overlap_iterations_a = conflict_fn_not_known ();
4998       *overlap_iterations_b = conflict_fn_not_known ();
4999     }
5000
5001   /* If they are the same chrec, and are affine, they overlap
5002      on every iteration.  */
5003   else if (eq_evolutions_p (chrec_a, chrec_b)
5004            && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
5005                || operand_equal_p (chrec_a, chrec_b, 0)))
5006     {
5007       dependence_stats.num_same_subscript_function++;
5008       *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
5009       *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
5010       *last_conflicts = chrec_dont_know;
5011     }
5012
5013   /* If they aren't the same, and aren't affine, we can't do anything
5014      yet.  */
5015   else if ((chrec_contains_symbols (chrec_a)
5016             || chrec_contains_symbols (chrec_b))
5017            && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
5018                || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
5019     {
5020       dependence_stats.num_subscript_undetermined++;
5021       *overlap_iterations_a = conflict_fn_not_known ();
5022       *overlap_iterations_b = conflict_fn_not_known ();
5023     }
5024
5025   else if (ziv_subscript_p (chrec_a, chrec_b))
5026     analyze_ziv_subscript (chrec_a, chrec_b,
5027                            overlap_iterations_a, overlap_iterations_b,
5028                            last_conflicts);
5029
5030   else if (siv_subscript_p (chrec_a, chrec_b))
5031     analyze_siv_subscript (chrec_a, chrec_b,
5032                            overlap_iterations_a, overlap_iterations_b,
5033                            last_conflicts, lnn);
5034
5035   else
5036     analyze_miv_subscript (chrec_a, chrec_b,
5037                            overlap_iterations_a, overlap_iterations_b,
5038                            last_conflicts, loop_nest);
5039
5040   if (dump_file && (dump_flags & TDF_DETAILS))
5041     {
5042       fprintf (dump_file, "  (overlap_iterations_a = ");
5043       dump_conflict_function (dump_file, *overlap_iterations_a);
5044       fprintf (dump_file, ")\n  (overlap_iterations_b = ");
5045       dump_conflict_function (dump_file, *overlap_iterations_b);
5046       fprintf (dump_file, "))\n");
5047     }
5048 }
5049
5050 /* Helper function for uniquely inserting distance vectors.  */
5051
5052 static void
5053 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
5054 {
5055   for (lambda_vector v : DDR_DIST_VECTS (ddr))
5056     if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
5057       return;
5058
5059   DDR_DIST_VECTS (ddr).safe_push (dist_v);
5060 }
5061
5062 /* Helper function for uniquely inserting direction vectors.  */
5063
5064 static void
5065 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
5066 {
5067   for (lambda_vector v : DDR_DIR_VECTS (ddr))
5068     if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
5069       return;
5070
5071   DDR_DIR_VECTS (ddr).safe_push (dir_v);
5072 }
5073
5074 /* Add a distance of 1 on all the loops outer than INDEX.  If we
5075    haven't yet determined a distance for this outer loop, push a new
5076    distance vector composed of the previous distance, and a distance
5077    of 1 for this outer loop.  Example:
5078
5079    | loop_1
5080    |   loop_2
5081    |     A[10]
5082    |   endloop_2
5083    | endloop_1
5084
5085    Saved vectors are of the form (dist_in_1, dist_in_2).  First, we
5086    save (0, 1), then we have to save (1, 0).  */
5087
5088 static void
5089 add_outer_distances (struct data_dependence_relation *ddr,
5090                      lambda_vector dist_v, int index)
5091 {
5092   /* For each outer loop where init_v is not set, the accesses are
5093      in dependence of distance 1 in the loop.  */
5094   while (--index >= 0)
5095     {
5096       lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5097       lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
5098       save_v[index] = 1;
5099       save_dist_v (ddr, save_v);
5100     }
5101 }
5102
5103 /* Return false when fail to represent the data dependence as a
5104    distance vector.  A_INDEX is the index of the first reference
5105    (0 for DDR_A, 1 for DDR_B) and B_INDEX is the index of the
5106    second reference.  INIT_B is set to true when a component has been
5107    added to the distance vector DIST_V.  INDEX_CARRY is then set to
5108    the index in DIST_V that carries the dependence.  */
5109
5110 static bool
5111 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
5112                              unsigned int a_index, unsigned int b_index,
5113                              lambda_vector dist_v, bool *init_b,
5114                              int *index_carry)
5115 {
5116   unsigned i;
5117   lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5118   class loop *loop = DDR_LOOP_NEST (ddr)[0];
5119
5120   for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
5121     {
5122       tree access_fn_a, access_fn_b;
5123       struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
5124
5125       if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
5126         {
5127           non_affine_dependence_relation (ddr);
5128           return false;
5129         }
5130
5131       access_fn_a = SUB_ACCESS_FN (subscript, a_index);
5132       access_fn_b = SUB_ACCESS_FN (subscript, b_index);
5133
5134       if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
5135           && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
5136         {
5137           HOST_WIDE_INT dist;
5138           int index;
5139           int var_a = CHREC_VARIABLE (access_fn_a);
5140           int var_b = CHREC_VARIABLE (access_fn_b);
5141
5142           if (var_a != var_b
5143               || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
5144             {
5145               non_affine_dependence_relation (ddr);
5146               return false;
5147             }
5148
5149           /* When data references are collected in a loop while data
5150              dependences are analyzed in loop nest nested in the loop, we
5151              would have more number of access functions than number of
5152              loops.  Skip access functions of loops not in the loop nest.
5153
5154              See PR89725 for more information.  */
5155           if (flow_loop_nested_p (get_loop (cfun, var_a), loop))
5156             continue;
5157
5158           dist = int_cst_value (SUB_DISTANCE (subscript));
5159           index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
5160           *index_carry = MIN (index, *index_carry);
5161
5162           /* This is the subscript coupling test.  If we have already
5163              recorded a distance for this loop (a distance coming from
5164              another subscript), it should be the same.  For example,
5165              in the following code, there is no dependence:
5166
5167              | loop i = 0, N, 1
5168              |   T[i+1][i] = ...
5169              |   ... = T[i][i]
5170              | endloop
5171           */
5172           if (init_v[index] != 0 && dist_v[index] != dist)
5173             {
5174               finalize_ddr_dependent (ddr, chrec_known);
5175               return false;
5176             }
5177
5178           dist_v[index] = dist;
5179           init_v[index] = 1;
5180           *init_b = true;
5181         }
5182       else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
5183         {
5184           /* This can be for example an affine vs. constant dependence
5185              (T[i] vs. T[3]) that is not an affine dependence and is
5186              not representable as a distance vector.  */
5187           non_affine_dependence_relation (ddr);
5188           return false;
5189         }
5190       else
5191         *init_b = true;
5192     }
5193
5194   return true;
5195 }
5196
5197 /* Return true when the DDR contains only invariant access functions wrto. loop
5198    number LNUM.  */
5199
5200 static bool
5201 invariant_access_functions (const struct data_dependence_relation *ddr,
5202                             int lnum)
5203 {
5204   for (subscript *sub : DDR_SUBSCRIPTS (ddr))
5205     if (!evolution_function_is_invariant_p (SUB_ACCESS_FN (sub, 0), lnum)
5206         || !evolution_function_is_invariant_p (SUB_ACCESS_FN (sub, 1), lnum))
5207       return false;
5208
5209   return true;
5210 }
5211
5212 /* Helper function for the case where DDR_A and DDR_B are the same
5213    multivariate access function with a constant step.  For an example
5214    see pr34635-1.c.  */
5215
5216 static void
5217 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
5218 {
5219   int x_1, x_2;
5220   tree c_1 = CHREC_LEFT (c_2);
5221   tree c_0 = CHREC_LEFT (c_1);
5222   lambda_vector dist_v;
5223   HOST_WIDE_INT v1, v2, cd;
5224
5225   /* Polynomials with more than 2 variables are not handled yet.  When
5226      the evolution steps are parameters, it is not possible to
5227      represent the dependence using classical distance vectors.  */
5228   if (TREE_CODE (c_0) != INTEGER_CST
5229       || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
5230       || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
5231     {
5232       DDR_AFFINE_P (ddr) = false;
5233       return;
5234     }
5235
5236   x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
5237   x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
5238
5239   /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2).  */
5240   dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5241   v1 = int_cst_value (CHREC_RIGHT (c_1));
5242   v2 = int_cst_value (CHREC_RIGHT (c_2));
5243   cd = gcd (v1, v2);
5244   v1 /= cd;
5245   v2 /= cd;
5246
5247   if (v2 < 0)
5248     {
5249       v2 = -v2;
5250       v1 = -v1;
5251     }
5252
5253   dist_v[x_1] = v2;
5254   dist_v[x_2] = -v1;
5255   save_dist_v (ddr, dist_v);
5256
5257   add_outer_distances (ddr, dist_v, x_1);
5258 }
5259
5260 /* Helper function for the case where DDR_A and DDR_B are the same
5261    access functions.  */
5262
5263 static void
5264 add_other_self_distances (struct data_dependence_relation *ddr)
5265 {
5266   lambda_vector dist_v;
5267   unsigned i;
5268   int index_carry = DDR_NB_LOOPS (ddr);
5269   subscript *sub;
5270   class loop *loop = DDR_LOOP_NEST (ddr)[0];
5271
5272   FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
5273     {
5274       tree access_fun = SUB_ACCESS_FN (sub, 0);
5275
5276       if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
5277         {
5278           if (!evolution_function_is_univariate_p (access_fun, loop->num))
5279             {
5280               if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
5281                 {
5282                   DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
5283                   return;
5284                 }
5285
5286               access_fun = SUB_ACCESS_FN (DDR_SUBSCRIPT (ddr, 0), 0);
5287
5288               if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
5289                 add_multivariate_self_dist (ddr, access_fun);
5290               else
5291                 /* The evolution step is not constant: it varies in
5292                    the outer loop, so this cannot be represented by a
5293                    distance vector.  For example in pr34635.c the
5294                    evolution is {0, +, {0, +, 4}_1}_2.  */
5295                 DDR_AFFINE_P (ddr) = false;
5296
5297               return;
5298             }
5299
5300           /* When data references are collected in a loop while data
5301              dependences are analyzed in loop nest nested in the loop, we
5302              would have more number of access functions than number of
5303              loops.  Skip access functions of loops not in the loop nest.
5304
5305              See PR89725 for more information.  */
5306           if (flow_loop_nested_p (get_loop (cfun, CHREC_VARIABLE (access_fun)),
5307                                   loop))
5308             continue;
5309
5310           index_carry = MIN (index_carry,
5311                              index_in_loop_nest (CHREC_VARIABLE (access_fun),
5312                                                  DDR_LOOP_NEST (ddr)));
5313         }
5314     }
5315
5316   dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5317   add_outer_distances (ddr, dist_v, index_carry);
5318 }
5319
5320 static void
5321 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
5322 {
5323   lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5324
5325   dist_v[0] = 1;
5326   save_dist_v (ddr, dist_v);
5327 }
5328
5329 /* Adds a unit distance vector to DDR when there is a 0 overlap.  This
5330    is the case for example when access functions are the same and
5331    equal to a constant, as in:
5332
5333    | loop_1
5334    |   A[3] = ...
5335    |   ... = A[3]
5336    | endloop_1
5337
5338    in which case the distance vectors are (0) and (1).  */
5339
5340 static void
5341 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
5342 {
5343   unsigned i, j;
5344
5345   for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
5346     {
5347       subscript_p sub = DDR_SUBSCRIPT (ddr, i);
5348       conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
5349       conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
5350
5351       for (j = 0; j < ca->n; j++)
5352         if (affine_function_zero_p (ca->fns[j]))
5353           {
5354             insert_innermost_unit_dist_vector (ddr);
5355             return;
5356           }
5357
5358       for (j = 0; j < cb->n; j++)
5359         if (affine_function_zero_p (cb->fns[j]))
5360           {
5361             insert_innermost_unit_dist_vector (ddr);
5362             return;
5363           }
5364     }
5365 }
5366
5367 /* Return true when the DDR contains two data references that have the
5368    same access functions.  */
5369
5370 static inline bool
5371 same_access_functions (const struct data_dependence_relation *ddr)
5372 {
5373   for (subscript *sub : DDR_SUBSCRIPTS (ddr))
5374     if (!eq_evolutions_p (SUB_ACCESS_FN (sub, 0),
5375                           SUB_ACCESS_FN (sub, 1)))
5376       return false;
5377
5378   return true;
5379 }
5380
5381 /* Compute the classic per loop distance vector.  DDR is the data
5382    dependence relation to build a vector from.  Return false when fail
5383    to represent the data dependence as a distance vector.  */
5384
5385 static bool
5386 build_classic_dist_vector (struct data_dependence_relation *ddr,
5387                            class loop *loop_nest)
5388 {
5389   bool init_b = false;
5390   int index_carry = DDR_NB_LOOPS (ddr);
5391   lambda_vector dist_v;
5392
5393   if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
5394     return false;
5395
5396   if (same_access_functions (ddr))
5397     {
5398       /* Save the 0 vector.  */
5399       dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5400       save_dist_v (ddr, dist_v);
5401
5402       if (invariant_access_functions (ddr, loop_nest->num))
5403         add_distance_for_zero_overlaps (ddr);
5404
5405       if (DDR_NB_LOOPS (ddr) > 1)
5406         add_other_self_distances (ddr);
5407
5408       return true;
5409     }
5410
5411   dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5412   if (!build_classic_dist_vector_1 (ddr, 0, 1, dist_v, &init_b, &index_carry))
5413     return false;
5414
5415   /* Save the distance vector if we initialized one.  */
5416   if (init_b)
5417     {
5418       /* Verify a basic constraint: classic distance vectors should
5419          always be lexicographically positive.
5420
5421          Data references are collected in the order of execution of
5422          the program, thus for the following loop
5423
5424          | for (i = 1; i < 100; i++)
5425          |   for (j = 1; j < 100; j++)
5426          |     {
5427          |       t = T[j+1][i-1];  // A
5428          |       T[j][i] = t + 2;  // B
5429          |     }
5430
5431          references are collected following the direction of the wind:
5432          A then B.  The data dependence tests are performed also
5433          following this order, such that we're looking at the distance
5434          separating the elements accessed by A from the elements later
5435          accessed by B.  But in this example, the distance returned by
5436          test_dep (A, B) is lexicographically negative (-1, 1), that
5437          means that the access A occurs later than B with respect to
5438          the outer loop, ie. we're actually looking upwind.  In this
5439          case we solve test_dep (B, A) looking downwind to the
5440          lexicographically positive solution, that returns the
5441          distance vector (1, -1).  */
5442       if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
5443         {
5444           lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5445           if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
5446             return false;
5447           compute_subscript_distance (ddr);
5448           if (!build_classic_dist_vector_1 (ddr, 1, 0, save_v, &init_b,
5449                                             &index_carry))
5450             return false;
5451           save_dist_v (ddr, save_v);
5452           DDR_REVERSED_P (ddr) = true;
5453
5454           /* In this case there is a dependence forward for all the
5455              outer loops:
5456
5457              | for (k = 1; k < 100; k++)
5458              |  for (i = 1; i < 100; i++)
5459              |   for (j = 1; j < 100; j++)
5460              |     {
5461              |       t = T[j+1][i-1];  // A
5462              |       T[j][i] = t + 2;  // B
5463              |     }
5464
5465              the vectors are:
5466              (0,  1, -1)
5467              (1,  1, -1)
5468              (1, -1,  1)
5469           */
5470           if (DDR_NB_LOOPS (ddr) > 1)
5471             {
5472               add_outer_distances (ddr, save_v, index_carry);
5473               add_outer_distances (ddr, dist_v, index_carry);
5474             }
5475         }
5476       else
5477         {
5478           lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5479           lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
5480
5481           if (DDR_NB_LOOPS (ddr) > 1)
5482             {
5483               lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5484
5485               if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
5486                 return false;
5487               compute_subscript_distance (ddr);
5488               if (!build_classic_dist_vector_1 (ddr, 1, 0, opposite_v, &init_b,
5489                                                 &index_carry))
5490                 return false;
5491
5492               save_dist_v (ddr, save_v);
5493               add_outer_distances (ddr, dist_v, index_carry);
5494               add_outer_distances (ddr, opposite_v, index_carry);
5495             }
5496           else
5497             save_dist_v (ddr, save_v);
5498         }
5499     }
5500   else
5501     {
5502       /* There is a distance of 1 on all the outer loops: Example:
5503          there is a dependence of distance 1 on loop_1 for the array A.
5504
5505          | loop_1
5506          |   A[5] = ...
5507          | endloop
5508       */
5509       add_outer_distances (ddr, dist_v,
5510                            lambda_vector_first_nz (dist_v,
5511                                                    DDR_NB_LOOPS (ddr), 0));
5512     }
5513
5514   if (dump_file && (dump_flags & TDF_DETAILS))
5515     {
5516       unsigned i;
5517
5518       fprintf (dump_file, "(build_classic_dist_vector\n");
5519       for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
5520         {
5521           fprintf (dump_file, "  dist_vector = (");
5522           print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
5523                                DDR_NB_LOOPS (ddr));
5524           fprintf (dump_file, "  )\n");
5525         }
5526       fprintf (dump_file, ")\n");
5527     }
5528
5529   return true;
5530 }
5531
5532 /* Return the direction for a given distance.
5533    FIXME: Computing dir this way is suboptimal, since dir can catch
5534    cases that dist is unable to represent.  */
5535
5536 static inline enum data_dependence_direction
5537 dir_from_dist (int dist)
5538 {
5539   if (dist > 0)
5540     return dir_positive;
5541   else if (dist < 0)
5542     return dir_negative;
5543   else
5544     return dir_equal;
5545 }
5546
5547 /* Compute the classic per loop direction vector.  DDR is the data
5548    dependence relation to build a vector from.  */
5549
5550 static void
5551 build_classic_dir_vector (struct data_dependence_relation *ddr)
5552 {
5553   unsigned i, j;
5554   lambda_vector dist_v;
5555
5556   FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
5557     {
5558       lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5559
5560       for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
5561         dir_v[j] = dir_from_dist (dist_v[j]);
5562
5563       save_dir_v (ddr, dir_v);
5564     }
5565 }
5566
5567 /* Helper function.  Returns true when there is a dependence between the
5568    data references.  A_INDEX is the index of the first reference (0 for
5569    DDR_A, 1 for DDR_B) and B_INDEX is the index of the second reference.  */
5570
5571 static bool
5572 subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
5573                                unsigned int a_index, unsigned int b_index,
5574                                class loop *loop_nest)
5575 {
5576   unsigned int i;
5577   tree last_conflicts;
5578   struct subscript *subscript;
5579   tree res = NULL_TREE;
5580
5581   for (i = 0; DDR_SUBSCRIPTS (ddr).iterate (i, &subscript); i++)
5582     {
5583       conflict_function *overlaps_a, *overlaps_b;
5584
5585       analyze_overlapping_iterations (SUB_ACCESS_FN (subscript, a_index),
5586                                       SUB_ACCESS_FN (subscript, b_index),
5587                                       &overlaps_a, &overlaps_b,
5588                                       &last_conflicts, loop_nest);
5589
5590       if (SUB_CONFLICTS_IN_A (subscript))
5591         free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
5592       if (SUB_CONFLICTS_IN_B (subscript))
5593         free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
5594
5595       SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
5596       SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
5597       SUB_LAST_CONFLICT (subscript) = last_conflicts;
5598
5599       /* If there is any undetermined conflict function we have to
5600          give a conservative answer in case we cannot prove that
5601          no dependence exists when analyzing another subscript.  */
5602       if (CF_NOT_KNOWN_P (overlaps_a)
5603           || CF_NOT_KNOWN_P (overlaps_b))
5604         {
5605           res = chrec_dont_know;
5606           continue;
5607         }
5608
5609       /* When there is a subscript with no dependence we can stop.  */
5610       else if (CF_NO_DEPENDENCE_P (overlaps_a)
5611                || CF_NO_DEPENDENCE_P (overlaps_b))
5612         {
5613           res = chrec_known;
5614           break;
5615         }
5616     }
5617
5618   if (res == NULL_TREE)
5619     return true;
5620
5621   if (res == chrec_known)
5622     dependence_stats.num_dependence_independent++;
5623   else
5624     dependence_stats.num_dependence_undetermined++;
5625   finalize_ddr_dependent (ddr, res);
5626   return false;
5627 }
5628
5629 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR.  */
5630
5631 static void
5632 subscript_dependence_tester (struct data_dependence_relation *ddr,
5633                              class loop *loop_nest)
5634 {
5635   if (subscript_dependence_tester_1 (ddr, 0, 1, loop_nest))
5636     dependence_stats.num_dependence_dependent++;
5637
5638   compute_subscript_distance (ddr);
5639   if (build_classic_dist_vector (ddr, loop_nest))
5640     build_classic_dir_vector (ddr);
5641 }
5642
5643 /* Returns true when all the access functions of A are affine or
5644    constant with respect to LOOP_NEST.  */
5645
5646 static bool
5647 access_functions_are_affine_or_constant_p (const struct data_reference *a,
5648                                            const class loop *loop_nest)
5649 {
5650   vec<tree> fns = DR_ACCESS_FNS (a);
5651   for (tree t : fns)
5652     if (!evolution_function_is_invariant_p (t, loop_nest->num)
5653         && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
5654       return false;
5655
5656   return true;
5657 }
5658
5659 /* This computes the affine dependence relation between A and B with
5660    respect to LOOP_NEST.  CHREC_KNOWN is used for representing the
5661    independence between two accesses, while CHREC_DONT_KNOW is used
5662    for representing the unknown relation.
5663
5664    Note that it is possible to stop the computation of the dependence
5665    relation the first time we detect a CHREC_KNOWN element for a given
5666    subscript.  */
5667
5668 void
5669 compute_affine_dependence (struct data_dependence_relation *ddr,
5670                            class loop *loop_nest)
5671 {
5672   struct data_reference *dra = DDR_A (ddr);
5673   struct data_reference *drb = DDR_B (ddr);
5674
5675   if (dump_file && (dump_flags & TDF_DETAILS))
5676     {
5677       fprintf (dump_file, "(compute_affine_dependence\n");
5678       fprintf (dump_file, "  ref_a: ");
5679       print_generic_expr (dump_file, DR_REF (dra));
5680       fprintf (dump_file, ", stmt_a: ");
5681       print_gimple_stmt (dump_file, DR_STMT (dra), 0, TDF_SLIM);
5682       fprintf (dump_file, "  ref_b: ");
5683       print_generic_expr (dump_file, DR_REF (drb));
5684       fprintf (dump_file, ", stmt_b: ");
5685       print_gimple_stmt (dump_file, DR_STMT (drb), 0, TDF_SLIM);
5686     }
5687
5688   /* Analyze only when the dependence relation is not yet known.  */
5689   if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
5690     {
5691       dependence_stats.num_dependence_tests++;
5692
5693       if (access_functions_are_affine_or_constant_p (dra, loop_nest)
5694           && access_functions_are_affine_or_constant_p (drb, loop_nest))
5695         subscript_dependence_tester (ddr, loop_nest);
5696
5697       /* As a last case, if the dependence cannot be determined, or if
5698          the dependence is considered too difficult to determine, answer
5699          "don't know".  */
5700       else
5701         {
5702           dependence_stats.num_dependence_undetermined++;
5703
5704           if (dump_file && (dump_flags & TDF_DETAILS))
5705             {
5706               fprintf (dump_file, "Data ref a:\n");
5707               dump_data_reference (dump_file, dra);
5708               fprintf (dump_file, "Data ref b:\n");
5709               dump_data_reference (dump_file, drb);
5710               fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
5711             }
5712           finalize_ddr_dependent (ddr, chrec_dont_know);
5713         }
5714     }
5715
5716   if (dump_file && (dump_flags & TDF_DETAILS))
5717     {
5718       if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
5719         fprintf (dump_file, ") -> no dependence\n");
5720       else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
5721         fprintf (dump_file, ") -> dependence analysis failed\n");
5722       else
5723         fprintf (dump_file, ")\n");
5724     }
5725 }
5726
5727 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
5728    the data references in DATAREFS, in the LOOP_NEST.  When
5729    COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
5730    relations.  Return true when successful, i.e. data references number
5731    is small enough to be handled.  */
5732
5733 bool
5734 compute_all_dependences (const vec<data_reference_p> &datarefs,
5735                          vec<ddr_p> *dependence_relations,
5736                          const vec<loop_p> &loop_nest,
5737                          bool compute_self_and_rr)
5738 {
5739   struct data_dependence_relation *ddr;
5740   struct data_reference *a, *b;
5741   unsigned int i, j;
5742
5743   if ((int) datarefs.length ()
5744       > param_loop_max_datarefs_for_datadeps)
5745     {
5746       struct data_dependence_relation *ddr;
5747
5748       /* Insert a single relation into dependence_relations:
5749          chrec_dont_know.  */
5750       ddr = initialize_data_dependence_relation (NULL, NULL, loop_nest);
5751       dependence_relations->safe_push (ddr);
5752       return false;
5753     }
5754
5755   FOR_EACH_VEC_ELT (datarefs, i, a)
5756     for (j = i + 1; datarefs.iterate (j, &b); j++)
5757       if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
5758         {
5759           ddr = initialize_data_dependence_relation (a, b, loop_nest);
5760           dependence_relations->safe_push (ddr);
5761           if (loop_nest.exists ())
5762             compute_affine_dependence (ddr, loop_nest[0]);
5763         }
5764
5765   if (compute_self_and_rr)
5766     FOR_EACH_VEC_ELT (datarefs, i, a)
5767       {
5768         ddr = initialize_data_dependence_relation (a, a, loop_nest);
5769         dependence_relations->safe_push (ddr);
5770         if (loop_nest.exists ())
5771           compute_affine_dependence (ddr, loop_nest[0]);
5772       }
5773
5774   return true;
5775 }
5776
5777 /* Describes a location of a memory reference.  */
5778
5779 struct data_ref_loc
5780 {
5781   /* The memory reference.  */
5782   tree ref;
5783
5784   /* True if the memory reference is read.  */
5785   bool is_read;
5786
5787   /* True if the data reference is conditional within the containing
5788      statement, i.e. if it might not occur even when the statement
5789      is executed and runs to completion.  */
5790   bool is_conditional_in_stmt;
5791 };
5792
5793
5794 /* Stores the locations of memory references in STMT to REFERENCES.  Returns
5795    true if STMT clobbers memory, false otherwise.  */
5796
5797 static bool
5798 get_references_in_stmt (gimple *stmt, vec<data_ref_loc, va_heap> *references)
5799 {
5800   bool clobbers_memory = false;
5801   data_ref_loc ref;
5802   tree op0, op1;
5803   enum gimple_code stmt_code = gimple_code (stmt);
5804
5805   /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
5806      As we cannot model data-references to not spelled out
5807      accesses give up if they may occur.  */
5808   if (stmt_code == GIMPLE_CALL
5809       && !(gimple_call_flags (stmt) & ECF_CONST))
5810     {
5811       /* Allow IFN_GOMP_SIMD_LANE in their own loops.  */
5812       if (gimple_call_internal_p (stmt))
5813         switch (gimple_call_internal_fn (stmt))
5814           {
5815           case IFN_GOMP_SIMD_LANE:
5816             {
5817               class loop *loop = gimple_bb (stmt)->loop_father;
5818               tree uid = gimple_call_arg (stmt, 0);
5819               gcc_assert (TREE_CODE (uid) == SSA_NAME);
5820               if (loop == NULL
5821                   || loop->simduid != SSA_NAME_VAR (uid))
5822                 clobbers_memory = true;
5823               break;
5824             }
5825           case IFN_MASK_LOAD:
5826           case IFN_MASK_STORE:
5827           break;
5828           case IFN_MASK_CALL:
5829             {
5830               tree orig_fndecl
5831                 = gimple_call_addr_fndecl (gimple_call_arg (stmt, 0));
5832               if (!orig_fndecl
5833                   || (flags_from_decl_or_type (orig_fndecl) & ECF_CONST) == 0)
5834                 clobbers_memory = true;
5835             }
5836             break;
5837           default:
5838             clobbers_memory = true;
5839             break;
5840           }
5841       else
5842         clobbers_memory = true;
5843     }
5844   else if (stmt_code == GIMPLE_ASM
5845            && (gimple_asm_volatile_p (as_a <gasm *> (stmt))
5846                || gimple_vuse (stmt)))
5847     clobbers_memory = true;
5848
5849   if (!gimple_vuse (stmt))
5850     return clobbers_memory;
5851
5852   if (stmt_code == GIMPLE_ASSIGN)
5853     {
5854       tree base;
5855       op0 = gimple_assign_lhs (stmt);
5856       op1 = gimple_assign_rhs1 (stmt);
5857
5858       if (DECL_P (op1)
5859           || (REFERENCE_CLASS_P (op1)
5860               && (base = get_base_address (op1))
5861               && TREE_CODE (base) != SSA_NAME
5862               && !is_gimple_min_invariant (base)))
5863         {
5864           ref.ref = op1;
5865           ref.is_read = true;
5866           ref.is_conditional_in_stmt = false;
5867           references->safe_push (ref);
5868         }
5869     }
5870   else if (stmt_code == GIMPLE_CALL)
5871     {
5872       unsigned i = 0, n;
5873       tree ptr, type;
5874       unsigned int align;
5875
5876       ref.is_read = false;
5877       if (gimple_call_internal_p (stmt))
5878         switch (gimple_call_internal_fn (stmt))
5879           {
5880           case IFN_MASK_LOAD:
5881             if (gimple_call_lhs (stmt) == NULL_TREE)
5882               break;
5883             ref.is_read = true;
5884             /* FALLTHRU */
5885           case IFN_MASK_STORE:
5886             ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)), 0);
5887             align = tree_to_shwi (gimple_call_arg (stmt, 1));
5888             if (ref.is_read)
5889               type = TREE_TYPE (gimple_call_lhs (stmt));
5890             else
5891               type = TREE_TYPE (gimple_call_arg (stmt, 3));
5892             if (TYPE_ALIGN (type) != align)
5893               type = build_aligned_type (type, align);
5894             ref.is_conditional_in_stmt = true;
5895             ref.ref = fold_build2 (MEM_REF, type, gimple_call_arg (stmt, 0),
5896                                    ptr);
5897             references->safe_push (ref);
5898             return false;
5899           case IFN_MASK_CALL:
5900             i = 1;
5901             gcc_fallthrough ();
5902           default:
5903             break;
5904           }
5905
5906       op0 = gimple_call_lhs (stmt);
5907       n = gimple_call_num_args (stmt);
5908       for (; i < n; i++)
5909         {
5910           op1 = gimple_call_arg (stmt, i);
5911
5912           if (DECL_P (op1)
5913               || (REFERENCE_CLASS_P (op1) && get_base_address (op1)))
5914             {
5915               ref.ref = op1;
5916               ref.is_read = true;
5917               ref.is_conditional_in_stmt = false;
5918               references->safe_push (ref);
5919             }
5920         }
5921     }
5922   else
5923     return clobbers_memory;
5924
5925   if (op0
5926       && (DECL_P (op0)
5927           || (REFERENCE_CLASS_P (op0) && get_base_address (op0))))
5928     {
5929       ref.ref = op0;
5930       ref.is_read = false;
5931       ref.is_conditional_in_stmt = false;
5932       references->safe_push (ref);
5933     }
5934   return clobbers_memory;
5935 }
5936
5937
5938 /* Returns true if the loop-nest has any data reference.  */
5939
5940 bool
5941 loop_nest_has_data_refs (loop_p loop)
5942 {
5943   basic_block *bbs = get_loop_body (loop);
5944   auto_vec<data_ref_loc, 3> references;
5945
5946   for (unsigned i = 0; i < loop->num_nodes; i++)
5947     {
5948       basic_block bb = bbs[i];
5949       gimple_stmt_iterator bsi;
5950
5951       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5952         {
5953           gimple *stmt = gsi_stmt (bsi);
5954           get_references_in_stmt (stmt, &references);
5955           if (references.length ())
5956             {
5957               free (bbs);
5958               return true;
5959             }
5960         }
5961     }
5962   free (bbs);
5963   return false;
5964 }
5965
5966 /* Stores the data references in STMT to DATAREFS.  If there is an unanalyzable
5967    reference, returns false, otherwise returns true.  NEST is the outermost
5968    loop of the loop nest in which the references should be analyzed.  */
5969
5970 opt_result
5971 find_data_references_in_stmt (class loop *nest, gimple *stmt,
5972                               vec<data_reference_p> *datarefs)
5973 {
5974   auto_vec<data_ref_loc, 2> references;
5975   data_reference_p dr;
5976
5977   if (get_references_in_stmt (stmt, &references))
5978     return opt_result::failure_at (stmt, "statement clobbers memory: %G",
5979                                    stmt);
5980
5981   for (const data_ref_loc &ref : references)
5982     {
5983       dr = create_data_ref (nest ? loop_preheader_edge (nest) : NULL,
5984                             loop_containing_stmt (stmt), ref.ref,
5985                             stmt, ref.is_read, ref.is_conditional_in_stmt);
5986       gcc_assert (dr != NULL);
5987       datarefs->safe_push (dr);
5988     }
5989
5990   return opt_result::success ();
5991 }
5992
5993 /* Stores the data references in STMT to DATAREFS.  If there is an
5994    unanalyzable reference, returns false, otherwise returns true.
5995    NEST is the outermost loop of the loop nest in which the references
5996    should be instantiated, LOOP is the loop in which the references
5997    should be analyzed.  */
5998
5999 bool
6000 graphite_find_data_references_in_stmt (edge nest, loop_p loop, gimple *stmt,
6001                                        vec<data_reference_p> *datarefs)
6002 {
6003   auto_vec<data_ref_loc, 2> references;
6004   bool ret = true;
6005   data_reference_p dr;
6006
6007   if (get_references_in_stmt (stmt, &references))
6008     return false;
6009
6010   for (const data_ref_loc &ref : references)
6011     {
6012       dr = create_data_ref (nest, loop, ref.ref, stmt, ref.is_read,
6013                             ref.is_conditional_in_stmt);
6014       gcc_assert (dr != NULL);
6015       datarefs->safe_push (dr);
6016     }
6017
6018   return ret;
6019 }
6020
6021 /* Search the data references in LOOP, and record the information into
6022    DATAREFS.  Returns chrec_dont_know when failing to analyze a
6023    difficult case, returns NULL_TREE otherwise.  */
6024
6025 tree
6026 find_data_references_in_bb (class loop *loop, basic_block bb,
6027                             vec<data_reference_p> *datarefs)
6028 {
6029   gimple_stmt_iterator bsi;
6030
6031   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
6032     {
6033       gimple *stmt = gsi_stmt (bsi);
6034
6035       if (!find_data_references_in_stmt (loop, stmt, datarefs))
6036         {
6037           struct data_reference *res;
6038           res = XCNEW (struct data_reference);
6039           datarefs->safe_push (res);
6040
6041           return chrec_dont_know;
6042         }
6043     }
6044
6045   return NULL_TREE;
6046 }
6047
6048 /* Search the data references in LOOP, and record the information into
6049    DATAREFS.  Returns chrec_dont_know when failing to analyze a
6050    difficult case, returns NULL_TREE otherwise.
6051
6052    TODO: This function should be made smarter so that it can handle address
6053    arithmetic as if they were array accesses, etc.  */
6054
6055 tree
6056 find_data_references_in_loop (class loop *loop,
6057                               vec<data_reference_p> *datarefs)
6058 {
6059   basic_block bb, *bbs;
6060   unsigned int i;
6061
6062   bbs = get_loop_body_in_dom_order (loop);
6063
6064   for (i = 0; i < loop->num_nodes; i++)
6065     {
6066       bb = bbs[i];
6067
6068       if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
6069         {
6070           free (bbs);
6071           return chrec_dont_know;
6072         }
6073     }
6074   free (bbs);
6075
6076   return NULL_TREE;
6077 }
6078
6079 /* Return the alignment in bytes that DRB is guaranteed to have at all
6080    times.  */
6081
6082 unsigned int
6083 dr_alignment (innermost_loop_behavior *drb)
6084 {
6085   /* Get the alignment of BASE_ADDRESS + INIT.  */
6086   unsigned int alignment = drb->base_alignment;
6087   unsigned int misalignment = (drb->base_misalignment
6088                                + TREE_INT_CST_LOW (drb->init));
6089   if (misalignment != 0)
6090     alignment = MIN (alignment, misalignment & -misalignment);
6091
6092   /* Cap it to the alignment of OFFSET.  */
6093   if (!integer_zerop (drb->offset))
6094     alignment = MIN (alignment, drb->offset_alignment);
6095
6096   /* Cap it to the alignment of STEP.  */
6097   if (!integer_zerop (drb->step))
6098     alignment = MIN (alignment, drb->step_alignment);
6099
6100   return alignment;
6101 }
6102
6103 /* If BASE is a pointer-typed SSA name, try to find the object that it
6104    is based on.  Return this object X on success and store the alignment
6105    in bytes of BASE - &X in *ALIGNMENT_OUT.  */
6106
6107 static tree
6108 get_base_for_alignment_1 (tree base, unsigned int *alignment_out)
6109 {
6110   if (TREE_CODE (base) != SSA_NAME || !POINTER_TYPE_P (TREE_TYPE (base)))
6111     return NULL_TREE;
6112
6113   gimple *def = SSA_NAME_DEF_STMT (base);
6114   base = analyze_scalar_evolution (loop_containing_stmt (def), base);
6115
6116   /* Peel chrecs and record the minimum alignment preserved by
6117      all steps.  */
6118   unsigned int alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
6119   while (TREE_CODE (base) == POLYNOMIAL_CHREC)
6120     {
6121       unsigned int step_alignment = highest_pow2_factor (CHREC_RIGHT (base));
6122       alignment = MIN (alignment, step_alignment);
6123       base = CHREC_LEFT (base);
6124     }
6125
6126   /* Punt if the expression is too complicated to handle.  */
6127   if (tree_contains_chrecs (base, NULL) || !POINTER_TYPE_P (TREE_TYPE (base)))
6128     return NULL_TREE;
6129
6130   /* The only useful cases are those for which a dereference folds to something
6131      other than an INDIRECT_REF.  */
6132   tree ref_type = TREE_TYPE (TREE_TYPE (base));
6133   tree ref = fold_indirect_ref_1 (UNKNOWN_LOCATION, ref_type, base);
6134   if (!ref)
6135     return NULL_TREE;
6136
6137   /* Analyze the base to which the steps we peeled were applied.  */
6138   poly_int64 bitsize, bitpos, bytepos;
6139   machine_mode mode;
6140   int unsignedp, reversep, volatilep;
6141   tree offset;
6142   base = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
6143                               &unsignedp, &reversep, &volatilep);
6144   if (!base || !multiple_p (bitpos, BITS_PER_UNIT, &bytepos))
6145     return NULL_TREE;
6146
6147   /* Restrict the alignment to that guaranteed by the offsets.  */
6148   unsigned int bytepos_alignment = known_alignment (bytepos);
6149   if (bytepos_alignment != 0)
6150     alignment = MIN (alignment, bytepos_alignment);
6151   if (offset)
6152     {
6153       unsigned int offset_alignment = highest_pow2_factor (offset);
6154       alignment = MIN (alignment, offset_alignment);
6155     }
6156
6157   *alignment_out = alignment;
6158   return base;
6159 }
6160
6161 /* Return the object whose alignment would need to be changed in order
6162    to increase the alignment of ADDR.  Store the maximum achievable
6163    alignment in *MAX_ALIGNMENT.  */
6164
6165 tree
6166 get_base_for_alignment (tree addr, unsigned int *max_alignment)
6167 {
6168   tree base = get_base_for_alignment_1 (addr, max_alignment);
6169   if (base)
6170     return base;
6171
6172   if (TREE_CODE (addr) == ADDR_EXPR)
6173     addr = TREE_OPERAND (addr, 0);
6174   *max_alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
6175   return addr;
6176 }
6177
6178 /* Recursive helper function.  */
6179
6180 static bool
6181 find_loop_nest_1 (class loop *loop, vec<loop_p> *loop_nest)
6182 {
6183   /* Inner loops of the nest should not contain siblings.  Example:
6184      when there are two consecutive loops,
6185
6186      | loop_0
6187      |   loop_1
6188      |     A[{0, +, 1}_1]
6189      |   endloop_1
6190      |   loop_2
6191      |     A[{0, +, 1}_2]
6192      |   endloop_2
6193      | endloop_0
6194
6195      the dependence relation cannot be captured by the distance
6196      abstraction.  */
6197   if (loop->next)
6198     return false;
6199
6200   loop_nest->safe_push (loop);
6201   if (loop->inner)
6202     return find_loop_nest_1 (loop->inner, loop_nest);
6203   return true;
6204 }
6205
6206 /* Return false when the LOOP is not well nested.  Otherwise return
6207    true and insert in LOOP_NEST the loops of the nest.  LOOP_NEST will
6208    contain the loops from the outermost to the innermost, as they will
6209    appear in the classic distance vector.  */
6210
6211 bool
6212 find_loop_nest (class loop *loop, vec<loop_p> *loop_nest)
6213 {
6214   loop_nest->safe_push (loop);
6215   if (loop->inner)
6216     return find_loop_nest_1 (loop->inner, loop_nest);
6217   return true;
6218 }
6219
6220 /* Returns true when the data dependences have been computed, false otherwise.
6221    Given a loop nest LOOP, the following vectors are returned:
6222    DATAREFS is initialized to all the array elements contained in this loop,
6223    DEPENDENCE_RELATIONS contains the relations between the data references.
6224    Compute read-read and self relations if
6225    COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE.  */
6226
6227 bool
6228 compute_data_dependences_for_loop (class loop *loop,
6229                                    bool compute_self_and_read_read_dependences,
6230                                    vec<loop_p> *loop_nest,
6231                                    vec<data_reference_p> *datarefs,
6232                                    vec<ddr_p> *dependence_relations)
6233 {
6234   bool res = true;
6235
6236   memset (&dependence_stats, 0, sizeof (dependence_stats));
6237
6238   /* If the loop nest is not well formed, or one of the data references
6239      is not computable, give up without spending time to compute other
6240      dependences.  */
6241   if (!loop
6242       || !find_loop_nest (loop, loop_nest)
6243       || find_data_references_in_loop (loop, datarefs) == chrec_dont_know
6244       || !compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
6245                                    compute_self_and_read_read_dependences))
6246     res = false;
6247
6248   if (dump_file && (dump_flags & TDF_STATS))
6249     {
6250       fprintf (dump_file, "Dependence tester statistics:\n");
6251
6252       fprintf (dump_file, "Number of dependence tests: %d\n",
6253                dependence_stats.num_dependence_tests);
6254       fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
6255                dependence_stats.num_dependence_dependent);
6256       fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
6257                dependence_stats.num_dependence_independent);
6258       fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
6259                dependence_stats.num_dependence_undetermined);
6260
6261       fprintf (dump_file, "Number of subscript tests: %d\n",
6262                dependence_stats.num_subscript_tests);
6263       fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
6264                dependence_stats.num_subscript_undetermined);
6265       fprintf (dump_file, "Number of same subscript function: %d\n",
6266                dependence_stats.num_same_subscript_function);
6267
6268       fprintf (dump_file, "Number of ziv tests: %d\n",
6269                dependence_stats.num_ziv);
6270       fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
6271                dependence_stats.num_ziv_dependent);
6272       fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
6273                dependence_stats.num_ziv_independent);
6274       fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
6275                dependence_stats.num_ziv_unimplemented);
6276
6277       fprintf (dump_file, "Number of siv tests: %d\n",
6278                dependence_stats.num_siv);
6279       fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
6280                dependence_stats.num_siv_dependent);
6281       fprintf (dump_file, "Number of siv tests returning independent: %d\n",
6282                dependence_stats.num_siv_independent);
6283       fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
6284                dependence_stats.num_siv_unimplemented);
6285
6286       fprintf (dump_file, "Number of miv tests: %d\n",
6287                dependence_stats.num_miv);
6288       fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
6289                dependence_stats.num_miv_dependent);
6290       fprintf (dump_file, "Number of miv tests returning independent: %d\n",
6291                dependence_stats.num_miv_independent);
6292       fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
6293                dependence_stats.num_miv_unimplemented);
6294     }
6295
6296   return res;
6297 }
6298
6299 /* Free the memory used by a data dependence relation DDR.  */
6300
6301 void
6302 free_dependence_relation (struct data_dependence_relation *ddr)
6303 {
6304   if (ddr == NULL)
6305     return;
6306
6307   if (DDR_SUBSCRIPTS (ddr).exists ())
6308     free_subscripts (DDR_SUBSCRIPTS (ddr));
6309   DDR_DIST_VECTS (ddr).release ();
6310   DDR_DIR_VECTS (ddr).release ();
6311
6312   free (ddr);
6313 }
6314
6315 /* Free the memory used by the data dependence relations from
6316    DEPENDENCE_RELATIONS.  */
6317
6318 void
6319 free_dependence_relations (vec<ddr_p>& dependence_relations)
6320 {
6321   for (data_dependence_relation *ddr : dependence_relations)
6322     if (ddr)
6323       free_dependence_relation (ddr);
6324
6325   dependence_relations.release ();
6326 }
6327
6328 /* Free the memory used by the data references from DATAREFS.  */
6329
6330 void
6331 free_data_refs (vec<data_reference_p>& datarefs)
6332 {
6333   for (data_reference *dr : datarefs)
6334     free_data_ref (dr);
6335   datarefs.release ();
6336 }
6337
6338 /* Common routine implementing both dr_direction_indicator and
6339    dr_zero_step_indicator.  Return USEFUL_MIN if the indicator is known
6340    to be >= USEFUL_MIN and -1 if the indicator is known to be negative.
6341    Return the step as the indicator otherwise.  */
6342
6343 static tree
6344 dr_step_indicator (struct data_reference *dr, int useful_min)
6345 {
6346   tree step = DR_STEP (dr);
6347   if (!step)
6348     return NULL_TREE;
6349   STRIP_NOPS (step);
6350   /* Look for cases where the step is scaled by a positive constant
6351      integer, which will often be the access size.  If the multiplication
6352      doesn't change the sign (due to overflow effects) then we can
6353      test the unscaled value instead.  */
6354   if (TREE_CODE (step) == MULT_EXPR
6355       && TREE_CODE (TREE_OPERAND (step, 1)) == INTEGER_CST
6356       && tree_int_cst_sgn (TREE_OPERAND (step, 1)) > 0)
6357     {
6358       tree factor = TREE_OPERAND (step, 1);
6359       step = TREE_OPERAND (step, 0);
6360
6361       /* Strip widening and truncating conversions as well as nops.  */
6362       if (CONVERT_EXPR_P (step)
6363           && INTEGRAL_TYPE_P (TREE_TYPE (TREE_OPERAND (step, 0))))
6364         step = TREE_OPERAND (step, 0);
6365       tree type = TREE_TYPE (step);
6366
6367       /* Get the range of step values that would not cause overflow.  */
6368       widest_int minv = (wi::to_widest (TYPE_MIN_VALUE (ssizetype))
6369                          / wi::to_widest (factor));
6370       widest_int maxv = (wi::to_widest (TYPE_MAX_VALUE (ssizetype))
6371                          / wi::to_widest (factor));
6372
6373       /* Get the range of values that the unconverted step actually has.  */
6374       wide_int step_min, step_max;
6375       value_range vr;
6376       if (TREE_CODE (step) != SSA_NAME
6377           || !get_range_query (cfun)->range_of_expr (vr, step)
6378           || vr.undefined_p ())
6379         {
6380           step_min = wi::to_wide (TYPE_MIN_VALUE (type));
6381           step_max = wi::to_wide (TYPE_MAX_VALUE (type));
6382         }
6383       else
6384         {
6385           step_min = vr.lower_bound ();
6386           step_max = vr.upper_bound ();
6387         }
6388
6389       /* Check whether the unconverted step has an acceptable range.  */
6390       signop sgn = TYPE_SIGN (type);
6391       if (wi::les_p (minv, widest_int::from (step_min, sgn))
6392           && wi::ges_p (maxv, widest_int::from (step_max, sgn)))
6393         {
6394           if (wi::ge_p (step_min, useful_min, sgn))
6395             return ssize_int (useful_min);
6396           else if (wi::lt_p (step_max, 0, sgn))
6397             return ssize_int (-1);
6398           else
6399             return fold_convert (ssizetype, step);
6400         }
6401     }
6402   return DR_STEP (dr);
6403 }
6404
6405 /* Return a value that is negative iff DR has a negative step.  */
6406
6407 tree
6408 dr_direction_indicator (struct data_reference *dr)
6409 {
6410   return dr_step_indicator (dr, 0);
6411 }
6412
6413 /* Return a value that is zero iff DR has a zero step.  */
6414
6415 tree
6416 dr_zero_step_indicator (struct data_reference *dr)
6417 {
6418   return dr_step_indicator (dr, 1);
6419 }
6420
6421 /* Return true if DR is known to have a nonnegative (but possibly zero)
6422    step.  */
6423
6424 bool
6425 dr_known_forward_stride_p (struct data_reference *dr)
6426 {
6427   tree indicator = dr_direction_indicator (dr);
6428   tree neg_step_val = fold_binary (LT_EXPR, boolean_type_node,
6429                                    fold_convert (ssizetype, indicator),
6430                                    ssize_int (0));
6431   return neg_step_val && integer_zerop (neg_step_val);
6432 }