gcc/tree-data-ref.cc

   1 /* Data references and dependences detectors.
   2    Copyright (C) 2003-2023 Free Software Foundation, Inc.
   3    Contributed by Sebastian Pop <pop@cri.ensmp.fr>
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 /* This pass walks a given loop structure searching for array
  22    references.  The information about the array accesses is recorded
  23    in DATA_REFERENCE structures.
  24
  25    The basic test for determining the dependences is:
  26    given two access functions chrec1 and chrec2 to a same array, and
  27    x and y two vectors from the iteration domain, the same element of
  28    the array is accessed twice at iterations x and y if and only if:
  29    |             chrec1 (x) == chrec2 (y).
  30
  31    The goals of this analysis are:
  32
  33    - to determine the independence: the relation between two
  34      independent accesses is qualified with the chrec_known (this
  35      information allows a loop parallelization),
  36
  37    - when two data references access the same data, to qualify the
  38      dependence relation with classic dependence representations:
  39
  40        - distance vectors
  41        - direction vectors
  42        - loop carried level dependence
  43        - polyhedron dependence
  44      or with the chains of recurrences based representation,
  45
  46    - to define a knowledge base for storing the data dependence
  47      information,
  48
  49    - to define an interface to access this data.
  50
  51
  52    Definitions:
  53
  54    - subscript: given two array accesses a subscript is the tuple
  55    composed of the access functions for a given dimension.  Example:
  56    Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
  57    (f1, g1), (f2, g2), (f3, g3).
  58
  59    - Diophantine equation: an equation whose coefficients and
  60    solutions are integer constants, for example the equation
  61    |   3*x + 2*y = 1
  62    has an integer solution x = 1 and y = -1.
  63
  64    References:
  65
  66    - "Advanced Compilation for High Performance Computing" by Randy
  67    Allen and Ken Kennedy.
  68    http://citeseer.ist.psu.edu/goff91practical.html
  69
  70    - "Loop Transformations for Restructuring Compilers - The Foundations"
  71    by Utpal Banerjee.
  72
  73
  74 */
  75
  76 #include "config.h"
  77 #include "system.h"
  78 #include "coretypes.h"
  79 #include "backend.h"
  80 #include "rtl.h"
  81 #include "tree.h"
  82 #include "gimple.h"
  83 #include "gimple-pretty-print.h"
  84 #include "alias.h"
  85 #include "fold-const.h"
  86 #include "expr.h"
  87 #include "gimple-iterator.h"
  88 #include "tree-ssa-loop-niter.h"
  89 #include "tree-ssa-loop.h"
  90 #include "tree-ssa.h"
  91 #include "cfgloop.h"
  92 #include "tree-data-ref.h"
  93 #include "tree-scalar-evolution.h"
  94 #include "dumpfile.h"
  95 #include "tree-affine.h"
  96 #include "builtins.h"
  97 #include "tree-eh.h"
  98 #include "ssa.h"
  99 #include "internal-fn.h"
 100 #include "vr-values.h"
 101 #include "range-op.h"
 102 #include "tree-ssa-loop-ivopts.h"
 103 #include "calls.h"
 104
 105 static struct datadep_stats
 106 {
 107   int num_dependence_tests;
 108   int num_dependence_dependent;
 109   int num_dependence_independent;
 110   int num_dependence_undetermined;
 111
 112   int num_subscript_tests;
 113   int num_subscript_undetermined;
 114   int num_same_subscript_function;
 115
 116   int num_ziv;
 117   int num_ziv_independent;
 118   int num_ziv_dependent;
 119   int num_ziv_unimplemented;
 120
 121   int num_siv;
 122   int num_siv_independent;
 123   int num_siv_dependent;
 124   int num_siv_unimplemented;
 125
 126   int num_miv;
 127   int num_miv_independent;
 128   int num_miv_dependent;
 129   int num_miv_unimplemented;
 130 } dependence_stats;
 131
 132 static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
 133                                            unsigned int, unsigned int,
 134                                            class loop *);
 135 /* Returns true iff A divides B.  */
 136
 137 static inline bool
 138 tree_fold_divides_p (const_tree a, const_tree b)
 139 {
 140   gcc_assert (TREE_CODE (a) == INTEGER_CST);
 141   gcc_assert (TREE_CODE (b) == INTEGER_CST);
 142   return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
 143 }
 144
 145 /* Returns true iff A divides B.  */
 146
 147 static inline bool
 148 int_divides_p (lambda_int a, lambda_int b)
 149 {
 150   return ((b % a) == 0);
 151 }
 152
 153 /* Return true if reference REF contains a union access.  */
 154
 155 static bool
 156 ref_contains_union_access_p (tree ref)
 157 {
 158   while (handled_component_p (ref))
 159     {
 160       ref = TREE_OPERAND (ref, 0);
 161       if (TREE_CODE (TREE_TYPE (ref)) == UNION_TYPE
 162           || TREE_CODE (TREE_TYPE (ref)) == QUAL_UNION_TYPE)
 163         return true;
 164     }
 165   return false;
 166 }
 167
 168 \f
 169
 170 /* Dump into FILE all the data references from DATAREFS.  */
 171
 172 static void
 173 dump_data_references (FILE *file, vec<data_reference_p> datarefs)
 174 {
 175   for (data_reference *dr : datarefs)
 176     dump_data_reference (file, dr);
 177 }
 178
 179 /* Unified dump into FILE all the data references from DATAREFS.  */
 180
 181 DEBUG_FUNCTION void
 182 debug (vec<data_reference_p> &ref)
 183 {
 184   dump_data_references (stderr, ref);
 185 }
 186
 187 DEBUG_FUNCTION void
 188 debug (vec<data_reference_p> *ptr)
 189 {
 190   if (ptr)
 191     debug (*ptr);
 192   else
 193     fprintf (stderr, "<nil>\n");
 194 }
 195
 196
 197 /* Dump into STDERR all the data references from DATAREFS.  */
 198
 199 DEBUG_FUNCTION void
 200 debug_data_references (vec<data_reference_p> datarefs)
 201 {
 202   dump_data_references (stderr, datarefs);
 203 }
 204
 205 /* Print to STDERR the data_reference DR.  */
 206
 207 DEBUG_FUNCTION void
 208 debug_data_reference (struct data_reference *dr)
 209 {
 210   dump_data_reference (stderr, dr);
 211 }
 212
 213 /* Dump function for a DATA_REFERENCE structure.  */
 214
 215 void
 216 dump_data_reference (FILE *outf,
 217                      struct data_reference *dr)
 218 {
 219   unsigned int i;
 220
 221   fprintf (outf, "#(Data Ref: \n");
 222   fprintf (outf, "#  bb: %d \n", gimple_bb (DR_STMT (dr))->index);
 223   fprintf (outf, "#  stmt: ");
 224   print_gimple_stmt (outf, DR_STMT (dr), 0);
 225   fprintf (outf, "#  ref: ");
 226   print_generic_stmt (outf, DR_REF (dr));
 227   fprintf (outf, "#  base_object: ");
 228   print_generic_stmt (outf, DR_BASE_OBJECT (dr));
 229
 230   for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
 231     {
 232       fprintf (outf, "#  Access function %d: ", i);
 233       print_generic_stmt (outf, DR_ACCESS_FN (dr, i));
 234     }
 235   fprintf (outf, "#)\n");
 236 }
 237
 238 /* Unified dump function for a DATA_REFERENCE structure.  */
 239
 240 DEBUG_FUNCTION void
 241 debug (data_reference &ref)
 242 {
 243   dump_data_reference (stderr, &ref);
 244 }
 245
 246 DEBUG_FUNCTION void
 247 debug (data_reference *ptr)
 248 {
 249   if (ptr)
 250     debug (*ptr);
 251   else
 252     fprintf (stderr, "<nil>\n");
 253 }
 254
 255
 256 /* Dumps the affine function described by FN to the file OUTF.  */
 257
 258 DEBUG_FUNCTION void
 259 dump_affine_function (FILE *outf, affine_fn fn)
 260 {
 261   unsigned i;
 262   tree coef;
 263
 264   print_generic_expr (outf, fn[0], TDF_SLIM);
 265   for (i = 1; fn.iterate (i, &coef); i++)
 266     {
 267       fprintf (outf, " + ");
 268       print_generic_expr (outf, coef, TDF_SLIM);
 269       fprintf (outf, " * x_%u", i);
 270     }
 271 }
 272
 273 /* Dumps the conflict function CF to the file OUTF.  */
 274
 275 DEBUG_FUNCTION void
 276 dump_conflict_function (FILE *outf, conflict_function *cf)
 277 {
 278   unsigned i;
 279
 280   if (cf->n == NO_DEPENDENCE)
 281     fprintf (outf, "no dependence");
 282   else if (cf->n == NOT_KNOWN)
 283     fprintf (outf, "not known");
 284   else
 285     {
 286       for (i = 0; i < cf->n; i++)
 287         {
 288           if (i != 0)
 289             fprintf (outf, " ");
 290           fprintf (outf, "[");
 291           dump_affine_function (outf, cf->fns[i]);
 292           fprintf (outf, "]");
 293         }
 294     }
 295 }
 296
 297 /* Dump function for a SUBSCRIPT structure.  */
 298
 299 DEBUG_FUNCTION void
 300 dump_subscript (FILE *outf, struct subscript *subscript)
 301 {
 302   conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
 303
 304   fprintf (outf, "\n (subscript \n");
 305   fprintf (outf, "  iterations_that_access_an_element_twice_in_A: ");
 306   dump_conflict_function (outf, cf);
 307   if (CF_NONTRIVIAL_P (cf))
 308     {
 309       tree last_iteration = SUB_LAST_CONFLICT (subscript);
 310       fprintf (outf, "\n  last_conflict: ");
 311       print_generic_expr (outf, last_iteration);
 312     }
 313
 314   cf = SUB_CONFLICTS_IN_B (subscript);
 315   fprintf (outf, "\n  iterations_that_access_an_element_twice_in_B: ");
 316   dump_conflict_function (outf, cf);
 317   if (CF_NONTRIVIAL_P (cf))
 318     {
 319       tree last_iteration = SUB_LAST_CONFLICT (subscript);
 320       fprintf (outf, "\n  last_conflict: ");
 321       print_generic_expr (outf, last_iteration);
 322     }
 323
 324   fprintf (outf, "\n  (Subscript distance: ");
 325   print_generic_expr (outf, SUB_DISTANCE (subscript));
 326   fprintf (outf, " ))\n");
 327 }
 328
 329 /* Print the classic direction vector DIRV to OUTF.  */
 330
 331 DEBUG_FUNCTION void
 332 print_direction_vector (FILE *outf,
 333                         lambda_vector dirv,
 334                         int length)
 335 {
 336   int eq;
 337
 338   for (eq = 0; eq < length; eq++)
 339     {
 340       enum data_dependence_direction dir = ((enum data_dependence_direction)
 341                                             dirv[eq]);
 342
 343       switch (dir)
 344         {
 345         case dir_positive:
 346           fprintf (outf, "    +");
 347           break;
 348         case dir_negative:
 349           fprintf (outf, "    -");
 350           break;
 351         case dir_equal:
 352           fprintf (outf, "    =");
 353           break;
 354         case dir_positive_or_equal:
 355           fprintf (outf, "   +=");
 356           break;
 357         case dir_positive_or_negative:
 358           fprintf (outf, "   +-");
 359           break;
 360         case dir_negative_or_equal:
 361           fprintf (outf, "   -=");
 362           break;
 363         case dir_star:
 364           fprintf (outf, "    *");
 365           break;
 366         default:
 367           fprintf (outf, "indep");
 368           break;
 369         }
 370     }
 371   fprintf (outf, "\n");
 372 }
 373
 374 /* Print a vector of direction vectors.  */
 375
 376 DEBUG_FUNCTION void
 377 print_dir_vectors (FILE *outf, vec<lambda_vector> dir_vects,
 378                    int length)
 379 {
 380   for (lambda_vector v : dir_vects)
 381     print_direction_vector (outf, v, length);
 382 }
 383
 384 /* Print out a vector VEC of length N to OUTFILE.  */
 385
 386 DEBUG_FUNCTION void
 387 print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
 388 {
 389   int i;
 390
 391   for (i = 0; i < n; i++)
 392     fprintf (outfile, HOST_WIDE_INT_PRINT_DEC " ", vector[i]);
 393   fprintf (outfile, "\n");
 394 }
 395
 396 /* Print a vector of distance vectors.  */
 397
 398 DEBUG_FUNCTION void
 399 print_dist_vectors (FILE *outf, vec<lambda_vector> dist_vects,
 400                     int length)
 401 {
 402   for (lambda_vector v : dist_vects)
 403     print_lambda_vector (outf, v, length);
 404 }
 405
 406 /* Dump function for a DATA_DEPENDENCE_RELATION structure.  */
 407
 408 DEBUG_FUNCTION void
 409 dump_data_dependence_relation (FILE *outf, const data_dependence_relation *ddr)
 410 {
 411   struct data_reference *dra, *drb;
 412
 413   fprintf (outf, "(Data Dep: \n");
 414
 415   if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
 416     {
 417       if (ddr)
 418         {
 419           dra = DDR_A (ddr);
 420           drb = DDR_B (ddr);
 421           if (dra)
 422             dump_data_reference (outf, dra);
 423           else
 424             fprintf (outf, "    (nil)\n");
 425           if (drb)
 426             dump_data_reference (outf, drb);
 427           else
 428             fprintf (outf, "    (nil)\n");
 429         }
 430       fprintf (outf, "    (don't know)\n)\n");
 431       return;
 432     }
 433
 434   dra = DDR_A (ddr);
 435   drb = DDR_B (ddr);
 436   dump_data_reference (outf, dra);
 437   dump_data_reference (outf, drb);
 438
 439   if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
 440     fprintf (outf, "    (no dependence)\n");
 441
 442   else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
 443     {
 444       unsigned int i;
 445       class loop *loopi;
 446
 447       subscript *sub;
 448       FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
 449         {
 450           fprintf (outf, "  access_fn_A: ");
 451           print_generic_stmt (outf, SUB_ACCESS_FN (sub, 0));
 452           fprintf (outf, "  access_fn_B: ");
 453           print_generic_stmt (outf, SUB_ACCESS_FN (sub, 1));
 454           dump_subscript (outf, sub);
 455         }
 456
 457       fprintf (outf, "  loop nest: (");
 458       FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr), i, loopi)
 459         fprintf (outf, "%d ", loopi->num);
 460       fprintf (outf, ")\n");
 461
 462       for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
 463         {
 464           fprintf (outf, "  distance_vector: ");
 465           print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
 466                                DDR_NB_LOOPS (ddr));
 467         }
 468
 469       for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
 470         {
 471           fprintf (outf, "  direction_vector: ");
 472           print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
 473                                   DDR_NB_LOOPS (ddr));
 474         }
 475     }
 476
 477   fprintf (outf, ")\n");
 478 }
 479
 480 /* Debug version.  */
 481
 482 DEBUG_FUNCTION void
 483 debug_data_dependence_relation (const struct data_dependence_relation *ddr)
 484 {
 485   dump_data_dependence_relation (stderr, ddr);
 486 }
 487
 488 /* Dump into FILE all the dependence relations from DDRS.  */
 489
 490 DEBUG_FUNCTION void
 491 dump_data_dependence_relations (FILE *file, const vec<ddr_p> &ddrs)
 492 {
 493   for (auto ddr : ddrs)
 494     dump_data_dependence_relation (file, ddr);
 495 }
 496
 497 DEBUG_FUNCTION void
 498 debug (vec<ddr_p> &ref)
 499 {
 500   dump_data_dependence_relations (stderr, ref);
 501 }
 502
 503 DEBUG_FUNCTION void
 504 debug (vec<ddr_p> *ptr)
 505 {
 506   if (ptr)
 507     debug (*ptr);
 508   else
 509     fprintf (stderr, "<nil>\n");
 510 }
 511
 512
 513 /* Dump to STDERR all the dependence relations from DDRS.  */
 514
 515 DEBUG_FUNCTION void
 516 debug_data_dependence_relations (vec<ddr_p> ddrs)
 517 {
 518   dump_data_dependence_relations (stderr, ddrs);
 519 }
 520
 521 /* Dumps the distance and direction vectors in FILE.  DDRS contains
 522    the dependence relations, and VECT_SIZE is the size of the
 523    dependence vectors, or in other words the number of loops in the
 524    considered nest.  */
 525
 526 DEBUG_FUNCTION void
 527 dump_dist_dir_vectors (FILE *file, vec<ddr_p> ddrs)
 528 {
 529   for (data_dependence_relation *ddr : ddrs)
 530     if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
 531       {
 532         for (lambda_vector v : DDR_DIST_VECTS (ddr))
 533           {
 534             fprintf (file, "DISTANCE_V (");
 535             print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
 536             fprintf (file, ")\n");
 537           }
 538
 539         for (lambda_vector v : DDR_DIR_VECTS (ddr))
 540           {
 541             fprintf (file, "DIRECTION_V (");
 542             print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
 543             fprintf (file, ")\n");
 544           }
 545       }
 546
 547   fprintf (file, "\n\n");
 548 }
 549
 550 /* Dumps the data dependence relations DDRS in FILE.  */
 551
 552 DEBUG_FUNCTION void
 553 dump_ddrs (FILE *file, vec<ddr_p> ddrs)
 554 {
 555   for (data_dependence_relation *ddr : ddrs)
 556     dump_data_dependence_relation (file, ddr);
 557
 558   fprintf (file, "\n\n");
 559 }
 560
 561 DEBUG_FUNCTION void
 562 debug_ddrs (vec<ddr_p> ddrs)
 563 {
 564   dump_ddrs (stderr, ddrs);
 565 }
 566
 567 /* If RESULT_RANGE is nonnull, set *RESULT_RANGE to the range of
 568    OP0 CODE OP1, where:
 569
 570    - OP0 CODE OP1 has integral type TYPE
 571    - the range of OP0 is given by OP0_RANGE and
 572    - the range of OP1 is given by OP1_RANGE.
 573
 574    Independently of RESULT_RANGE, try to compute:
 575
 576      DELTA = ((sizetype) OP0 CODE (sizetype) OP1)
 577              - (sizetype) (OP0 CODE OP1)
 578
 579    as a constant and subtract DELTA from the ssizetype constant in *OFF.
 580    Return true on success, or false if DELTA is not known at compile time.
 581
 582    Truncation and sign changes are known to distribute over CODE, i.e.
 583
 584      (itype) (A CODE B) == (itype) A CODE (itype) B
 585
 586    for any integral type ITYPE whose precision is no greater than the
 587    precision of A and B.  */
 588
 589 static bool
 590 compute_distributive_range (tree type, value_range &op0_range,
 591                             tree_code code, value_range &op1_range,
 592                             tree *off, value_range *result_range)
 593 {
 594   gcc_assert (INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type));
 595   if (result_range)
 596     {
 597       range_op_handler op (code);
 598       if (!op.fold_range (*result_range, type, op0_range, op1_range))
 599         result_range->set_varying (type);
 600     }
 601
 602   /* The distributive property guarantees that if TYPE is no narrower
 603      than SIZETYPE,
 604
 605        (sizetype) (OP0 CODE OP1) == (sizetype) OP0 CODE (sizetype) OP1
 606
 607      and so we can treat DELTA as zero.  */
 608   if (TYPE_PRECISION (type) >= TYPE_PRECISION (sizetype))
 609     return true;
 610
 611   /* If overflow is undefined, we can assume that:
 612
 613        X == (ssizetype) OP0 CODE (ssizetype) OP1
 614
 615      is within the range of TYPE, i.e.:
 616
 617        X == (ssizetype) (TYPE) X
 618
 619      Distributing the (TYPE) truncation over X gives:
 620
 621        X == (ssizetype) (OP0 CODE OP1)
 622
 623      Casting both sides to sizetype and distributing the sizetype cast
 624      over X gives:
 625
 626        (sizetype) OP0 CODE (sizetype) OP1 == (sizetype) (OP0 CODE OP1)
 627
 628      and so we can treat DELTA as zero.  */
 629   if (TYPE_OVERFLOW_UNDEFINED (type))
 630     return true;
 631
 632   /* Compute the range of:
 633
 634        (ssizetype) OP0 CODE (ssizetype) OP1
 635
 636      The distributive property guarantees that this has the same bitpattern as:
 637
 638        (sizetype) OP0 CODE (sizetype) OP1
 639
 640      but its range is more conducive to analysis.  */
 641   range_cast (op0_range, ssizetype);
 642   range_cast (op1_range, ssizetype);
 643   value_range wide_range;
 644   range_op_handler op (code);
 645   bool saved_flag_wrapv = flag_wrapv;
 646   flag_wrapv = 1;
 647   if (!op.fold_range (wide_range, ssizetype, op0_range, op1_range))
 648     wide_range.set_varying (ssizetype);;
 649   flag_wrapv = saved_flag_wrapv;
 650   if (wide_range.num_pairs () != 1
 651       || wide_range.varying_p () || wide_range.undefined_p ())
 652     return false;
 653
 654   wide_int lb = wide_range.lower_bound ();
 655   wide_int ub = wide_range.upper_bound ();
 656
 657   /* Calculate the number of times that each end of the range overflows or
 658      underflows TYPE.  We can only calculate DELTA if the numbers match.  */
 659   unsigned int precision = TYPE_PRECISION (type);
 660   if (!TYPE_UNSIGNED (type))
 661     {
 662       wide_int type_min = wi::mask (precision - 1, true, lb.get_precision ());
 663       lb -= type_min;
 664       ub -= type_min;
 665     }
 666   wide_int upper_bits = wi::mask (precision, true, lb.get_precision ());
 667   lb &= upper_bits;
 668   ub &= upper_bits;
 669   if (lb != ub)
 670     return false;
 671
 672   /* OP0 CODE OP1 overflows exactly arshift (LB, PRECISION) times, with
 673      negative values indicating underflow.  The low PRECISION bits of LB
 674      are clear, so DELTA is therefore LB (== UB).  */
 675   *off = wide_int_to_tree (ssizetype, wi::to_wide (*off) - lb);
 676   return true;
 677 }
 678
 679 /* Return true if (sizetype) OP == (sizetype) (TO_TYPE) OP,
 680    given that OP has type FROM_TYPE and range RANGE.  Both TO_TYPE and
 681    FROM_TYPE are integral types.  */
 682
 683 static bool
 684 nop_conversion_for_offset_p (tree to_type, tree from_type, value_range &range)
 685 {
 686   gcc_assert (INTEGRAL_TYPE_P (to_type)
 687               && INTEGRAL_TYPE_P (from_type)
 688               && !TYPE_OVERFLOW_TRAPS (to_type)
 689               && !TYPE_OVERFLOW_TRAPS (from_type));
 690
 691   /* Converting to something no narrower than sizetype and then to sizetype
 692      is equivalent to converting directly to sizetype.  */
 693   if (TYPE_PRECISION (to_type) >= TYPE_PRECISION (sizetype))
 694     return true;
 695
 696   /* Check whether TO_TYPE can represent all values that FROM_TYPE can.  */
 697   if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type)
 698       && (TYPE_UNSIGNED (from_type) || !TYPE_UNSIGNED (to_type)))
 699     return true;
 700
 701   /* For narrowing conversions, we could in principle test whether
 702      the bits in FROM_TYPE but not in TO_TYPE have a fixed value
 703      and apply a constant adjustment.
 704
 705      For other conversions (which involve a sign change) we could
 706      check that the signs are always equal, and apply a constant
 707      adjustment if the signs are negative.
 708
 709      However, both cases should be rare.  */
 710   return range_fits_type_p (&range, TYPE_PRECISION (to_type),
 711                             TYPE_SIGN (to_type));
 712 }
 713
 714 static void
 715 split_constant_offset (tree type, tree *var, tree *off,
 716                        value_range *result_range,
 717                        hash_map<tree, std::pair<tree, tree> > &cache,
 718                        unsigned *limit);
 719
 720 /* Helper function for split_constant_offset.  If TYPE is a pointer type,
 721    try to express OP0 CODE OP1 as:
 722
 723      POINTER_PLUS <*VAR, (sizetype) *OFF>
 724
 725    where:
 726
 727    - *VAR has type TYPE
 728    - *OFF is a constant of type ssizetype.
 729
 730    If TYPE is an integral type, try to express (sizetype) (OP0 CODE OP1) as:
 731
 732      *VAR + (sizetype) *OFF
 733
 734    where:
 735
 736    - *VAR has type sizetype
 737    - *OFF is a constant of type ssizetype.
 738
 739    In both cases, OP0 CODE OP1 has type TYPE.
 740
 741    Return true on success.  A false return value indicates that we can't
 742    do better than set *OFF to zero.
 743
 744    When returning true, set RESULT_RANGE to the range of OP0 CODE OP1,
 745    if RESULT_RANGE is nonnull and if we can do better than assume VR_VARYING.
 746
 747    CACHE caches {*VAR, *OFF} pairs for SSA names that we've previously
 748    visited.  LIMIT counts down the number of SSA names that we are
 749    allowed to process before giving up.  */
 750
 751 static bool
 752 split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
 753                          tree *var, tree *off, value_range *result_range,
 754                          hash_map<tree, std::pair<tree, tree> > &cache,
 755                          unsigned *limit)
 756 {
 757   tree var0, var1;
 758   tree off0, off1;
 759   value_range op0_range, op1_range;
 760
 761   *var = NULL_TREE;
 762   *off = NULL_TREE;
 763
 764   if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type))
 765     return false;
 766
 767   switch (code)
 768     {
 769     case INTEGER_CST:
 770       *var = size_int (0);
 771       *off = fold_convert (ssizetype, op0);
 772       if (result_range)
 773         {
 774           wide_int w = wi::to_wide (op0);
 775           result_range->set (TREE_TYPE (op0), w, w);
 776         }
 777       return true;
 778
 779     case POINTER_PLUS_EXPR:
 780       split_constant_offset (op0, &var0, &off0, nullptr, cache, limit);
 781       split_constant_offset (op1, &var1, &off1, nullptr, cache, limit);
 782       *var = fold_build2 (POINTER_PLUS_EXPR, type, var0, var1);
 783       *off = size_binop (PLUS_EXPR, off0, off1);
 784       return true;
 785
 786     case PLUS_EXPR:
 787     case MINUS_EXPR:
 788       split_constant_offset (op0, &var0, &off0, &op0_range, cache, limit);
 789       split_constant_offset (op1, &var1, &off1, &op1_range, cache, limit);
 790       *off = size_binop (code, off0, off1);
 791       if (!compute_distributive_range (type, op0_range, code, op1_range,
 792                                        off, result_range))
 793         return false;
 794       *var = fold_build2 (code, sizetype, var0, var1);
 795       return true;
 796
 797     case MULT_EXPR:
 798       if (TREE_CODE (op1) != INTEGER_CST)
 799         return false;
 800
 801       split_constant_offset (op0, &var0, &off0, &op0_range, cache, limit);
 802       op1_range.set (TREE_TYPE (op1), wi::to_wide (op1), wi::to_wide (op1));
 803       *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
 804       if (!compute_distributive_range (type, op0_range, code, op1_range,
 805                                        off, result_range))
 806         return false;
 807       *var = fold_build2 (MULT_EXPR, sizetype, var0,
 808                           fold_convert (sizetype, op1));
 809       return true;
 810
 811     case ADDR_EXPR:
 812       {
 813         tree base, poffset;
 814         poly_int64 pbitsize, pbitpos, pbytepos;
 815         machine_mode pmode;
 816         int punsignedp, preversep, pvolatilep;
 817
 818         op0 = TREE_OPERAND (op0, 0);
 819         base
 820           = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset, &pmode,
 821                                  &punsignedp, &preversep, &pvolatilep);
 822
 823         if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
 824           return false;
 825         base = build_fold_addr_expr (base);
 826         off0 = ssize_int (pbytepos);
 827
 828         if (poffset)
 829           {
 830             split_constant_offset (poffset, &poffset, &off1, nullptr,
 831                                    cache, limit);
 832             off0 = size_binop (PLUS_EXPR, off0, off1);
 833             base = fold_build_pointer_plus (base, poffset);
 834           }
 835
 836         var0 = fold_convert (type, base);
 837
 838         /* If variable length types are involved, punt, otherwise casts
 839            might be converted into ARRAY_REFs in gimplify_conversion.
 840            To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
 841            possibly no longer appears in current GIMPLE, might resurface.
 842            This perhaps could run
 843            if (CONVERT_EXPR_P (var0))
 844              {
 845                gimplify_conversion (&var0);
 846                // Attempt to fill in any within var0 found ARRAY_REF's
 847                // element size from corresponding op embedded ARRAY_REF,
 848                // if unsuccessful, just punt.
 849              }  */
 850         while (POINTER_TYPE_P (type))
 851           type = TREE_TYPE (type);
 852         if (int_size_in_bytes (type) < 0)
 853           return false;
 854
 855         *var = var0;
 856         *off = off0;
 857         return true;
 858       }
 859
 860     case SSA_NAME:
 861       {
 862         if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op0))
 863           return false;
 864
 865         gimple *def_stmt = SSA_NAME_DEF_STMT (op0);
 866         enum tree_code subcode;
 867
 868         if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
 869           return false;
 870
 871         subcode = gimple_assign_rhs_code (def_stmt);
 872
 873         /* We are using a cache to avoid un-CSEing large amounts of code.  */
 874         bool use_cache = false;
 875         if (!has_single_use (op0)
 876             && (subcode == POINTER_PLUS_EXPR
 877                 || subcode == PLUS_EXPR
 878                 || subcode == MINUS_EXPR
 879                 || subcode == MULT_EXPR
 880                 || subcode == ADDR_EXPR
 881                 || CONVERT_EXPR_CODE_P (subcode)))
 882           {
 883             use_cache = true;
 884             bool existed;
 885             std::pair<tree, tree> &e = cache.get_or_insert (op0, &existed);
 886             if (existed)
 887               {
 888                 if (integer_zerop (e.second))
 889                   return false;
 890                 *var = e.first;
 891                 *off = e.second;
 892                 /* The caller sets the range in this case.  */
 893                 return true;
 894               }
 895             e = std::make_pair (op0, ssize_int (0));
 896           }
 897
 898         if (*limit == 0)
 899           return false;
 900         --*limit;
 901
 902         var0 = gimple_assign_rhs1 (def_stmt);
 903         var1 = gimple_assign_rhs2 (def_stmt);
 904
 905         bool res = split_constant_offset_1 (type, var0, subcode, var1,
 906                                             var, off, nullptr, cache, limit);
 907         if (res && use_cache)
 908           *cache.get (op0) = std::make_pair (*var, *off);
 909         /* The caller sets the range in this case.  */
 910         return res;
 911       }
 912     CASE_CONVERT:
 913       {
 914         /* We can only handle the following conversions:
 915
 916            - Conversions from one pointer type to another pointer type.
 917
 918            - Conversions from one non-trapping integral type to another
 919              non-trapping integral type.  In this case, the recursive
 920              call makes sure that:
 921
 922                (sizetype) OP0
 923
 924              can be expressed as a sizetype operation involving VAR and OFF,
 925              and all we need to do is check whether:
 926
 927                (sizetype) OP0 == (sizetype) (TYPE) OP0
 928
 929            - Conversions from a non-trapping sizetype-size integral type to
 930              a like-sized pointer type.  In this case, the recursive call
 931              makes sure that:
 932
 933                (sizetype) OP0 == *VAR + (sizetype) *OFF
 934
 935              and we can convert that to:
 936
 937                POINTER_PLUS <(TYPE) *VAR, (sizetype) *OFF>
 938
 939            - Conversions from a sizetype-sized pointer type to a like-sized
 940              non-trapping integral type.  In this case, the recursive call
 941              makes sure that:
 942
 943                OP0 == POINTER_PLUS <*VAR, (sizetype) *OFF>
 944
 945              where the POINTER_PLUS and *VAR have the same precision as
 946              TYPE (and the same precision as sizetype).  Then:
 947
 948                (sizetype) (TYPE) OP0 == (sizetype) *VAR + (sizetype) *OFF.  */
 949         tree itype = TREE_TYPE (op0);
 950         if ((POINTER_TYPE_P (itype)
 951              || (INTEGRAL_TYPE_P (itype) && !TYPE_OVERFLOW_TRAPS (itype)))
 952             && (POINTER_TYPE_P (type)
 953                 || (INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type)))
 954             && (POINTER_TYPE_P (type) == POINTER_TYPE_P (itype)
 955                 || (TYPE_PRECISION (type) == TYPE_PRECISION (sizetype)
 956                     && TYPE_PRECISION (itype) == TYPE_PRECISION (sizetype))))
 957           {
 958             if (POINTER_TYPE_P (type))
 959               {
 960                 split_constant_offset (op0, var, off, nullptr, cache, limit);
 961                 *var = fold_convert (type, *var);
 962               }
 963             else if (POINTER_TYPE_P (itype))
 964               {
 965                 split_constant_offset (op0, var, off, nullptr, cache, limit);
 966                 *var = fold_convert (sizetype, *var);
 967               }
 968             else
 969               {
 970                 split_constant_offset (op0, var, off, &op0_range,
 971                                        cache, limit);
 972                 if (!nop_conversion_for_offset_p (type, itype, op0_range))
 973                   return false;
 974                 if (result_range)
 975                   {
 976                     *result_range = op0_range;
 977                     range_cast (*result_range, type);
 978                   }
 979               }
 980             return true;
 981           }
 982         return false;
 983       }
 984
 985     default:
 986       return false;
 987     }
 988 }
 989
 990 /* If EXP has pointer type, try to express it as:
 991
 992      POINTER_PLUS <*VAR, (sizetype) *OFF>
 993
 994    where:
 995
 996    - *VAR has the same type as EXP
 997    - *OFF is a constant of type ssizetype.
 998
 999    If EXP has an integral type, try to express (sizetype) EXP as:
1000
1001      *VAR + (sizetype) *OFF
1002
1003    where:
1004
1005    - *VAR has type sizetype
1006    - *OFF is a constant of type ssizetype.
1007
1008    If EXP_RANGE is nonnull, set it to the range of EXP.
1009
1010    CACHE caches {*VAR, *OFF} pairs for SSA names that we've previously
1011    visited.  LIMIT counts down the number of SSA names that we are
1012    allowed to process before giving up.  */
1013
1014 static void
1015 split_constant_offset (tree exp, tree *var, tree *off, value_range *exp_range,
1016                        hash_map<tree, std::pair<tree, tree> > &cache,
1017                        unsigned *limit)
1018 {
1019   tree type = TREE_TYPE (exp), op0, op1;
1020   enum tree_code code;
1021
1022   code = TREE_CODE (exp);
1023   if (exp_range)
1024     {
1025       *exp_range = type;
1026       if (code == SSA_NAME)
1027         {
1028           value_range vr;
1029           get_range_query (cfun)->range_of_expr (vr, exp);
1030           if (vr.undefined_p ())
1031             vr.set_varying (TREE_TYPE (exp));
1032           tree vr_min, vr_max;
1033           value_range_kind vr_kind = get_legacy_range (vr, vr_min, vr_max);
1034           wide_int var_min = wi::to_wide (vr_min);
1035           wide_int var_max = wi::to_wide (vr_max);
1036           wide_int var_nonzero = get_nonzero_bits (exp);
1037           vr_kind = intersect_range_with_nonzero_bits (vr_kind,
1038                                                        &var_min, &var_max,
1039                                                        var_nonzero,
1040                                                        TYPE_SIGN (type));
1041           /* This check for VR_VARYING is here because the old code
1042              using get_range_info would return VR_RANGE for the entire
1043              domain, instead of VR_VARYING.  The new code normalizes
1044              full-domain ranges to VR_VARYING.  */
1045           if (vr_kind == VR_RANGE || vr_kind == VR_VARYING)
1046             *exp_range = value_range (type, var_min, var_max);
1047         }
1048     }
1049
1050   if (!tree_is_chrec (exp)
1051       && get_gimple_rhs_class (TREE_CODE (exp)) != GIMPLE_TERNARY_RHS)
1052     {
1053       extract_ops_from_tree (exp, &code, &op0, &op1);
1054       if (split_constant_offset_1 (type, op0, code, op1, var, off,
1055                                    exp_range, cache, limit))
1056         return;
1057     }
1058
1059   *var = exp;
1060   if (INTEGRAL_TYPE_P (type))
1061     *var = fold_convert (sizetype, *var);
1062   *off = ssize_int (0);
1063
1064   value_range r;
1065   if (exp_range && code != SSA_NAME
1066       && get_range_query (cfun)->range_of_expr (r, exp)
1067       && !r.undefined_p ())
1068     *exp_range = r;
1069 }
1070
1071 /* Expresses EXP as VAR + OFF, where OFF is a constant.  VAR has the same
1072    type as EXP while OFF has type ssizetype.  */
1073
1074 void
1075 split_constant_offset (tree exp, tree *var, tree *off)
1076 {
1077   unsigned limit = param_ssa_name_def_chain_limit;
1078   static hash_map<tree, std::pair<tree, tree> > *cache;
1079   if (!cache)
1080     cache = new hash_map<tree, std::pair<tree, tree> > (37);
1081   split_constant_offset (exp, var, off, nullptr, *cache, &limit);
1082   *var = fold_convert (TREE_TYPE (exp), *var);
1083   cache->empty ();
1084 }
1085
1086 /* Returns the address ADDR of an object in a canonical shape (without nop
1087    casts, and with type of pointer to the object).  */
1088
1089 static tree
1090 canonicalize_base_object_address (tree addr)
1091 {
1092   tree orig = addr;
1093
1094   STRIP_NOPS (addr);
1095
1096   /* The base address may be obtained by casting from integer, in that case
1097      keep the cast.  */
1098   if (!POINTER_TYPE_P (TREE_TYPE (addr)))
1099     return orig;
1100
1101   if (TREE_CODE (addr) != ADDR_EXPR)
1102     return addr;
1103
1104   return build_fold_addr_expr (TREE_OPERAND (addr, 0));
1105 }
1106
1107 /* Analyze the behavior of memory reference REF within STMT.
1108    There are two modes:
1109
1110    - BB analysis.  In this case we simply split the address into base,
1111      init and offset components, without reference to any containing loop.
1112      The resulting base and offset are general expressions and they can
1113      vary arbitrarily from one iteration of the containing loop to the next.
1114      The step is always zero.
1115
1116    - loop analysis.  In this case we analyze the reference both wrt LOOP
1117      and on the basis that the reference occurs (is "used") in LOOP;
1118      see the comment above analyze_scalar_evolution_in_loop for more
1119      information about this distinction.  The base, init, offset and
1120      step fields are all invariant in LOOP.
1121
1122    Perform BB analysis if LOOP is null, or if LOOP is the function's
1123    dummy outermost loop.  In other cases perform loop analysis.
1124
1125    Return true if the analysis succeeded and store the results in DRB if so.
1126    BB analysis can only fail for bitfield or reversed-storage accesses.  */
1127
1128 opt_result
1129 dr_analyze_innermost (innermost_loop_behavior *drb, tree ref,
1130                       class loop *loop, const gimple *stmt)
1131 {
1132   poly_int64 pbitsize, pbitpos;
1133   tree base, poffset;
1134   machine_mode pmode;
1135   int punsignedp, preversep, pvolatilep;
1136   affine_iv base_iv, offset_iv;
1137   tree init, dinit, step;
1138   bool in_loop = (loop && loop->num);
1139
1140   if (dump_file && (dump_flags & TDF_DETAILS))
1141     fprintf (dump_file, "analyze_innermost: ");
1142
1143   base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset, &pmode,
1144                               &punsignedp, &preversep, &pvolatilep);
1145   gcc_assert (base != NULL_TREE);
1146
1147   poly_int64 pbytepos;
1148   if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
1149     return opt_result::failure_at (stmt,
1150                                    "failed: bit offset alignment.\n");
1151
1152   if (preversep)
1153     return opt_result::failure_at (stmt,
1154                                    "failed: reverse storage order.\n");
1155
1156   /* Calculate the alignment and misalignment for the inner reference.  */
1157   unsigned int HOST_WIDE_INT bit_base_misalignment;
1158   unsigned int bit_base_alignment;
1159   get_object_alignment_1 (base, &bit_base_alignment, &bit_base_misalignment);
1160
1161   /* There are no bitfield references remaining in BASE, so the values
1162      we got back must be whole bytes.  */
1163   gcc_assert (bit_base_alignment % BITS_PER_UNIT == 0
1164               && bit_base_misalignment % BITS_PER_UNIT == 0);
1165   unsigned int base_alignment = bit_base_alignment / BITS_PER_UNIT;
1166   poly_int64 base_misalignment = bit_base_misalignment / BITS_PER_UNIT;
1167
1168   if (TREE_CODE (base) == MEM_REF)
1169     {
1170       if (!integer_zerop (TREE_OPERAND (base, 1)))
1171         {
1172           /* Subtract MOFF from the base and add it to POFFSET instead.
1173              Adjust the misalignment to reflect the amount we subtracted.  */
1174           poly_offset_int moff = mem_ref_offset (base);
1175           base_misalignment -= moff.force_shwi ();
1176           tree mofft = wide_int_to_tree (sizetype, moff);
1177           if (!poffset)
1178             poffset = mofft;
1179           else
1180             poffset = size_binop (PLUS_EXPR, poffset, mofft);
1181         }
1182       base = TREE_OPERAND (base, 0);
1183     }
1184   else
1185     base = build_fold_addr_expr (base);
1186
1187   if (in_loop)
1188     {
1189       if (!simple_iv (loop, loop, base, &base_iv, true))
1190         return opt_result::failure_at
1191           (stmt, "failed: evolution of base is not affine.\n");
1192     }
1193   else
1194     {
1195       base_iv.base = base;
1196       base_iv.step = ssize_int (0);
1197       base_iv.no_overflow = true;
1198     }
1199
1200   if (!poffset)
1201     {
1202       offset_iv.base = ssize_int (0);
1203       offset_iv.step = ssize_int (0);
1204     }
1205   else
1206     {
1207       if (!in_loop)
1208         {
1209           offset_iv.base = poffset;
1210           offset_iv.step = ssize_int (0);
1211         }
1212       else if (!simple_iv (loop, loop, poffset, &offset_iv, true))
1213         return opt_result::failure_at
1214           (stmt, "failed: evolution of offset is not affine.\n");
1215     }
1216
1217   init = ssize_int (pbytepos);
1218
1219   /* Subtract any constant component from the base and add it to INIT instead.
1220      Adjust the misalignment to reflect the amount we subtracted.  */
1221   split_constant_offset (base_iv.base, &base_iv.base, &dinit);
1222   init = size_binop (PLUS_EXPR, init, dinit);
1223   base_misalignment -= TREE_INT_CST_LOW (dinit);
1224
1225   split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
1226   init = size_binop (PLUS_EXPR, init, dinit);
1227
1228   step = size_binop (PLUS_EXPR,
1229                      fold_convert (ssizetype, base_iv.step),
1230                      fold_convert (ssizetype, offset_iv.step));
1231
1232   base = canonicalize_base_object_address (base_iv.base);
1233
1234   /* See if get_pointer_alignment can guarantee a higher alignment than
1235      the one we calculated above.  */
1236   unsigned int HOST_WIDE_INT alt_misalignment;
1237   unsigned int alt_alignment;
1238   get_pointer_alignment_1 (base, &alt_alignment, &alt_misalignment);
1239
1240   /* As above, these values must be whole bytes.  */
1241   gcc_assert (alt_alignment % BITS_PER_UNIT == 0
1242               && alt_misalignment % BITS_PER_UNIT == 0);
1243   alt_alignment /= BITS_PER_UNIT;
1244   alt_misalignment /= BITS_PER_UNIT;
1245
1246   if (base_alignment < alt_alignment)
1247     {
1248       base_alignment = alt_alignment;
1249       base_misalignment = alt_misalignment;
1250     }
1251
1252   drb->base_address = base;
1253   drb->offset = fold_convert (ssizetype, offset_iv.base);
1254   drb->init = init;
1255   drb->step = step;
1256   if (known_misalignment (base_misalignment, base_alignment,
1257                           &drb->base_misalignment))
1258     drb->base_alignment = base_alignment;
1259   else
1260     {
1261       drb->base_alignment = known_alignment (base_misalignment);
1262       drb->base_misalignment = 0;
1263     }
1264   drb->offset_alignment = highest_pow2_factor (offset_iv.base);
1265   drb->step_alignment = highest_pow2_factor (step);
1266
1267   if (dump_file && (dump_flags & TDF_DETAILS))
1268     fprintf (dump_file, "success.\n");
1269
1270   return opt_result::success ();
1271 }
1272
1273 /* Return true if OP is a valid component reference for a DR access
1274    function.  This accepts a subset of what handled_component_p accepts.  */
1275
1276 static bool
1277 access_fn_component_p (tree op)
1278 {
1279   switch (TREE_CODE (op))
1280     {
1281     case REALPART_EXPR:
1282     case IMAGPART_EXPR:
1283     case ARRAY_REF:
1284       return true;
1285
1286     case COMPONENT_REF:
1287       return TREE_CODE (TREE_TYPE (TREE_OPERAND (op, 0))) == RECORD_TYPE;
1288
1289     default:
1290       return false;
1291     }
1292 }
1293
1294 /* Returns whether BASE can have a access_fn_component_p with BASE
1295    as base.  */
1296
1297 static bool
1298 base_supports_access_fn_components_p (tree base)
1299 {
1300   switch (TREE_CODE (TREE_TYPE (base)))
1301     {
1302     case COMPLEX_TYPE:
1303     case ARRAY_TYPE:
1304     case RECORD_TYPE:
1305       return true;
1306     default:
1307       return false;
1308     }
1309 }
1310
1311 /* Determines the base object and the list of indices of memory reference
1312    DR, analyzed in LOOP and instantiated before NEST.  */
1313
1314 static void
1315 dr_analyze_indices (struct indices *dri, tree ref, edge nest, loop_p loop)
1316 {
1317   /* If analyzing a basic-block there are no indices to analyze
1318      and thus no access functions.  */
1319   if (!nest)
1320     {
1321       dri->base_object = ref;
1322       dri->access_fns.create (0);
1323       return;
1324     }
1325
1326   vec<tree> access_fns = vNULL;
1327
1328   /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
1329      into a two element array with a constant index.  The base is
1330      then just the immediate underlying object.  */
1331   if (TREE_CODE (ref) == REALPART_EXPR)
1332     {
1333       ref = TREE_OPERAND (ref, 0);
1334       access_fns.safe_push (integer_zero_node);
1335     }
1336   else if (TREE_CODE (ref) == IMAGPART_EXPR)
1337     {
1338       ref = TREE_OPERAND (ref, 0);
1339       access_fns.safe_push (integer_one_node);
1340     }
1341
1342   /* Analyze access functions of dimensions we know to be independent.
1343      The list of component references handled here should be kept in
1344      sync with access_fn_component_p.  */
1345   while (handled_component_p (ref))
1346     {
1347       if (TREE_CODE (ref) == ARRAY_REF)
1348         {
1349           tree op = TREE_OPERAND (ref, 1);
1350           tree access_fn = analyze_scalar_evolution (loop, op);
1351           access_fn = instantiate_scev (nest, loop, access_fn);
1352           access_fns.safe_push (access_fn);
1353         }
1354       else if (TREE_CODE (ref) == COMPONENT_REF
1355                && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) == RECORD_TYPE)
1356         {
1357           /* For COMPONENT_REFs of records (but not unions!) use the
1358              FIELD_DECL offset as constant access function so we can
1359              disambiguate a[i].f1 and a[i].f2.  */
1360           tree off = component_ref_field_offset (ref);
1361           off = size_binop (PLUS_EXPR,
1362                             size_binop (MULT_EXPR,
1363                                         fold_convert (bitsizetype, off),
1364                                         bitsize_int (BITS_PER_UNIT)),
1365                             DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref, 1)));
1366           access_fns.safe_push (off);
1367         }
1368       else
1369         /* If we have an unhandled component we could not translate
1370            to an access function stop analyzing.  We have determined
1371            our base object in this case.  */
1372         break;
1373
1374       ref = TREE_OPERAND (ref, 0);
1375     }
1376
1377   /* If the address operand of a MEM_REF base has an evolution in the
1378      analyzed nest, add it as an additional independent access-function.  */
1379   if (TREE_CODE (ref) == MEM_REF)
1380     {
1381       tree op = TREE_OPERAND (ref, 0);
1382       tree access_fn = analyze_scalar_evolution (loop, op);
1383       access_fn = instantiate_scev (nest, loop, access_fn);
1384       STRIP_NOPS (access_fn);
1385       if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
1386         {
1387           tree memoff = TREE_OPERAND (ref, 1);
1388           tree base = initial_condition (access_fn);
1389           tree orig_type = TREE_TYPE (base);
1390           STRIP_USELESS_TYPE_CONVERSION (base);
1391           tree off;
1392           split_constant_offset (base, &base, &off);
1393           STRIP_USELESS_TYPE_CONVERSION (base);
1394           /* Fold the MEM_REF offset into the evolutions initial
1395              value to make more bases comparable.  */
1396           if (!integer_zerop (memoff))
1397             {
1398               off = size_binop (PLUS_EXPR, off,
1399                                 fold_convert (ssizetype, memoff));
1400               memoff = build_int_cst (TREE_TYPE (memoff), 0);
1401             }
1402           /* Adjust the offset so it is a multiple of the access type
1403              size and thus we separate bases that can possibly be used
1404              to produce partial overlaps (which the access_fn machinery
1405              cannot handle).  */
1406           wide_int rem;
1407           if (TYPE_SIZE_UNIT (TREE_TYPE (ref))
1408               && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (ref))) == INTEGER_CST
1409               && !integer_zerop (TYPE_SIZE_UNIT (TREE_TYPE (ref))))
1410             rem = wi::mod_trunc
1411               (wi::to_wide (off),
1412                wi::to_wide (TYPE_SIZE_UNIT (TREE_TYPE (ref))),
1413                SIGNED);
1414           else
1415             /* If we can't compute the remainder simply force the initial
1416                condition to zero.  */
1417             rem = wi::to_wide (off);
1418           off = wide_int_to_tree (ssizetype, wi::to_wide (off) - rem);
1419           memoff = wide_int_to_tree (TREE_TYPE (memoff), rem);
1420           /* And finally replace the initial condition.  */
1421           access_fn = chrec_replace_initial_condition
1422               (access_fn, fold_convert (orig_type, off));
1423           /* ???  This is still not a suitable base object for
1424              dr_may_alias_p - the base object needs to be an
1425              access that covers the object as whole.  With
1426              an evolution in the pointer this cannot be
1427              guaranteed.
1428              As a band-aid, mark the access so we can special-case
1429              it in dr_may_alias_p.  */
1430           tree old = ref;
1431           ref = fold_build2_loc (EXPR_LOCATION (ref),
1432                                  MEM_REF, TREE_TYPE (ref),
1433                                  base, memoff);
1434           MR_DEPENDENCE_CLIQUE (ref) = MR_DEPENDENCE_CLIQUE (old);
1435           MR_DEPENDENCE_BASE (ref) = MR_DEPENDENCE_BASE (old);
1436           dri->unconstrained_base = true;
1437           access_fns.safe_push (access_fn);
1438         }
1439     }
1440   else if (DECL_P (ref))
1441     {
1442       /* Canonicalize DR_BASE_OBJECT to MEM_REF form.  */
1443       ref = build2 (MEM_REF, TREE_TYPE (ref),
1444                     build_fold_addr_expr (ref),
1445                     build_int_cst (reference_alias_ptr_type (ref), 0));
1446     }
1447
1448   dri->base_object = ref;
1449   dri->access_fns = access_fns;
1450 }
1451
1452 /* Extracts the alias analysis information from the memory reference DR.  */
1453
1454 static void
1455 dr_analyze_alias (struct data_reference *dr)
1456 {
1457   tree ref = DR_REF (dr);
1458   tree base = get_base_address (ref), addr;
1459
1460   if (INDIRECT_REF_P (base)
1461       || TREE_CODE (base) == MEM_REF)
1462     {
1463       addr = TREE_OPERAND (base, 0);
1464       if (TREE_CODE (addr) == SSA_NAME)
1465         DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
1466     }
1467 }
1468
1469 /* Frees data reference DR.  */
1470
1471 void
1472 free_data_ref (data_reference_p dr)
1473 {
1474   DR_ACCESS_FNS (dr).release ();
1475   if (dr->alt_indices.base_object)
1476     dr->alt_indices.access_fns.release ();
1477   free (dr);
1478 }
1479
1480 /* Analyze memory reference MEMREF, which is accessed in STMT.
1481    The reference is a read if IS_READ is true, otherwise it is a write.
1482    IS_CONDITIONAL_IN_STMT indicates that the reference is conditional
1483    within STMT, i.e. that it might not occur even if STMT is executed
1484    and runs to completion.
1485
1486    Return the data_reference description of MEMREF.  NEST is the outermost
1487    loop in which the reference should be instantiated, LOOP is the loop
1488    in which the data reference should be analyzed.  */
1489
1490 struct data_reference *
1491 create_data_ref (edge nest, loop_p loop, tree memref, gimple *stmt,
1492                  bool is_read, bool is_conditional_in_stmt)
1493 {
1494   struct data_reference *dr;
1495
1496   if (dump_file && (dump_flags & TDF_DETAILS))
1497     {
1498       fprintf (dump_file, "Creating dr for ");
1499       print_generic_expr (dump_file, memref, TDF_SLIM);
1500       fprintf (dump_file, "\n");
1501     }
1502
1503   dr = XCNEW (struct data_reference);
1504   DR_STMT (dr) = stmt;
1505   DR_REF (dr) = memref;
1506   DR_IS_READ (dr) = is_read;
1507   DR_IS_CONDITIONAL_IN_STMT (dr) = is_conditional_in_stmt;
1508
1509   dr_analyze_innermost (&DR_INNERMOST (dr), memref,
1510                         nest != NULL ? loop : NULL, stmt);
1511   dr_analyze_indices (&dr->indices, DR_REF (dr), nest, loop);
1512   dr_analyze_alias (dr);
1513
1514   if (dump_file && (dump_flags & TDF_DETAILS))
1515     {
1516       unsigned i;
1517       fprintf (dump_file, "\tbase_address: ");
1518       print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
1519       fprintf (dump_file, "\n\toffset from base address: ");
1520       print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
1521       fprintf (dump_file, "\n\tconstant offset from base address: ");
1522       print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
1523       fprintf (dump_file, "\n\tstep: ");
1524       print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
1525       fprintf (dump_file, "\n\tbase alignment: %d", DR_BASE_ALIGNMENT (dr));
1526       fprintf (dump_file, "\n\tbase misalignment: %d",
1527                DR_BASE_MISALIGNMENT (dr));
1528       fprintf (dump_file, "\n\toffset alignment: %d",
1529                DR_OFFSET_ALIGNMENT (dr));
1530       fprintf (dump_file, "\n\tstep alignment: %d", DR_STEP_ALIGNMENT (dr));
1531       fprintf (dump_file, "\n\tbase_object: ");
1532       print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
1533       fprintf (dump_file, "\n");
1534       for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
1535         {
1536           fprintf (dump_file, "\tAccess function %d: ", i);
1537           print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
1538         }
1539     }
1540
1541   return dr;
1542 }
1543
1544 /*  A helper function computes order between two tree expressions T1 and T2.
1545     This is used in comparator functions sorting objects based on the order
1546     of tree expressions.  The function returns -1, 0, or 1.  */
1547
1548 int
1549 data_ref_compare_tree (tree t1, tree t2)
1550 {
1551   int i, cmp;
1552   enum tree_code code;
1553   char tclass;
1554
1555   if (t1 == t2)
1556     return 0;
1557   if (t1 == NULL)
1558     return -1;
1559   if (t2 == NULL)
1560     return 1;
1561
1562   STRIP_USELESS_TYPE_CONVERSION (t1);
1563   STRIP_USELESS_TYPE_CONVERSION (t2);
1564   if (t1 == t2)
1565     return 0;
1566
1567   if (TREE_CODE (t1) != TREE_CODE (t2)
1568       && ! (CONVERT_EXPR_P (t1) && CONVERT_EXPR_P (t2)))
1569     return TREE_CODE (t1) < TREE_CODE (t2) ? -1 : 1;
1570
1571   code = TREE_CODE (t1);
1572   switch (code)
1573     {
1574     case INTEGER_CST:
1575       return tree_int_cst_compare (t1, t2);
1576
1577     case STRING_CST:
1578       if (TREE_STRING_LENGTH (t1) != TREE_STRING_LENGTH (t2))
1579         return TREE_STRING_LENGTH (t1) < TREE_STRING_LENGTH (t2) ? -1 : 1;
1580       return memcmp (TREE_STRING_POINTER (t1), TREE_STRING_POINTER (t2),
1581                      TREE_STRING_LENGTH (t1));
1582
1583     case SSA_NAME:
1584       if (SSA_NAME_VERSION (t1) != SSA_NAME_VERSION (t2))
1585         return SSA_NAME_VERSION (t1) < SSA_NAME_VERSION (t2) ? -1 : 1;
1586       break;
1587
1588     default:
1589       if (POLY_INT_CST_P (t1))
1590         return compare_sizes_for_sort (wi::to_poly_widest (t1),
1591                                        wi::to_poly_widest (t2));
1592
1593       tclass = TREE_CODE_CLASS (code);
1594
1595       /* For decls, compare their UIDs.  */
1596       if (tclass == tcc_declaration)
1597         {
1598           if (DECL_UID (t1) != DECL_UID (t2))
1599             return DECL_UID (t1) < DECL_UID (t2) ? -1 : 1;
1600           break;
1601         }
1602       /* For expressions, compare their operands recursively.  */
1603       else if (IS_EXPR_CODE_CLASS (tclass))
1604         {
1605           for (i = TREE_OPERAND_LENGTH (t1) - 1; i >= 0; --i)
1606             {
1607               cmp = data_ref_compare_tree (TREE_OPERAND (t1, i),
1608                                            TREE_OPERAND (t2, i));
1609               if (cmp != 0)
1610                 return cmp;
1611             }
1612         }
1613       else
1614         gcc_unreachable ();
1615     }
1616
1617   return 0;
1618 }
1619
1620 /* Return TRUE it's possible to resolve data dependence DDR by runtime alias
1621    check.  */
1622
1623 opt_result
1624 runtime_alias_check_p (ddr_p ddr, class loop *loop, bool speed_p)
1625 {
1626   if (dump_enabled_p ())
1627     dump_printf (MSG_NOTE,
1628                  "consider run-time aliasing test between %T and %T\n",
1629                  DR_REF (DDR_A (ddr)), DR_REF (DDR_B (ddr)));
1630
1631   if (!speed_p)
1632     return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
1633                                    "runtime alias check not supported when"
1634                                    " optimizing for size.\n");
1635
1636   /* FORNOW: We don't support versioning with outer-loop in either
1637      vectorization or loop distribution.  */
1638   if (loop != NULL && loop->inner != NULL)
1639     return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
1640                                    "runtime alias check not supported for"
1641                                    " outer loop.\n");
1642
1643   return opt_result::success ();
1644 }
1645
1646 /* Operator == between two dr_with_seg_len objects.
1647
1648    This equality operator is used to make sure two data refs
1649    are the same one so that we will consider to combine the
1650    aliasing checks of those two pairs of data dependent data
1651    refs.  */
1652
1653 static bool
1654 operator == (const dr_with_seg_len& d1,
1655              const dr_with_seg_len& d2)
1656 {
1657   return (operand_equal_p (DR_BASE_ADDRESS (d1.dr),
1658                            DR_BASE_ADDRESS (d2.dr), 0)
1659           && data_ref_compare_tree (DR_OFFSET (d1.dr), DR_OFFSET (d2.dr)) == 0
1660           && data_ref_compare_tree (DR_INIT (d1.dr), DR_INIT (d2.dr)) == 0
1661           && data_ref_compare_tree (d1.seg_len, d2.seg_len) == 0
1662           && known_eq (d1.access_size, d2.access_size)
1663           && d1.align == d2.align);
1664 }
1665
1666 /* Comparison function for sorting objects of dr_with_seg_len_pair_t
1667    so that we can combine aliasing checks in one scan.  */
1668
1669 static int
1670 comp_dr_with_seg_len_pair (const void *pa_, const void *pb_)
1671 {
1672   const dr_with_seg_len_pair_t* pa = (const dr_with_seg_len_pair_t *) pa_;
1673   const dr_with_seg_len_pair_t* pb = (const dr_with_seg_len_pair_t *) pb_;
1674   const dr_with_seg_len &a1 = pa->first, &a2 = pa->second;
1675   const dr_with_seg_len &b1 = pb->first, &b2 = pb->second;
1676
1677   /* For DR pairs (a, b) and (c, d), we only consider to merge the alias checks
1678      if a and c have the same basic address snd step, and b and d have the same
1679      address and step.  Therefore, if any a&c or b&d don't have the same address
1680      and step, we don't care the order of those two pairs after sorting.  */
1681   int comp_res;
1682
1683   if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a1.dr),
1684                                          DR_BASE_ADDRESS (b1.dr))) != 0)
1685     return comp_res;
1686   if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a2.dr),
1687                                          DR_BASE_ADDRESS (b2.dr))) != 0)
1688     return comp_res;
1689   if ((comp_res = data_ref_compare_tree (DR_STEP (a1.dr),
1690                                          DR_STEP (b1.dr))) != 0)
1691     return comp_res;
1692   if ((comp_res = data_ref_compare_tree (DR_STEP (a2.dr),
1693                                          DR_STEP (b2.dr))) != 0)
1694     return comp_res;
1695   if ((comp_res = data_ref_compare_tree (DR_OFFSET (a1.dr),
1696                                          DR_OFFSET (b1.dr))) != 0)
1697     return comp_res;
1698   if ((comp_res = data_ref_compare_tree (DR_INIT (a1.dr),
1699                                          DR_INIT (b1.dr))) != 0)
1700     return comp_res;
1701   if ((comp_res = data_ref_compare_tree (DR_OFFSET (a2.dr),
1702                                          DR_OFFSET (b2.dr))) != 0)
1703     return comp_res;
1704   if ((comp_res = data_ref_compare_tree (DR_INIT (a2.dr),
1705                                          DR_INIT (b2.dr))) != 0)
1706     return comp_res;
1707
1708   return 0;
1709 }
1710
1711 /* Dump information about ALIAS_PAIR, indenting each line by INDENT.  */
1712
1713 static void
1714 dump_alias_pair (dr_with_seg_len_pair_t *alias_pair, const char *indent)
1715 {
1716   dump_printf (MSG_NOTE, "%sreference:      %T vs. %T\n", indent,
1717                DR_REF (alias_pair->first.dr),
1718                DR_REF (alias_pair->second.dr));
1719
1720   dump_printf (MSG_NOTE, "%ssegment length: %T", indent,
1721                alias_pair->first.seg_len);
1722   if (!operand_equal_p (alias_pair->first.seg_len,
1723                         alias_pair->second.seg_len, 0))
1724     dump_printf (MSG_NOTE, " vs. %T", alias_pair->second.seg_len);
1725
1726   dump_printf (MSG_NOTE, "\n%saccess size:    ", indent);
1727   dump_dec (MSG_NOTE, alias_pair->first.access_size);
1728   if (maybe_ne (alias_pair->first.access_size, alias_pair->second.access_size))
1729     {
1730       dump_printf (MSG_NOTE, " vs. ");
1731       dump_dec (MSG_NOTE, alias_pair->second.access_size);
1732     }
1733
1734   dump_printf (MSG_NOTE, "\n%salignment:      %d", indent,
1735                alias_pair->first.align);
1736   if (alias_pair->first.align != alias_pair->second.align)
1737     dump_printf (MSG_NOTE, " vs. %d", alias_pair->second.align);
1738
1739   dump_printf (MSG_NOTE, "\n%sflags:         ", indent);
1740   if (alias_pair->flags & DR_ALIAS_RAW)
1741     dump_printf (MSG_NOTE, " RAW");
1742   if (alias_pair->flags & DR_ALIAS_WAR)
1743     dump_printf (MSG_NOTE, " WAR");
1744   if (alias_pair->flags & DR_ALIAS_WAW)
1745     dump_printf (MSG_NOTE, " WAW");
1746   if (alias_pair->flags & DR_ALIAS_ARBITRARY)
1747     dump_printf (MSG_NOTE, " ARBITRARY");
1748   if (alias_pair->flags & DR_ALIAS_SWAPPED)
1749     dump_printf (MSG_NOTE, " SWAPPED");
1750   if (alias_pair->flags & DR_ALIAS_UNSWAPPED)
1751     dump_printf (MSG_NOTE, " UNSWAPPED");
1752   if (alias_pair->flags & DR_ALIAS_MIXED_STEPS)
1753     dump_printf (MSG_NOTE, " MIXED_STEPS");
1754   if (alias_pair->flags == 0)
1755     dump_printf (MSG_NOTE, " <none>");
1756   dump_printf (MSG_NOTE, "\n");
1757 }
1758
1759 /* Merge alias checks recorded in ALIAS_PAIRS and remove redundant ones.
1760    FACTOR is number of iterations that each data reference is accessed.
1761
1762    Basically, for each pair of dependent data refs store_ptr_0 & load_ptr_0,
1763    we create an expression:
1764
1765    ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
1766    || (load_ptr_0 + load_segment_length_0) <= store_ptr_0))
1767
1768    for aliasing checks.  However, in some cases we can decrease the number
1769    of checks by combining two checks into one.  For example, suppose we have
1770    another pair of data refs store_ptr_0 & load_ptr_1, and if the following
1771    condition is satisfied:
1772
1773    load_ptr_0 < load_ptr_1  &&
1774    load_ptr_1 - load_ptr_0 - load_segment_length_0 < store_segment_length_0
1775
1776    (this condition means, in each iteration of vectorized loop, the accessed
1777    memory of store_ptr_0 cannot be between the memory of load_ptr_0 and
1778    load_ptr_1.)
1779
1780    we then can use only the following expression to finish the alising checks
1781    between store_ptr_0 & load_ptr_0 and store_ptr_0 & load_ptr_1:
1782
1783    ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
1784    || (load_ptr_1 + load_segment_length_1 <= store_ptr_0))
1785
1786    Note that we only consider that load_ptr_0 and load_ptr_1 have the same
1787    basic address.  */
1788
1789 void
1790 prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs,
1791                                poly_uint64)
1792 {
1793   if (alias_pairs->is_empty ())
1794     return;
1795
1796   /* Canonicalize each pair so that the base components are ordered wrt
1797      data_ref_compare_tree.  This allows the loop below to merge more
1798      cases.  */
1799   unsigned int i;
1800   dr_with_seg_len_pair_t *alias_pair;
1801   FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
1802     {
1803       data_reference_p dr_a = alias_pair->first.dr;
1804       data_reference_p dr_b = alias_pair->second.dr;
1805       int comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (dr_a),
1806                                             DR_BASE_ADDRESS (dr_b));
1807       if (comp_res == 0)
1808         comp_res = data_ref_compare_tree (DR_OFFSET (dr_a), DR_OFFSET (dr_b));
1809       if (comp_res == 0)
1810         comp_res = data_ref_compare_tree (DR_INIT (dr_a), DR_INIT (dr_b));
1811       if (comp_res > 0)
1812         {
1813           std::swap (alias_pair->first, alias_pair->second);
1814           alias_pair->flags |= DR_ALIAS_SWAPPED;
1815         }
1816       else
1817         alias_pair->flags |= DR_ALIAS_UNSWAPPED;
1818     }
1819
1820   /* Sort the collected data ref pairs so that we can scan them once to
1821      combine all possible aliasing checks.  */
1822   alias_pairs->qsort (comp_dr_with_seg_len_pair);
1823
1824   /* Scan the sorted dr pairs and check if we can combine alias checks
1825      of two neighboring dr pairs.  */
1826   unsigned int last = 0;
1827   for (i = 1; i < alias_pairs->length (); ++i)
1828     {
1829       /* Deal with two ddrs (dr_a1, dr_b1) and (dr_a2, dr_b2).  */
1830       dr_with_seg_len_pair_t *alias_pair1 = &(*alias_pairs)[last];
1831       dr_with_seg_len_pair_t *alias_pair2 = &(*alias_pairs)[i];
1832
1833       dr_with_seg_len *dr_a1 = &alias_pair1->first;
1834       dr_with_seg_len *dr_b1 = &alias_pair1->second;
1835       dr_with_seg_len *dr_a2 = &alias_pair2->first;
1836       dr_with_seg_len *dr_b2 = &alias_pair2->second;
1837
1838       /* Remove duplicate data ref pairs.  */
1839       if (*dr_a1 == *dr_a2 && *dr_b1 == *dr_b2)
1840         {
1841           if (dump_enabled_p ())
1842             dump_printf (MSG_NOTE, "found equal ranges %T, %T and %T, %T\n",
1843                          DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
1844                          DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
1845           alias_pair1->flags |= alias_pair2->flags;
1846           continue;
1847         }
1848
1849       /* Assume that we won't be able to merge the pairs, then correct
1850          if we do.  */
1851       last += 1;
1852       if (last != i)
1853         (*alias_pairs)[last] = (*alias_pairs)[i];
1854
1855       if (*dr_a1 == *dr_a2 || *dr_b1 == *dr_b2)
1856         {
1857           /* We consider the case that DR_B1 and DR_B2 are same memrefs,
1858              and DR_A1 and DR_A2 are two consecutive memrefs.  */
1859           if (*dr_a1 == *dr_a2)
1860             {
1861               std::swap (dr_a1, dr_b1);
1862               std::swap (dr_a2, dr_b2);
1863             }
1864
1865           poly_int64 init_a1, init_a2;
1866           /* Only consider cases in which the distance between the initial
1867              DR_A1 and the initial DR_A2 is known at compile time.  */
1868           if (!operand_equal_p (DR_BASE_ADDRESS (dr_a1->dr),
1869                                 DR_BASE_ADDRESS (dr_a2->dr), 0)
1870               || !operand_equal_p (DR_OFFSET (dr_a1->dr),
1871                                    DR_OFFSET (dr_a2->dr), 0)
1872               || !poly_int_tree_p (DR_INIT (dr_a1->dr), &init_a1)
1873               || !poly_int_tree_p (DR_INIT (dr_a2->dr), &init_a2))
1874             continue;
1875
1876           /* Don't combine if we can't tell which one comes first.  */
1877           if (!ordered_p (init_a1, init_a2))
1878             continue;
1879
1880           /* Work out what the segment length would be if we did combine
1881              DR_A1 and DR_A2:
1882
1883              - If DR_A1 and DR_A2 have equal lengths, that length is
1884                also the combined length.
1885
1886              - If DR_A1 and DR_A2 both have negative "lengths", the combined
1887                length is the lower bound on those lengths.
1888
1889              - If DR_A1 and DR_A2 both have positive lengths, the combined
1890                length is the upper bound on those lengths.
1891
1892              Other cases are unlikely to give a useful combination.
1893
1894              The lengths both have sizetype, so the sign is taken from
1895              the step instead.  */
1896           poly_uint64 new_seg_len = 0;
1897           bool new_seg_len_p = !operand_equal_p (dr_a1->seg_len,
1898                                                  dr_a2->seg_len, 0);
1899           if (new_seg_len_p)
1900             {
1901               poly_uint64 seg_len_a1, seg_len_a2;
1902               if (!poly_int_tree_p (dr_a1->seg_len, &seg_len_a1)
1903                   || !poly_int_tree_p (dr_a2->seg_len, &seg_len_a2))
1904                 continue;
1905
1906               tree indicator_a = dr_direction_indicator (dr_a1->dr);
1907               if (TREE_CODE (indicator_a) != INTEGER_CST)
1908                 continue;
1909
1910               tree indicator_b = dr_direction_indicator (dr_a2->dr);
1911               if (TREE_CODE (indicator_b) != INTEGER_CST)
1912                 continue;
1913
1914               int sign_a = tree_int_cst_sgn (indicator_a);
1915               int sign_b = tree_int_cst_sgn (indicator_b);
1916
1917               if (sign_a <= 0 && sign_b <= 0)
1918                 new_seg_len = lower_bound (seg_len_a1, seg_len_a2);
1919               else if (sign_a >= 0 && sign_b >= 0)
1920                 new_seg_len = upper_bound (seg_len_a1, seg_len_a2);
1921               else
1922                 continue;
1923             }
1924           /* At this point we're committed to merging the refs.  */
1925
1926           /* Make sure dr_a1 starts left of dr_a2.  */
1927           if (maybe_gt (init_a1, init_a2))
1928             {
1929               std::swap (*dr_a1, *dr_a2);
1930               std::swap (init_a1, init_a2);
1931             }
1932
1933           /* The DR_Bs are equal, so only the DR_As can introduce
1934              mixed steps.  */
1935           if (!operand_equal_p (DR_STEP (dr_a1->dr), DR_STEP (dr_a2->dr), 0))
1936             alias_pair1->flags |= DR_ALIAS_MIXED_STEPS;
1937
1938           if (new_seg_len_p)
1939             {
1940               dr_a1->seg_len = build_int_cst (TREE_TYPE (dr_a1->seg_len),
1941                                               new_seg_len);
1942               dr_a1->align = MIN (dr_a1->align, known_alignment (new_seg_len));
1943             }
1944
1945           /* This is always positive due to the swap above.  */
1946           poly_uint64 diff = init_a2 - init_a1;
1947
1948           /* The new check will start at DR_A1.  Make sure that its access
1949              size encompasses the initial DR_A2.  */
1950           if (maybe_lt (dr_a1->access_size, diff + dr_a2->access_size))
1951             {
1952               dr_a1->access_size = upper_bound (dr_a1->access_size,
1953                                                 diff + dr_a2->access_size);
1954               unsigned int new_align = known_alignment (dr_a1->access_size);
1955               dr_a1->align = MIN (dr_a1->align, new_align);
1956             }
1957           if (dump_enabled_p ())
1958             dump_printf (MSG_NOTE, "merging ranges for %T, %T and %T, %T\n",
1959                          DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
1960                          DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
1961           alias_pair1->flags |= alias_pair2->flags;
1962           last -= 1;
1963         }
1964     }
1965   alias_pairs->truncate (last + 1);
1966
1967   /* Try to restore the original dr_with_seg_len order within each
1968      dr_with_seg_len_pair_t.  If we ended up combining swapped and
1969      unswapped pairs into the same check, we have to invalidate any
1970      RAW, WAR and WAW information for it.  */
1971   if (dump_enabled_p ())
1972     dump_printf (MSG_NOTE, "merged alias checks:\n");
1973   FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
1974     {
1975       unsigned int swap_mask = (DR_ALIAS_SWAPPED | DR_ALIAS_UNSWAPPED);
1976       unsigned int swapped = (alias_pair->flags & swap_mask);
1977       if (swapped == DR_ALIAS_SWAPPED)
1978         std::swap (alias_pair->first, alias_pair->second);
1979       else if (swapped != DR_ALIAS_UNSWAPPED)
1980         alias_pair->flags |= DR_ALIAS_ARBITRARY;
1981       alias_pair->flags &= ~swap_mask;
1982       if (dump_enabled_p ())
1983         dump_alias_pair (alias_pair, "  ");
1984     }
1985 }
1986
1987 /* A subroutine of create_intersect_range_checks, with a subset of the
1988    same arguments.  Try to use IFN_CHECK_RAW_PTRS and IFN_CHECK_WAR_PTRS
1989    to optimize cases in which the references form a simple RAW, WAR or
1990    WAR dependence.  */
1991
1992 static bool
1993 create_ifn_alias_checks (tree *cond_expr,
1994                          const dr_with_seg_len_pair_t &alias_pair)
1995 {
1996   const dr_with_seg_len& dr_a = alias_pair.first;
1997   const dr_with_seg_len& dr_b = alias_pair.second;
1998
1999   /* Check for cases in which:
2000
2001      (a) we have a known RAW, WAR or WAR dependence
2002      (b) the accesses are well-ordered in both the original and new code
2003          (see the comment above the DR_ALIAS_* flags for details); and
2004      (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR.  */
2005   if (alias_pair.flags & ~(DR_ALIAS_RAW | DR_ALIAS_WAR | DR_ALIAS_WAW))
2006     return false;
2007
2008   /* Make sure that both DRs access the same pattern of bytes,
2009      with a constant length and step.  */
2010   poly_uint64 seg_len;
2011   if (!operand_equal_p (dr_a.seg_len, dr_b.seg_len, 0)
2012       || !poly_int_tree_p (dr_a.seg_len, &seg_len)
2013       || maybe_ne (dr_a.access_size, dr_b.access_size)
2014       || !operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0)
2015       || !tree_fits_uhwi_p (DR_STEP (dr_a.dr)))
2016     return false;
2017
2018   unsigned HOST_WIDE_INT bytes = tree_to_uhwi (DR_STEP (dr_a.dr));
2019   tree addr_a = DR_BASE_ADDRESS (dr_a.dr);
2020   tree addr_b = DR_BASE_ADDRESS (dr_b.dr);
2021
2022   /* See whether the target suports what we want to do.  WAW checks are
2023      equivalent to WAR checks here.  */
2024   internal_fn ifn = (alias_pair.flags & DR_ALIAS_RAW
2025                      ? IFN_CHECK_RAW_PTRS
2026                      : IFN_CHECK_WAR_PTRS);
2027   unsigned int align = MIN (dr_a.align, dr_b.align);
2028   poly_uint64 full_length = seg_len + bytes;
2029   if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a),
2030                                            full_length, align))
2031     {
2032       full_length = seg_len + dr_a.access_size;
2033       if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a),
2034                                                full_length, align))
2035         return false;
2036     }
2037
2038   /* Commit to using this form of test.  */
2039   addr_a = fold_build_pointer_plus (addr_a, DR_OFFSET (dr_a.dr));
2040   addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr));
2041
2042   addr_b = fold_build_pointer_plus (addr_b, DR_OFFSET (dr_b.dr));
2043   addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr));
2044
2045   *cond_expr = build_call_expr_internal_loc (UNKNOWN_LOCATION,
2046                                              ifn, boolean_type_node,
2047                                              4, addr_a, addr_b,
2048                                              size_int (full_length),
2049                                              size_int (align));
2050
2051   if (dump_enabled_p ())
2052     {
2053       if (ifn == IFN_CHECK_RAW_PTRS)
2054         dump_printf (MSG_NOTE, "using an IFN_CHECK_RAW_PTRS test\n");
2055       else
2056         dump_printf (MSG_NOTE, "using an IFN_CHECK_WAR_PTRS test\n");
2057     }
2058   return true;
2059 }
2060
2061 /* Try to generate a runtime condition that is true if ALIAS_PAIR is
2062    free of aliases, using a condition based on index values instead
2063    of a condition based on addresses.  Return true on success,
2064    storing the condition in *COND_EXPR.
2065
2066    This can only be done if the two data references in ALIAS_PAIR access
2067    the same array object and the index is the only difference.  For example,
2068    if the two data references are DR_A and DR_B:
2069
2070                        DR_A                           DR_B
2071       data-ref         arr[i]                         arr[j]
2072       base_object      arr                            arr
2073       index            {i_0, +, 1}_loop               {j_0, +, 1}_loop
2074
2075    The addresses and their index are like:
2076
2077         |<- ADDR_A    ->|          |<- ADDR_B    ->|
2078      ------------------------------------------------------->
2079         |   |   |   |   |          |   |   |   |   |
2080      ------------------------------------------------------->
2081         i_0 ...         i_0+4      j_0 ...         j_0+4
2082
2083    We can create expression based on index rather than address:
2084
2085      (unsigned) (i_0 - j_0 + 3) <= 6
2086
2087    i.e. the indices are less than 4 apart.
2088
2089    Note evolution step of index needs to be considered in comparison.  */
2090
2091 static bool
2092 create_intersect_range_checks_index (class loop *loop, tree *cond_expr,
2093                                      const dr_with_seg_len_pair_t &alias_pair)
2094 {
2095   const dr_with_seg_len &dr_a = alias_pair.first;
2096   const dr_with_seg_len &dr_b = alias_pair.second;
2097   if ((alias_pair.flags & DR_ALIAS_MIXED_STEPS)
2098       || integer_zerop (DR_STEP (dr_a.dr))
2099       || integer_zerop (DR_STEP (dr_b.dr))
2100       || DR_NUM_DIMENSIONS (dr_a.dr) != DR_NUM_DIMENSIONS (dr_b.dr))
2101     return false;
2102
2103   poly_uint64 seg_len1, seg_len2;
2104   if (!poly_int_tree_p (dr_a.seg_len, &seg_len1)
2105       || !poly_int_tree_p (dr_b.seg_len, &seg_len2))
2106     return false;
2107
2108   if (!tree_fits_shwi_p (DR_STEP (dr_a.dr)))
2109     return false;
2110
2111   if (!operand_equal_p (DR_BASE_OBJECT (dr_a.dr), DR_BASE_OBJECT (dr_b.dr), 0))
2112     return false;
2113
2114   if (!operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0))
2115     return false;
2116
2117   gcc_assert (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST);
2118
2119   bool neg_step = tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0;
2120   unsigned HOST_WIDE_INT abs_step = tree_to_shwi (DR_STEP (dr_a.dr));
2121   if (neg_step)
2122     {
2123       abs_step = -abs_step;
2124       seg_len1 = (-wi::to_poly_wide (dr_a.seg_len)).force_uhwi ();
2125       seg_len2 = (-wi::to_poly_wide (dr_b.seg_len)).force_uhwi ();
2126     }
2127
2128   /* Infer the number of iterations with which the memory segment is accessed
2129      by DR.  In other words, alias is checked if memory segment accessed by
2130      DR_A in some iterations intersect with memory segment accessed by DR_B
2131      in the same amount iterations.
2132      Note segnment length is a linear function of number of iterations with
2133      DR_STEP as the coefficient.  */
2134   poly_uint64 niter_len1, niter_len2;
2135   if (!can_div_trunc_p (seg_len1 + abs_step - 1, abs_step, &niter_len1)
2136       || !can_div_trunc_p (seg_len2 + abs_step - 1, abs_step, &niter_len2))
2137     return false;
2138
2139   /* Divide each access size by the byte step, rounding up.  */
2140   poly_uint64 niter_access1, niter_access2;
2141   if (!can_div_trunc_p (dr_a.access_size + abs_step - 1,
2142                         abs_step, &niter_access1)
2143       || !can_div_trunc_p (dr_b.access_size + abs_step - 1,
2144                            abs_step, &niter_access2))
2145     return false;
2146
2147   bool waw_or_war_p = (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW)) == 0;
2148
2149   int found = -1;
2150   for (unsigned int i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
2151     {
2152       tree access1 = DR_ACCESS_FN (dr_a.dr, i);
2153       tree access2 = DR_ACCESS_FN (dr_b.dr, i);
2154       /* Two indices must be the same if they are not scev, or not scev wrto
2155          current loop being vecorized.  */
2156       if (TREE_CODE (access1) != POLYNOMIAL_CHREC
2157           || TREE_CODE (access2) != POLYNOMIAL_CHREC
2158           || CHREC_VARIABLE (access1) != (unsigned)loop->num
2159           || CHREC_VARIABLE (access2) != (unsigned)loop->num)
2160         {
2161           if (operand_equal_p (access1, access2, 0))
2162             continue;
2163
2164           return false;
2165         }
2166       if (found >= 0)
2167         return false;
2168       found = i;
2169     }
2170
2171   /* Ought not to happen in practice, since if all accesses are equal then the
2172      alias should be decidable at compile time.  */
2173   if (found < 0)
2174     return false;
2175
2176   /* The two indices must have the same step.  */
2177   tree access1 = DR_ACCESS_FN (dr_a.dr, found);
2178   tree access2 = DR_ACCESS_FN (dr_b.dr, found);
2179   if (!operand_equal_p (CHREC_RIGHT (access1), CHREC_RIGHT (access2), 0))
2180     return false;
2181
2182   tree idx_step = CHREC_RIGHT (access1);
2183   /* Index must have const step, otherwise DR_STEP won't be constant.  */
2184   gcc_assert (TREE_CODE (idx_step) == INTEGER_CST);
2185   /* Index must evaluate in the same direction as DR.  */
2186   gcc_assert (!neg_step || tree_int_cst_sign_bit (idx_step) == 1);
2187
2188   tree min1 = CHREC_LEFT (access1);
2189   tree min2 = CHREC_LEFT (access2);
2190   if (!types_compatible_p (TREE_TYPE (min1), TREE_TYPE (min2)))
2191     return false;
2192
2193   /* Ideally, alias can be checked against loop's control IV, but we
2194      need to prove linear mapping between control IV and reference
2195      index.  Although that should be true, we check against (array)
2196      index of data reference.  Like segment length, index length is
2197      linear function of the number of iterations with index_step as
2198      the coefficient, i.e, niter_len * idx_step.  */
2199   offset_int abs_idx_step = offset_int::from (wi::to_wide (idx_step),
2200                                               SIGNED);
2201   if (neg_step)
2202     abs_idx_step = -abs_idx_step;
2203   poly_offset_int idx_len1 = abs_idx_step * niter_len1;
2204   poly_offset_int idx_len2 = abs_idx_step * niter_len2;
2205   poly_offset_int idx_access1 = abs_idx_step * niter_access1;
2206   poly_offset_int idx_access2 = abs_idx_step * niter_access2;
2207
2208   gcc_assert (known_ge (idx_len1, 0)
2209               && known_ge (idx_len2, 0)
2210               && known_ge (idx_access1, 0)
2211               && known_ge (idx_access2, 0));
2212
2213   /* Each access has the following pattern, with lengths measured
2214      in units of INDEX:
2215
2216           <-- idx_len -->
2217           <--- A: -ve step --->
2218           +-----+-------+-----+-------+-----+
2219           | n-1 | ..... |  0  | ..... | n-1 |
2220           +-----+-------+-----+-------+-----+
2221                         <--- B: +ve step --->
2222                         <-- idx_len -->
2223                         |
2224                        min
2225
2226      where "n" is the number of scalar iterations covered by the segment
2227      and where each access spans idx_access units.
2228
2229      A is the range of bytes accessed when the step is negative,
2230      B is the range when the step is positive.
2231
2232      When checking for general overlap, we need to test whether
2233      the range:
2234
2235        [min1 + low_offset1, min1 + high_offset1 + idx_access1 - 1]
2236
2237      overlaps:
2238
2239        [min2 + low_offset2, min2 + high_offset2 + idx_access2 - 1]
2240
2241      where:
2242
2243         low_offsetN = +ve step ? 0 : -idx_lenN;
2244        high_offsetN = +ve step ? idx_lenN : 0;
2245
2246      This is equivalent to testing whether:
2247
2248        min1 + low_offset1 <= min2 + high_offset2 + idx_access2 - 1
2249        && min2 + low_offset2 <= min1 + high_offset1 + idx_access1 - 1
2250
2251      Converting this into a single test, there is an overlap if:
2252
2253        0 <= min2 - min1 + bias <= limit
2254
2255      where  bias = high_offset2 + idx_access2 - 1 - low_offset1
2256            limit = (high_offset1 - low_offset1 + idx_access1 - 1)
2257                  + (high_offset2 - low_offset2 + idx_access2 - 1)
2258       i.e. limit = idx_len1 + idx_access1 - 1 + idx_len2 + idx_access2 - 1
2259
2260      Combining the tests requires limit to be computable in an unsigned
2261      form of the index type; if it isn't, we fall back to the usual
2262      pointer-based checks.
2263
2264      We can do better if DR_B is a write and if DR_A and DR_B are
2265      well-ordered in both the original and the new code (see the
2266      comment above the DR_ALIAS_* flags for details).  In this case
2267      we know that for each i in [0, n-1], the write performed by
2268      access i of DR_B occurs after access numbers j<=i of DR_A in
2269      both the original and the new code.  Any write or anti
2270      dependencies wrt those DR_A accesses are therefore maintained.
2271
2272      We just need to make sure that each individual write in DR_B does not
2273      overlap any higher-indexed access in DR_A; such DR_A accesses happen
2274      after the DR_B access in the original code but happen before it in
2275      the new code.
2276
2277      We know the steps for both accesses are equal, so by induction, we
2278      just need to test whether the first write of DR_B overlaps a later
2279      access of DR_A.  In other words, we need to move min1 along by
2280      one iteration:
2281
2282        min1' = min1 + idx_step
2283
2284      and use the ranges:
2285
2286        [min1' + low_offset1', min1' + high_offset1' + idx_access1 - 1]
2287
2288      and:
2289
2290        [min2, min2 + idx_access2 - 1]
2291
2292      where:
2293
2294         low_offset1' = +ve step ? 0 : -(idx_len1 - |idx_step|)
2295        high_offset1' = +ve_step ? idx_len1 - |idx_step| : 0.  */
2296   if (waw_or_war_p)
2297     idx_len1 -= abs_idx_step;
2298
2299   poly_offset_int limit = idx_len1 + idx_access1 - 1 + idx_access2 - 1;
2300   if (!waw_or_war_p)
2301     limit += idx_len2;
2302
2303   tree utype = unsigned_type_for (TREE_TYPE (min1));
2304   if (!wi::fits_to_tree_p (limit, utype))
2305     return false;
2306
2307   poly_offset_int low_offset1 = neg_step ? -idx_len1 : 0;
2308   poly_offset_int high_offset2 = neg_step || waw_or_war_p ? 0 : idx_len2;
2309   poly_offset_int bias = high_offset2 + idx_access2 - 1 - low_offset1;
2310   /* Equivalent to adding IDX_STEP to MIN1.  */
2311   if (waw_or_war_p)
2312     bias -= wi::to_offset (idx_step);
2313
2314   tree subject = fold_build2 (MINUS_EXPR, utype,
2315                               fold_convert (utype, min2),
2316                               fold_convert (utype, min1));
2317   subject = fold_build2 (PLUS_EXPR, utype, subject,
2318                          wide_int_to_tree (utype, bias));
2319   tree part_cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject,
2320                                      wide_int_to_tree (utype, limit));
2321   if (*cond_expr)
2322     *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2323                               *cond_expr, part_cond_expr);
2324   else
2325     *cond_expr = part_cond_expr;
2326   if (dump_enabled_p ())
2327     {
2328       if (waw_or_war_p)
2329         dump_printf (MSG_NOTE, "using an index-based WAR/WAW test\n");
2330       else
2331         dump_printf (MSG_NOTE, "using an index-based overlap test\n");
2332     }
2333   return true;
2334 }
2335
2336 /* A subroutine of create_intersect_range_checks, with a subset of the
2337    same arguments.  Try to optimize cases in which the second access
2338    is a write and in which some overlap is valid.  */
2339
2340 static bool
2341 create_waw_or_war_checks (tree *cond_expr,
2342                           const dr_with_seg_len_pair_t &alias_pair)
2343 {
2344   const dr_with_seg_len& dr_a = alias_pair.first;
2345   const dr_with_seg_len& dr_b = alias_pair.second;
2346
2347   /* Check for cases in which:
2348
2349      (a) DR_B is always a write;
2350      (b) the accesses are well-ordered in both the original and new code
2351          (see the comment above the DR_ALIAS_* flags for details); and
2352      (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR.  */
2353   if (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW))
2354     return false;
2355
2356   /* Check for equal (but possibly variable) steps.  */
2357   tree step = DR_STEP (dr_a.dr);
2358   if (!operand_equal_p (step, DR_STEP (dr_b.dr)))
2359     return false;
2360
2361   /* Make sure that we can operate on sizetype without loss of precision.  */
2362   tree addr_type = TREE_TYPE (DR_BASE_ADDRESS (dr_a.dr));
2363   if (TYPE_PRECISION (addr_type) != TYPE_PRECISION (sizetype))
2364     return false;
2365
2366   /* All addresses involved are known to have a common alignment ALIGN.
2367      We can therefore subtract ALIGN from an exclusive endpoint to get
2368      an inclusive endpoint.  In the best (and common) case, ALIGN is the
2369      same as the access sizes of both DRs, and so subtracting ALIGN
2370      cancels out the addition of an access size.  */
2371   unsigned int align = MIN (dr_a.align, dr_b.align);
2372   poly_uint64 last_chunk_a = dr_a.access_size - align;
2373   poly_uint64 last_chunk_b = dr_b.access_size - align;
2374
2375   /* Get a boolean expression that is true when the step is negative.  */
2376   tree indicator = dr_direction_indicator (dr_a.dr);
2377   tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
2378                                fold_convert (ssizetype, indicator),
2379                                ssize_int (0));
2380
2381   /* Get lengths in sizetype.  */
2382   tree seg_len_a
2383     = fold_convert (sizetype, rewrite_to_non_trapping_overflow (dr_a.seg_len));
2384   step = fold_convert (sizetype, rewrite_to_non_trapping_overflow (step));
2385
2386   /* Each access has the following pattern:
2387
2388           <- |seg_len| ->
2389           <--- A: -ve step --->
2390           +-----+-------+-----+-------+-----+
2391           | n-1 | ..... |  0  | ..... | n-1 |
2392           +-----+-------+-----+-------+-----+
2393                         <--- B: +ve step --->
2394                         <- |seg_len| ->
2395                         |
2396                    base address
2397
2398      where "n" is the number of scalar iterations covered by the segment.
2399
2400      A is the range of bytes accessed when the step is negative,
2401      B is the range when the step is positive.
2402
2403      We know that DR_B is a write.  We also know (from checking that
2404      DR_A and DR_B are well-ordered) that for each i in [0, n-1],
2405      the write performed by access i of DR_B occurs after access numbers
2406      j<=i of DR_A in both the original and the new code.  Any write or
2407      anti dependencies wrt those DR_A accesses are therefore maintained.
2408
2409      We just need to make sure that each individual write in DR_B does not
2410      overlap any higher-indexed access in DR_A; such DR_A accesses happen
2411      after the DR_B access in the original code but happen before it in
2412      the new code.
2413
2414      We know the steps for both accesses are equal, so by induction, we
2415      just need to test whether the first write of DR_B overlaps a later
2416      access of DR_A.  In other words, we need to move addr_a along by
2417      one iteration:
2418
2419        addr_a' = addr_a + step
2420
2421      and check whether:
2422
2423        [addr_b, addr_b + last_chunk_b]
2424
2425      overlaps:
2426
2427        [addr_a' + low_offset_a, addr_a' + high_offset_a + last_chunk_a]
2428
2429      where [low_offset_a, high_offset_a] spans accesses [1, n-1].  I.e.:
2430
2431         low_offset_a = +ve step ? 0 : seg_len_a - step
2432        high_offset_a = +ve step ? seg_len_a - step : 0
2433
2434      This is equivalent to testing whether:
2435
2436        addr_a' + low_offset_a <= addr_b + last_chunk_b
2437        && addr_b <= addr_a' + high_offset_a + last_chunk_a
2438
2439      Converting this into a single test, there is an overlap if:
2440
2441        0 <= addr_b + last_chunk_b - addr_a' - low_offset_a <= limit
2442
2443      where limit = high_offset_a - low_offset_a + last_chunk_a + last_chunk_b
2444
2445      If DR_A is performed, limit + |step| - last_chunk_b is known to be
2446      less than the size of the object underlying DR_A.  We also know
2447      that last_chunk_b <= |step|; this is checked elsewhere if it isn't
2448      guaranteed at compile time.  There can therefore be no overflow if
2449      "limit" is calculated in an unsigned type with pointer precision.  */
2450   tree addr_a = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_a.dr),
2451                                          DR_OFFSET (dr_a.dr));
2452   addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr));
2453
2454   tree addr_b = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_b.dr),
2455                                          DR_OFFSET (dr_b.dr));
2456   addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr));
2457
2458   /* Advance ADDR_A by one iteration and adjust the length to compensate.  */
2459   addr_a = fold_build_pointer_plus (addr_a, step);
2460   tree seg_len_a_minus_step = fold_build2 (MINUS_EXPR, sizetype,
2461                                            seg_len_a, step);
2462   if (!CONSTANT_CLASS_P (seg_len_a_minus_step))
2463     seg_len_a_minus_step = build1 (SAVE_EXPR, sizetype, seg_len_a_minus_step);
2464
2465   tree low_offset_a = fold_build3 (COND_EXPR, sizetype, neg_step,
2466                                    seg_len_a_minus_step, size_zero_node);
2467   if (!CONSTANT_CLASS_P (low_offset_a))
2468     low_offset_a = build1 (SAVE_EXPR, sizetype, low_offset_a);
2469
2470   /* We could use COND_EXPR <neg_step, size_zero_node, seg_len_a_minus_step>,
2471      but it's usually more efficient to reuse the LOW_OFFSET_A result.  */
2472   tree high_offset_a = fold_build2 (MINUS_EXPR, sizetype, seg_len_a_minus_step,
2473                                     low_offset_a);
2474
2475   /* The amount added to addr_b - addr_a'.  */
2476   tree bias = fold_build2 (MINUS_EXPR, sizetype,
2477                            size_int (last_chunk_b), low_offset_a);
2478
2479   tree limit = fold_build2 (MINUS_EXPR, sizetype, high_offset_a, low_offset_a);
2480   limit = fold_build2 (PLUS_EXPR, sizetype, limit,
2481                        size_int (last_chunk_a + last_chunk_b));
2482
2483   tree subject = fold_build2 (POINTER_DIFF_EXPR, ssizetype, addr_b, addr_a);
2484   subject = fold_build2 (PLUS_EXPR, sizetype,
2485                          fold_convert (sizetype, subject), bias);
2486
2487   *cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject, limit);
2488   if (dump_enabled_p ())
2489     dump_printf (MSG_NOTE, "using an address-based WAR/WAW test\n");
2490   return true;
2491 }
2492
2493 /* If ALIGN is nonzero, set up *SEQ_MIN_OUT and *SEQ_MAX_OUT so that for
2494    every address ADDR accessed by D:
2495
2496      *SEQ_MIN_OUT <= ADDR (== ADDR & -ALIGN) <= *SEQ_MAX_OUT
2497
2498    In this case, every element accessed by D is aligned to at least
2499    ALIGN bytes.
2500
2501    If ALIGN is zero then instead set *SEG_MAX_OUT so that:
2502
2503      *SEQ_MIN_OUT <= ADDR < *SEQ_MAX_OUT.  */
2504
2505 static void
2506 get_segment_min_max (const dr_with_seg_len &d, tree *seg_min_out,
2507                      tree *seg_max_out, HOST_WIDE_INT align)
2508 {
2509   /* Each access has the following pattern:
2510
2511           <- |seg_len| ->
2512           <--- A: -ve step --->
2513           +-----+-------+-----+-------+-----+
2514           | n-1 | ,.... |  0  | ..... | n-1 |
2515           +-----+-------+-----+-------+-----+
2516                         <--- B: +ve step --->
2517                         <- |seg_len| ->
2518                         |
2519                    base address
2520
2521      where "n" is the number of scalar iterations covered by the segment.
2522      (This should be VF for a particular pair if we know that both steps
2523      are the same, otherwise it will be the full number of scalar loop
2524      iterations.)
2525
2526      A is the range of bytes accessed when the step is negative,
2527      B is the range when the step is positive.
2528
2529      If the access size is "access_size" bytes, the lowest addressed byte is:
2530
2531          base + (step < 0 ? seg_len : 0)   [LB]
2532
2533      and the highest addressed byte is always below:
2534
2535          base + (step < 0 ? 0 : seg_len) + access_size   [UB]
2536
2537      Thus:
2538
2539          LB <= ADDR < UB
2540
2541      If ALIGN is nonzero, all three values are aligned to at least ALIGN
2542      bytes, so:
2543
2544          LB <= ADDR <= UB - ALIGN
2545
2546      where "- ALIGN" folds naturally with the "+ access_size" and often
2547      cancels it out.
2548
2549      We don't try to simplify LB and UB beyond this (e.g. by using
2550      MIN and MAX based on whether seg_len rather than the stride is
2551      negative) because it is possible for the absolute size of the
2552      segment to overflow the range of a ssize_t.
2553
2554      Keeping the pointer_plus outside of the cond_expr should allow
2555      the cond_exprs to be shared with other alias checks.  */
2556   tree indicator = dr_direction_indicator (d.dr);
2557   tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
2558                                fold_convert (ssizetype, indicator),
2559                                ssize_int (0));
2560   tree addr_base = fold_build_pointer_plus (DR_BASE_ADDRESS (d.dr),
2561                                             DR_OFFSET (d.dr));
2562   addr_base = fold_build_pointer_plus (addr_base, DR_INIT (d.dr));
2563   tree seg_len
2564     = fold_convert (sizetype, rewrite_to_non_trapping_overflow (d.seg_len));
2565
2566   tree min_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
2567                                 seg_len, size_zero_node);
2568   tree max_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
2569                                 size_zero_node, seg_len);
2570   max_reach = fold_build2 (PLUS_EXPR, sizetype, max_reach,
2571                            size_int (d.access_size - align));
2572
2573   *seg_min_out = fold_build_pointer_plus (addr_base, min_reach);
2574   *seg_max_out = fold_build_pointer_plus (addr_base, max_reach);
2575 }
2576
2577 /* Generate a runtime condition that is true if ALIAS_PAIR is free of aliases,
2578    storing the condition in *COND_EXPR.  The fallback is to generate a
2579    a test that the two accesses do not overlap:
2580
2581      end_a <= start_b || end_b <= start_a.  */
2582
2583 static void
2584 create_intersect_range_checks (class loop *loop, tree *cond_expr,
2585                                const dr_with_seg_len_pair_t &alias_pair)
2586 {
2587   const dr_with_seg_len& dr_a = alias_pair.first;
2588   const dr_with_seg_len& dr_b = alias_pair.second;
2589   *cond_expr = NULL_TREE;
2590   if (create_intersect_range_checks_index (loop, cond_expr, alias_pair))
2591     return;
2592
2593   if (create_ifn_alias_checks (cond_expr, alias_pair))
2594     return;
2595
2596   if (create_waw_or_war_checks (cond_expr, alias_pair))
2597     return;
2598
2599   unsigned HOST_WIDE_INT min_align;
2600   tree_code cmp_code;
2601   /* We don't have to check DR_ALIAS_MIXED_STEPS here, since both versions
2602      are equivalent.  This is just an optimization heuristic.  */
2603   if (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST
2604       && TREE_CODE (DR_STEP (dr_b.dr)) == INTEGER_CST)
2605     {
2606       /* In this case adding access_size to seg_len is likely to give
2607          a simple X * step, where X is either the number of scalar
2608          iterations or the vectorization factor.  We're better off
2609          keeping that, rather than subtracting an alignment from it.
2610
2611          In this case the maximum values are exclusive and so there is
2612          no alias if the maximum of one segment equals the minimum
2613          of another.  */
2614       min_align = 0;
2615       cmp_code = LE_EXPR;
2616     }
2617   else
2618     {
2619       /* Calculate the minimum alignment shared by all four pointers,
2620          then arrange for this alignment to be subtracted from the
2621          exclusive maximum values to get inclusive maximum values.
2622          This "- min_align" is cumulative with a "+ access_size"
2623          in the calculation of the maximum values.  In the best
2624          (and common) case, the two cancel each other out, leaving
2625          us with an inclusive bound based only on seg_len.  In the
2626          worst case we're simply adding a smaller number than before.
2627
2628          Because the maximum values are inclusive, there is an alias
2629          if the maximum value of one segment is equal to the minimum
2630          value of the other.  */
2631       min_align = MIN (dr_a.align, dr_b.align);
2632       cmp_code = LT_EXPR;
2633     }
2634
2635   tree seg_a_min, seg_a_max, seg_b_min, seg_b_max;
2636   get_segment_min_max (dr_a, &seg_a_min, &seg_a_max, min_align);
2637   get_segment_min_max (dr_b, &seg_b_min, &seg_b_max, min_align);
2638
2639   *cond_expr
2640     = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
2641         fold_build2 (cmp_code, boolean_type_node, seg_a_max, seg_b_min),
2642         fold_build2 (cmp_code, boolean_type_node, seg_b_max, seg_a_min));
2643   if (dump_enabled_p ())
2644     dump_printf (MSG_NOTE, "using an address-based overlap test\n");
2645 }
2646
2647 /* Create a conditional expression that represents the run-time checks for
2648    overlapping of address ranges represented by a list of data references
2649    pairs passed in ALIAS_PAIRS.  Data references are in LOOP.  The returned
2650    COND_EXPR is the conditional expression to be used in the if statement
2651    that controls which version of the loop gets executed at runtime.  */
2652
2653 void
2654 create_runtime_alias_checks (class loop *loop,
2655                              const vec<dr_with_seg_len_pair_t> *alias_pairs,
2656                              tree * cond_expr)
2657 {
2658   tree part_cond_expr;
2659
2660   fold_defer_overflow_warnings ();
2661   for (const dr_with_seg_len_pair_t &alias_pair : alias_pairs)
2662     {
2663       gcc_assert (alias_pair.flags);
2664       if (dump_enabled_p ())
2665         dump_printf (MSG_NOTE,
2666                      "create runtime check for data references %T and %T\n",
2667                      DR_REF (alias_pair.first.dr),
2668                      DR_REF (alias_pair.second.dr));
2669
2670       /* Create condition expression for each pair data references.  */
2671       create_intersect_range_checks (loop, &part_cond_expr, alias_pair);
2672       if (*cond_expr)
2673         *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2674                                   *cond_expr, part_cond_expr);
2675       else
2676         *cond_expr = part_cond_expr;
2677     }
2678   fold_undefer_and_ignore_overflow_warnings ();
2679 }
2680
2681 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
2682    expressions.  */
2683 static bool
2684 dr_equal_offsets_p1 (tree offset1, tree offset2)
2685 {
2686   bool res;
2687
2688   STRIP_NOPS (offset1);
2689   STRIP_NOPS (offset2);
2690
2691   if (offset1 == offset2)
2692     return true;
2693
2694   if (TREE_CODE (offset1) != TREE_CODE (offset2)
2695       || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
2696     return false;
2697
2698   res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
2699                              TREE_OPERAND (offset2, 0));
2700
2701   if (!res || !BINARY_CLASS_P (offset1))
2702     return res;
2703
2704   res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
2705                              TREE_OPERAND (offset2, 1));
2706
2707   return res;
2708 }
2709
2710 /* Check if DRA and DRB have equal offsets.  */
2711 bool
2712 dr_equal_offsets_p (struct data_reference *dra,
2713                     struct data_reference *drb)
2714 {
2715   tree offset1, offset2;
2716
2717   offset1 = DR_OFFSET (dra);
2718   offset2 = DR_OFFSET (drb);
2719
2720   return dr_equal_offsets_p1 (offset1, offset2);
2721 }
2722
2723 /* Returns true if FNA == FNB.  */
2724
2725 static bool
2726 affine_function_equal_p (affine_fn fna, affine_fn fnb)
2727 {
2728   unsigned i, n = fna.length ();
2729
2730   if (n != fnb.length ())
2731     return false;
2732
2733   for (i = 0; i < n; i++)
2734     if (!operand_equal_p (fna[i], fnb[i], 0))
2735       return false;
2736
2737   return true;
2738 }
2739
2740 /* If all the functions in CF are the same, returns one of them,
2741    otherwise returns NULL.  */
2742
2743 static affine_fn
2744 common_affine_function (conflict_function *cf)
2745 {
2746   unsigned i;
2747   affine_fn comm;
2748
2749   if (!CF_NONTRIVIAL_P (cf))
2750     return affine_fn ();
2751
2752   comm = cf->fns[0];
2753
2754   for (i = 1; i < cf->n; i++)
2755     if (!affine_function_equal_p (comm, cf->fns[i]))
2756       return affine_fn ();
2757
2758   return comm;
2759 }
2760
2761 /* Returns the base of the affine function FN.  */
2762
2763 static tree
2764 affine_function_base (affine_fn fn)
2765 {
2766   return fn[0];
2767 }
2768
2769 /* Returns true if FN is a constant.  */
2770
2771 static bool
2772 affine_function_constant_p (affine_fn fn)
2773 {
2774   unsigned i;
2775   tree coef;
2776
2777   for (i = 1; fn.iterate (i, &coef); i++)
2778     if (!integer_zerop (coef))
2779       return false;
2780
2781   return true;
2782 }
2783
2784 /* Returns true if FN is the zero constant function.  */
2785
2786 static bool
2787 affine_function_zero_p (affine_fn fn)
2788 {
2789   return (integer_zerop (affine_function_base (fn))
2790           && affine_function_constant_p (fn));
2791 }
2792
2793 /* Returns a signed integer type with the largest precision from TA
2794    and TB.  */
2795
2796 static tree
2797 signed_type_for_types (tree ta, tree tb)
2798 {
2799   if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
2800     return signed_type_for (ta);
2801   else
2802     return signed_type_for (tb);
2803 }
2804
2805 /* Applies operation OP on affine functions FNA and FNB, and returns the
2806    result.  */
2807
2808 static affine_fn
2809 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
2810 {
2811   unsigned i, n, m;
2812   affine_fn ret;
2813   tree coef;
2814
2815   if (fnb.length () > fna.length ())
2816     {
2817       n = fna.length ();
2818       m = fnb.length ();
2819     }
2820   else
2821     {
2822       n = fnb.length ();
2823       m = fna.length ();
2824     }
2825
2826   ret.create (m);
2827   for (i = 0; i < n; i++)
2828     {
2829       tree type = signed_type_for_types (TREE_TYPE (fna[i]),
2830                                          TREE_TYPE (fnb[i]));
2831       ret.quick_push (fold_build2 (op, type, fna[i], fnb[i]));
2832     }
2833
2834   for (; fna.iterate (i, &coef); i++)
2835     ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2836                                  coef, integer_zero_node));
2837   for (; fnb.iterate (i, &coef); i++)
2838     ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2839                                  integer_zero_node, coef));
2840
2841   return ret;
2842 }
2843
2844 /* Returns the sum of affine functions FNA and FNB.  */
2845
2846 static affine_fn
2847 affine_fn_plus (affine_fn fna, affine_fn fnb)
2848 {
2849   return affine_fn_op (PLUS_EXPR, fna, fnb);
2850 }
2851
2852 /* Returns the difference of affine functions FNA and FNB.  */
2853
2854 static affine_fn
2855 affine_fn_minus (affine_fn fna, affine_fn fnb)
2856 {
2857   return affine_fn_op (MINUS_EXPR, fna, fnb);
2858 }
2859
2860 /* Frees affine function FN.  */
2861
2862 static void
2863 affine_fn_free (affine_fn fn)
2864 {
2865   fn.release ();
2866 }
2867
2868 /* Determine for each subscript in the data dependence relation DDR
2869    the distance.  */
2870
2871 static void
2872 compute_subscript_distance (struct data_dependence_relation *ddr)
2873 {
2874   conflict_function *cf_a, *cf_b;
2875   affine_fn fn_a, fn_b, diff;
2876
2877   if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
2878     {
2879       unsigned int i;
2880
2881       for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2882         {
2883           struct subscript *subscript;
2884
2885           subscript = DDR_SUBSCRIPT (ddr, i);
2886           cf_a = SUB_CONFLICTS_IN_A (subscript);
2887           cf_b = SUB_CONFLICTS_IN_B (subscript);
2888
2889           fn_a = common_affine_function (cf_a);
2890           fn_b = common_affine_function (cf_b);
2891           if (!fn_a.exists () || !fn_b.exists ())
2892             {
2893               SUB_DISTANCE (subscript) = chrec_dont_know;
2894               return;
2895             }
2896           diff = affine_fn_minus (fn_a, fn_b);
2897
2898           if (affine_function_constant_p (diff))
2899             SUB_DISTANCE (subscript) = affine_function_base (diff);
2900           else
2901             SUB_DISTANCE (subscript) = chrec_dont_know;
2902
2903           affine_fn_free (diff);
2904         }
2905     }
2906 }
2907
2908 /* Returns the conflict function for "unknown".  */
2909
2910 static conflict_function *
2911 conflict_fn_not_known (void)
2912 {
2913   conflict_function *fn = XCNEW (conflict_function);
2914   fn->n = NOT_KNOWN;
2915
2916   return fn;
2917 }
2918
2919 /* Returns the conflict function for "independent".  */
2920
2921 static conflict_function *
2922 conflict_fn_no_dependence (void)
2923 {
2924   conflict_function *fn = XCNEW (conflict_function);
2925   fn->n = NO_DEPENDENCE;
2926
2927   return fn;
2928 }
2929
2930 /* Returns true if the address of OBJ is invariant in LOOP.  */
2931
2932 static bool
2933 object_address_invariant_in_loop_p (const class loop *loop, const_tree obj)
2934 {
2935   while (handled_component_p (obj))
2936     {
2937       if (TREE_CODE (obj) == ARRAY_REF)
2938         {
2939           for (int i = 1; i < 4; ++i)
2940             if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, i),
2941                                                         loop->num))
2942               return false;
2943         }
2944       else if (TREE_CODE (obj) == COMPONENT_REF)
2945         {
2946           if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
2947                                                       loop->num))
2948             return false;
2949         }
2950       obj = TREE_OPERAND (obj, 0);
2951     }
2952
2953   if (!INDIRECT_REF_P (obj)
2954       && TREE_CODE (obj) != MEM_REF)
2955     return true;
2956
2957   return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
2958                                                   loop->num);
2959 }
2960
2961 /* Returns false if we can prove that data references A and B do not alias,
2962    true otherwise.  If LOOP_NEST is false no cross-iteration aliases are
2963    considered.  */
2964
2965 bool
2966 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
2967                 class loop *loop_nest)
2968 {
2969   tree addr_a = DR_BASE_OBJECT (a);
2970   tree addr_b = DR_BASE_OBJECT (b);
2971
2972   /* If we are not processing a loop nest but scalar code we
2973      do not need to care about possible cross-iteration dependences
2974      and thus can process the full original reference.  Do so,
2975      similar to how loop invariant motion applies extra offset-based
2976      disambiguation.  */
2977   if (!loop_nest)
2978     {
2979       tree tree_size_a = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (a)));
2980       tree tree_size_b = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (b)));
2981
2982       if (DR_BASE_ADDRESS (a)
2983           && DR_BASE_ADDRESS (b)
2984           && operand_equal_p (DR_BASE_ADDRESS (a), DR_BASE_ADDRESS (b))
2985           && operand_equal_p (DR_OFFSET (a), DR_OFFSET (b))
2986           && poly_int_tree_p (tree_size_a)
2987           && poly_int_tree_p (tree_size_b)
2988           && !ranges_maybe_overlap_p (wi::to_poly_widest (DR_INIT (a)),
2989                                       wi::to_poly_widest (tree_size_a),
2990                                       wi::to_poly_widest (DR_INIT (b)),
2991                                       wi::to_poly_widest (tree_size_b)))
2992         {
2993           gcc_assert (integer_zerop (DR_STEP (a))
2994                       && integer_zerop (DR_STEP (b)));
2995           return false;
2996         }
2997
2998       aff_tree off1, off2;
2999       poly_widest_int size1, size2;
3000       get_inner_reference_aff (DR_REF (a), &off1, &size1);
3001       get_inner_reference_aff (DR_REF (b), &off2, &size2);
3002       aff_combination_scale (&off1, -1);
3003       aff_combination_add (&off2, &off1);
3004       if (aff_comb_cannot_overlap_p (&off2, size1, size2))
3005         return false;
3006     }
3007
3008   if ((TREE_CODE (addr_a) == MEM_REF || TREE_CODE (addr_a) == TARGET_MEM_REF)
3009       && (TREE_CODE (addr_b) == MEM_REF || TREE_CODE (addr_b) == TARGET_MEM_REF)
3010       /* For cross-iteration dependences the cliques must be valid for the
3011          whole loop, not just individual iterations.  */
3012       && (!loop_nest
3013           || MR_DEPENDENCE_CLIQUE (addr_a) == 1
3014           || MR_DEPENDENCE_CLIQUE (addr_a) == loop_nest->owned_clique)
3015       && MR_DEPENDENCE_CLIQUE (addr_a) == MR_DEPENDENCE_CLIQUE (addr_b)
3016       && MR_DEPENDENCE_BASE (addr_a) != MR_DEPENDENCE_BASE (addr_b))
3017     return false;
3018
3019   /* If we had an evolution in a pointer-based MEM_REF BASE_OBJECT we
3020      do not know the size of the base-object.  So we cannot do any
3021      offset/overlap based analysis but have to rely on points-to
3022      information only.  */
3023   if (TREE_CODE (addr_a) == MEM_REF
3024       && (DR_UNCONSTRAINED_BASE (a)
3025           || TREE_CODE (TREE_OPERAND (addr_a, 0)) == SSA_NAME))
3026     {
3027       /* For true dependences we can apply TBAA.  */
3028       if (flag_strict_aliasing
3029           && DR_IS_WRITE (a) && DR_IS_READ (b)
3030           && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
3031                                      get_alias_set (DR_REF (b))))
3032         return false;
3033       if (TREE_CODE (addr_b) == MEM_REF)
3034         return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
3035                                        TREE_OPERAND (addr_b, 0));
3036       else
3037         return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
3038                                        build_fold_addr_expr (addr_b));
3039     }
3040   else if (TREE_CODE (addr_b) == MEM_REF
3041            && (DR_UNCONSTRAINED_BASE (b)
3042                || TREE_CODE (TREE_OPERAND (addr_b, 0)) == SSA_NAME))
3043     {
3044       /* For true dependences we can apply TBAA.  */
3045       if (flag_strict_aliasing
3046           && DR_IS_WRITE (a) && DR_IS_READ (b)
3047           && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
3048                                      get_alias_set (DR_REF (b))))
3049         return false;
3050       if (TREE_CODE (addr_a) == MEM_REF)
3051         return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
3052                                        TREE_OPERAND (addr_b, 0));
3053       else
3054         return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
3055                                        TREE_OPERAND (addr_b, 0));
3056     }
3057
3058   /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
3059      that is being subsetted in the loop nest.  */
3060   if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
3061     return refs_output_dependent_p (addr_a, addr_b);
3062   else if (DR_IS_READ (a) && DR_IS_WRITE (b))
3063     return refs_anti_dependent_p (addr_a, addr_b);
3064   return refs_may_alias_p (addr_a, addr_b);
3065 }
3066
3067 /* REF_A and REF_B both satisfy access_fn_component_p.  Return true
3068    if it is meaningful to compare their associated access functions
3069    when checking for dependencies.  */
3070
3071 static bool
3072 access_fn_components_comparable_p (tree ref_a, tree ref_b)
3073 {
3074   /* Allow pairs of component refs from the following sets:
3075
3076        { REALPART_EXPR, IMAGPART_EXPR }
3077        { COMPONENT_REF }
3078        { ARRAY_REF }.  */
3079   tree_code code_a = TREE_CODE (ref_a);
3080   tree_code code_b = TREE_CODE (ref_b);
3081   if (code_a == IMAGPART_EXPR)
3082     code_a = REALPART_EXPR;
3083   if (code_b == IMAGPART_EXPR)
3084     code_b = REALPART_EXPR;
3085   if (code_a != code_b)
3086     return false;
3087
3088   if (TREE_CODE (ref_a) == COMPONENT_REF)
3089     /* ??? We cannot simply use the type of operand #0 of the refs here as
3090        the Fortran compiler smuggles type punning into COMPONENT_REFs.
3091        Use the DECL_CONTEXT of the FIELD_DECLs instead.  */
3092     return (DECL_CONTEXT (TREE_OPERAND (ref_a, 1))
3093             == DECL_CONTEXT (TREE_OPERAND (ref_b, 1)));
3094
3095   return types_compatible_p (TREE_TYPE (TREE_OPERAND (ref_a, 0)),
3096                              TREE_TYPE (TREE_OPERAND (ref_b, 0)));
3097 }
3098
3099 /* Initialize a data dependence relation RES in LOOP_NEST.  USE_ALT_INDICES
3100    is true when the main indices of A and B were not comparable so we try again
3101    with alternate indices computed on an indirect reference.  */
3102
3103 struct data_dependence_relation *
3104 initialize_data_dependence_relation (struct data_dependence_relation *res,
3105                                      vec<loop_p> loop_nest,
3106                                      bool use_alt_indices)
3107 {
3108   struct data_reference *a = DDR_A (res);
3109   struct data_reference *b = DDR_B (res);
3110   unsigned int i;
3111
3112   struct indices *indices_a = &a->indices;
3113   struct indices *indices_b = &b->indices;
3114   if (use_alt_indices)
3115     {
3116       if (TREE_CODE (DR_REF (a)) != MEM_REF)
3117         indices_a = &a->alt_indices;
3118       if (TREE_CODE (DR_REF (b)) != MEM_REF)
3119         indices_b = &b->alt_indices;
3120     }
3121   unsigned int num_dimensions_a = indices_a->access_fns.length ();
3122   unsigned int num_dimensions_b = indices_b->access_fns.length ();
3123   if (num_dimensions_a == 0 || num_dimensions_b == 0)
3124     {
3125       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3126       return res;
3127     }
3128
3129   /* For unconstrained bases, the root (highest-indexed) subscript
3130      describes a variation in the base of the original DR_REF rather
3131      than a component access.  We have no type that accurately describes
3132      the new DR_BASE_OBJECT (whose TREE_TYPE describes the type *after*
3133      applying this subscript) so limit the search to the last real
3134      component access.
3135
3136      E.g. for:
3137
3138         void
3139         f (int a[][8], int b[][8])
3140         {
3141           for (int i = 0; i < 8; ++i)
3142             a[i * 2][0] = b[i][0];
3143         }
3144
3145      the a and b accesses have a single ARRAY_REF component reference [0]
3146      but have two subscripts.  */
3147   if (indices_a->unconstrained_base)
3148     num_dimensions_a -= 1;
3149   if (indices_b->unconstrained_base)
3150     num_dimensions_b -= 1;
3151
3152   /* These structures describe sequences of component references in
3153      DR_REF (A) and DR_REF (B).  Each component reference is tied to a
3154      specific access function.  */
3155   struct {
3156     /* The sequence starts at DR_ACCESS_FN (A, START_A) of A and
3157        DR_ACCESS_FN (B, START_B) of B (inclusive) and extends to higher
3158        indices.  In C notation, these are the indices of the rightmost
3159        component references; e.g. for a sequence .b.c.d, the start
3160        index is for .d.  */
3161     unsigned int start_a;
3162     unsigned int start_b;
3163
3164     /* The sequence contains LENGTH consecutive access functions from
3165        each DR.  */
3166     unsigned int length;
3167
3168     /* The enclosing objects for the A and B sequences respectively,
3169        i.e. the objects to which DR_ACCESS_FN (A, START_A + LENGTH - 1)
3170        and DR_ACCESS_FN (B, START_B + LENGTH - 1) are applied.  */
3171     tree object_a;
3172     tree object_b;
3173   } full_seq = {}, struct_seq = {};
3174
3175   /* Before each iteration of the loop:
3176
3177      - REF_A is what you get after applying DR_ACCESS_FN (A, INDEX_A) and
3178      - REF_B is what you get after applying DR_ACCESS_FN (B, INDEX_B).  */
3179   unsigned int index_a = 0;
3180   unsigned int index_b = 0;
3181   tree ref_a = DR_REF (a);
3182   tree ref_b = DR_REF (b);
3183
3184   /* Now walk the component references from the final DR_REFs back up to
3185      the enclosing base objects.  Each component reference corresponds
3186      to one access function in the DR, with access function 0 being for
3187      the final DR_REF and the highest-indexed access function being the
3188      one that is applied to the base of the DR.
3189
3190      Look for a sequence of component references whose access functions
3191      are comparable (see access_fn_components_comparable_p).  If more
3192      than one such sequence exists, pick the one nearest the base
3193      (which is the leftmost sequence in C notation).  Store this sequence
3194      in FULL_SEQ.
3195
3196      For example, if we have:
3197
3198         struct foo { struct bar s; ... } (*a)[10], (*b)[10];
3199
3200         A: a[0][i].s.c.d
3201         B: __real b[0][i].s.e[i].f
3202
3203      (where d is the same type as the real component of f) then the access
3204      functions would be:
3205
3206                          0   1   2   3
3207         A:              .d  .c  .s [i]
3208
3209                  0   1   2   3   4   5
3210         B:  __real  .f [i]  .e  .s [i]
3211
3212      The A0/B2 column isn't comparable, since .d is a COMPONENT_REF
3213      and [i] is an ARRAY_REF.  However, the A1/B3 column contains two
3214      COMPONENT_REF accesses for struct bar, so is comparable.  Likewise
3215      the A2/B4 column contains two COMPONENT_REF accesses for struct foo,
3216      so is comparable.  The A3/B5 column contains two ARRAY_REFs that
3217      index foo[10] arrays, so is again comparable.  The sequence is
3218      therefore:
3219
3220         A: [1, 3]  (i.e. [i].s.c)
3221         B: [3, 5]  (i.e. [i].s.e)
3222
3223      Also look for sequences of component references whose access
3224      functions are comparable and whose enclosing objects have the same
3225      RECORD_TYPE.  Store this sequence in STRUCT_SEQ.  In the above
3226      example, STRUCT_SEQ would be:
3227
3228         A: [1, 2]  (i.e. s.c)
3229         B: [3, 4]  (i.e. s.e)  */
3230   while (index_a < num_dimensions_a && index_b < num_dimensions_b)
3231     {
3232       /* The alternate indices form always has a single dimension
3233          with unconstrained base.  */
3234       gcc_assert (!use_alt_indices);
3235
3236       /* REF_A and REF_B must be one of the component access types
3237          allowed by dr_analyze_indices.  */
3238       gcc_checking_assert (access_fn_component_p (ref_a));
3239       gcc_checking_assert (access_fn_component_p (ref_b));
3240
3241       /* Get the immediately-enclosing objects for REF_A and REF_B,
3242          i.e. the references *before* applying DR_ACCESS_FN (A, INDEX_A)
3243          and DR_ACCESS_FN (B, INDEX_B).  */
3244       tree object_a = TREE_OPERAND (ref_a, 0);
3245       tree object_b = TREE_OPERAND (ref_b, 0);
3246
3247       tree type_a = TREE_TYPE (object_a);
3248       tree type_b = TREE_TYPE (object_b);
3249       if (access_fn_components_comparable_p (ref_a, ref_b))
3250         {
3251           /* This pair of component accesses is comparable for dependence
3252              analysis, so we can include DR_ACCESS_FN (A, INDEX_A) and
3253              DR_ACCESS_FN (B, INDEX_B) in the sequence.  */
3254           if (full_seq.start_a + full_seq.length != index_a
3255               || full_seq.start_b + full_seq.length != index_b)
3256             {
3257               /* The accesses don't extend the current sequence,
3258                  so start a new one here.  */
3259               full_seq.start_a = index_a;
3260               full_seq.start_b = index_b;
3261               full_seq.length = 0;
3262             }
3263
3264           /* Add this pair of references to the sequence.  */
3265           full_seq.length += 1;
3266           full_seq.object_a = object_a;
3267           full_seq.object_b = object_b;
3268
3269           /* If the enclosing objects are structures (and thus have the
3270              same RECORD_TYPE), record the new sequence in STRUCT_SEQ.  */
3271           if (TREE_CODE (type_a) == RECORD_TYPE)
3272             struct_seq = full_seq;
3273
3274           /* Move to the next containing reference for both A and B.  */
3275           ref_a = object_a;
3276           ref_b = object_b;
3277           index_a += 1;
3278           index_b += 1;
3279           continue;
3280         }
3281
3282       /* Try to approach equal type sizes.  */
3283       if (!COMPLETE_TYPE_P (type_a)
3284           || !COMPLETE_TYPE_P (type_b)
3285           || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_a))
3286           || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_b)))
3287         break;
3288
3289       unsigned HOST_WIDE_INT size_a = tree_to_uhwi (TYPE_SIZE_UNIT (type_a));
3290       unsigned HOST_WIDE_INT size_b = tree_to_uhwi (TYPE_SIZE_UNIT (type_b));
3291       if (size_a <= size_b)
3292         {
3293           index_a += 1;
3294           ref_a = object_a;
3295         }
3296       if (size_b <= size_a)
3297         {
3298           index_b += 1;
3299           ref_b = object_b;
3300         }
3301     }
3302
3303   /* See whether FULL_SEQ ends at the base and whether the two bases
3304      are equal.  We do not care about TBAA or alignment info so we can
3305      use OEP_ADDRESS_OF to avoid false negatives.  */
3306   tree base_a = indices_a->base_object;
3307   tree base_b = indices_b->base_object;
3308   bool same_base_p = (full_seq.start_a + full_seq.length == num_dimensions_a
3309                       && full_seq.start_b + full_seq.length == num_dimensions_b
3310                       && (indices_a->unconstrained_base
3311                           == indices_b->unconstrained_base)
3312                       && operand_equal_p (base_a, base_b, OEP_ADDRESS_OF)
3313                       && (types_compatible_p (TREE_TYPE (base_a),
3314                                               TREE_TYPE (base_b))
3315                           || (!base_supports_access_fn_components_p (base_a)
3316                               && !base_supports_access_fn_components_p (base_b)
3317                               && operand_equal_p
3318                                    (TYPE_SIZE (TREE_TYPE (base_a)),
3319                                     TYPE_SIZE (TREE_TYPE (base_b)), 0)))
3320                       && (!loop_nest.exists ()
3321                           || (object_address_invariant_in_loop_p
3322                               (loop_nest[0], base_a))));
3323
3324   /* If the bases are the same, we can include the base variation too.
3325      E.g. the b accesses in:
3326
3327        for (int i = 0; i < n; ++i)
3328          b[i + 4][0] = b[i][0];
3329
3330      have a definite dependence distance of 4, while for:
3331
3332        for (int i = 0; i < n; ++i)
3333          a[i + 4][0] = b[i][0];
3334
3335      the dependence distance depends on the gap between a and b.
3336
3337      If the bases are different then we can only rely on the sequence
3338      rooted at a structure access, since arrays are allowed to overlap
3339      arbitrarily and change shape arbitrarily.  E.g. we treat this as
3340      valid code:
3341
3342        int a[256];
3343        ...
3344        ((int (*)[4][3]) &a[1])[i][0] += ((int (*)[4][3]) &a[2])[i][0];
3345
3346      where two lvalues with the same int[4][3] type overlap, and where
3347      both lvalues are distinct from the object's declared type.  */
3348   if (same_base_p)
3349     {
3350       if (indices_a->unconstrained_base)
3351         full_seq.length += 1;
3352     }
3353   else
3354     full_seq = struct_seq;
3355
3356   /* Punt if we didn't find a suitable sequence.  */
3357   if (full_seq.length == 0)
3358     {
3359       if (use_alt_indices
3360           || (TREE_CODE (DR_REF (a)) == MEM_REF
3361               && TREE_CODE (DR_REF (b)) == MEM_REF)
3362           || may_be_nonaddressable_p (DR_REF (a))
3363           || may_be_nonaddressable_p (DR_REF (b)))
3364         {
3365           /* Fully exhausted possibilities.  */
3366           DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3367           return res;
3368         }
3369
3370       /* Try evaluating both DRs as dereferences of pointers.  */
3371       if (!a->alt_indices.base_object
3372           && TREE_CODE (DR_REF (a)) != MEM_REF)
3373         {
3374           tree alt_ref = build2 (MEM_REF, TREE_TYPE (DR_REF (a)),
3375                                  build1 (ADDR_EXPR, ptr_type_node, DR_REF (a)),
3376                                  build_int_cst
3377                                    (reference_alias_ptr_type (DR_REF (a)), 0));
3378           dr_analyze_indices (&a->alt_indices, alt_ref,
3379                               loop_preheader_edge (loop_nest[0]),
3380                               loop_containing_stmt (DR_STMT (a)));
3381         }
3382       if (!b->alt_indices.base_object
3383           && TREE_CODE (DR_REF (b)) != MEM_REF)
3384         {
3385           tree alt_ref = build2 (MEM_REF, TREE_TYPE (DR_REF (b)),
3386                                  build1 (ADDR_EXPR, ptr_type_node, DR_REF (b)),
3387                                  build_int_cst
3388                                    (reference_alias_ptr_type (DR_REF (b)), 0));
3389           dr_analyze_indices (&b->alt_indices, alt_ref,
3390                               loop_preheader_edge (loop_nest[0]),
3391                               loop_containing_stmt (DR_STMT (b)));
3392         }
3393       return initialize_data_dependence_relation (res, loop_nest, true);
3394     }
3395
3396   if (!same_base_p)
3397     {
3398       /* Partial overlap is possible for different bases when strict aliasing
3399          is not in effect.  It's also possible if either base involves a union
3400          access; e.g. for:
3401
3402            struct s1 { int a[2]; };
3403            struct s2 { struct s1 b; int c; };
3404            struct s3 { int d; struct s1 e; };
3405            union u { struct s2 f; struct s3 g; } *p, *q;
3406
3407          the s1 at "p->f.b" (base "p->f") partially overlaps the s1 at
3408          "p->g.e" (base "p->g") and might partially overlap the s1 at
3409          "q->g.e" (base "q->g").  */
3410       if (!flag_strict_aliasing
3411           || ref_contains_union_access_p (full_seq.object_a)
3412           || ref_contains_union_access_p (full_seq.object_b))
3413         {
3414           DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3415           return res;
3416         }
3417
3418       DDR_COULD_BE_INDEPENDENT_P (res) = true;
3419       if (!loop_nest.exists ()
3420           || (object_address_invariant_in_loop_p (loop_nest[0],
3421                                                   full_seq.object_a)
3422               && object_address_invariant_in_loop_p (loop_nest[0],
3423                                                      full_seq.object_b)))
3424         {
3425           DDR_OBJECT_A (res) = full_seq.object_a;
3426           DDR_OBJECT_B (res) = full_seq.object_b;
3427         }
3428     }
3429
3430   DDR_AFFINE_P (res) = true;
3431   DDR_ARE_DEPENDENT (res) = NULL_TREE;
3432   DDR_SUBSCRIPTS (res).create (full_seq.length);
3433   DDR_LOOP_NEST (res) = loop_nest;
3434   DDR_SELF_REFERENCE (res) = false;
3435
3436   for (i = 0; i < full_seq.length; ++i)
3437     {
3438       struct subscript *subscript;
3439
3440       subscript = XNEW (struct subscript);
3441       SUB_ACCESS_FN (subscript, 0) = indices_a->access_fns[full_seq.start_a + i];
3442       SUB_ACCESS_FN (subscript, 1) = indices_b->access_fns[full_seq.start_b + i];
3443       SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
3444       SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
3445       SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
3446       SUB_DISTANCE (subscript) = chrec_dont_know;
3447       DDR_SUBSCRIPTS (res).safe_push (subscript);
3448     }
3449
3450   return res;
3451 }
3452
3453 /* Initialize a data dependence relation between data accesses A and
3454    B.  NB_LOOPS is the number of loops surrounding the references: the
3455    size of the classic distance/direction vectors.  */
3456
3457 struct data_dependence_relation *
3458 initialize_data_dependence_relation (struct data_reference *a,
3459                                      struct data_reference *b,
3460                                      vec<loop_p> loop_nest)
3461 {
3462   data_dependence_relation *res = XCNEW (struct data_dependence_relation);
3463   DDR_A (res) = a;
3464   DDR_B (res) = b;
3465   DDR_LOOP_NEST (res).create (0);
3466   DDR_SUBSCRIPTS (res).create (0);
3467   DDR_DIR_VECTS (res).create (0);
3468   DDR_DIST_VECTS (res).create (0);
3469
3470   if (a == NULL || b == NULL)
3471     {
3472       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3473       return res;
3474     }
3475
3476   /* If the data references do not alias, then they are independent.  */
3477   if (!dr_may_alias_p (a, b, loop_nest.exists () ? loop_nest[0] : NULL))
3478     {
3479       DDR_ARE_DEPENDENT (res) = chrec_known;
3480       return res;
3481     }
3482
3483   return initialize_data_dependence_relation (res, loop_nest, false);
3484 }
3485
3486
3487 /* Frees memory used by the conflict function F.  */
3488
3489 static void
3490 free_conflict_function (conflict_function *f)
3491 {
3492   unsigned i;
3493
3494   if (CF_NONTRIVIAL_P (f))
3495     {
3496       for (i = 0; i < f->n; i++)
3497         affine_fn_free (f->fns[i]);
3498     }
3499   free (f);
3500 }
3501
3502 /* Frees memory used by SUBSCRIPTS.  */
3503
3504 static void
3505 free_subscripts (vec<subscript_p> subscripts)
3506 {
3507   for (subscript_p s : subscripts)
3508     {
3509       free_conflict_function (s->conflicting_iterations_in_a);
3510       free_conflict_function (s->conflicting_iterations_in_b);
3511       free (s);
3512     }
3513   subscripts.release ();
3514 }
3515
3516 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
3517    description.  */
3518
3519 static inline void
3520 finalize_ddr_dependent (struct data_dependence_relation *ddr,
3521                         tree chrec)
3522 {
3523   DDR_ARE_DEPENDENT (ddr) = chrec;
3524   free_subscripts (DDR_SUBSCRIPTS (ddr));
3525   DDR_SUBSCRIPTS (ddr).create (0);
3526 }
3527
3528 /* The dependence relation DDR cannot be represented by a distance
3529    vector.  */
3530
3531 static inline void
3532 non_affine_dependence_relation (struct data_dependence_relation *ddr)
3533 {
3534   if (dump_file && (dump_flags & TDF_DETAILS))
3535     fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
3536
3537   DDR_AFFINE_P (ddr) = false;
3538 }
3539
3540 \f
3541
3542 /* This section contains the classic Banerjee tests.  */
3543
3544 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
3545    variables, i.e., if the ZIV (Zero Index Variable) test is true.  */
3546
3547 static inline bool
3548 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
3549 {
3550   return (evolution_function_is_constant_p (chrec_a)
3551           && evolution_function_is_constant_p (chrec_b));
3552 }
3553
3554 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
3555    variable, i.e., if the SIV (Single Index Variable) test is true.  */
3556
3557 static bool
3558 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
3559 {
3560   if ((evolution_function_is_constant_p (chrec_a)
3561        && evolution_function_is_univariate_p (chrec_b))
3562       || (evolution_function_is_constant_p (chrec_b)
3563           && evolution_function_is_univariate_p (chrec_a)))
3564     return true;
3565
3566   if (evolution_function_is_univariate_p (chrec_a)
3567       && evolution_function_is_univariate_p (chrec_b))
3568     {
3569       switch (TREE_CODE (chrec_a))
3570         {
3571         case POLYNOMIAL_CHREC:
3572           switch (TREE_CODE (chrec_b))
3573             {
3574             case POLYNOMIAL_CHREC:
3575               if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
3576                 return false;
3577               /* FALLTHRU */
3578
3579             default:
3580               return true;
3581             }
3582
3583         default:
3584           return true;
3585         }
3586     }
3587
3588   return false;
3589 }
3590
3591 /* Creates a conflict function with N dimensions.  The affine functions
3592    in each dimension follow.  */
3593
3594 static conflict_function *
3595 conflict_fn (unsigned n, ...)
3596 {
3597   unsigned i;
3598   conflict_function *ret = XCNEW (conflict_function);
3599   va_list ap;
3600
3601   gcc_assert (n > 0 && n <= MAX_DIM);
3602   va_start (ap, n);
3603
3604   ret->n = n;
3605   for (i = 0; i < n; i++)
3606     ret->fns[i] = va_arg (ap, affine_fn);
3607   va_end (ap);
3608
3609   return ret;
3610 }
3611
3612 /* Returns constant affine function with value CST.  */
3613
3614 static affine_fn
3615 affine_fn_cst (tree cst)
3616 {
3617   affine_fn fn;
3618   fn.create (1);
3619   fn.quick_push (cst);
3620   return fn;
3621 }
3622
3623 /* Returns affine function with single variable, CST + COEF * x_DIM.  */
3624
3625 static affine_fn
3626 affine_fn_univar (tree cst, unsigned dim, tree coef)
3627 {
3628   affine_fn fn;
3629   fn.create (dim + 1);
3630   unsigned i;
3631
3632   gcc_assert (dim > 0);
3633   fn.quick_push (cst);
3634   for (i = 1; i < dim; i++)
3635     fn.quick_push (integer_zero_node);
3636   fn.quick_push (coef);
3637   return fn;
3638 }
3639
3640 /* Analyze a ZIV (Zero Index Variable) subscript.  *OVERLAPS_A and
3641    *OVERLAPS_B are initialized to the functions that describe the
3642    relation between the elements accessed twice by CHREC_A and
3643    CHREC_B.  For k >= 0, the following property is verified:
3644
3645    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
3646
3647 static void
3648 analyze_ziv_subscript (tree chrec_a,
3649                        tree chrec_b,
3650                        conflict_function **overlaps_a,
3651                        conflict_function **overlaps_b,
3652                        tree *last_conflicts)
3653 {
3654   tree type, difference;
3655   dependence_stats.num_ziv++;
3656
3657   if (dump_file && (dump_flags & TDF_DETAILS))
3658     fprintf (dump_file, "(analyze_ziv_subscript \n");
3659
3660   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
3661   chrec_a = chrec_convert (type, chrec_a, NULL);
3662   chrec_b = chrec_convert (type, chrec_b, NULL);
3663   difference = chrec_fold_minus (type, chrec_a, chrec_b);
3664
3665   switch (TREE_CODE (difference))
3666     {
3667     case INTEGER_CST:
3668       if (integer_zerop (difference))
3669         {
3670           /* The difference is equal to zero: the accessed index
3671              overlaps for each iteration in the loop.  */
3672           *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3673           *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3674           *last_conflicts = chrec_dont_know;
3675           dependence_stats.num_ziv_dependent++;
3676         }
3677       else
3678         {
3679           /* The accesses do not overlap.  */
3680           *overlaps_a = conflict_fn_no_dependence ();
3681           *overlaps_b = conflict_fn_no_dependence ();
3682           *last_conflicts = integer_zero_node;
3683           dependence_stats.num_ziv_independent++;
3684         }
3685       break;
3686
3687     default:
3688       /* We're not sure whether the indexes overlap.  For the moment,
3689          conservatively answer "don't know".  */
3690       if (dump_file && (dump_flags & TDF_DETAILS))
3691         fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
3692
3693       *overlaps_a = conflict_fn_not_known ();
3694       *overlaps_b = conflict_fn_not_known ();
3695       *last_conflicts = chrec_dont_know;
3696       dependence_stats.num_ziv_unimplemented++;
3697       break;
3698     }
3699
3700   if (dump_file && (dump_flags & TDF_DETAILS))
3701     fprintf (dump_file, ")\n");
3702 }
3703
3704 /* Similar to max_stmt_executions_int, but returns the bound as a tree,
3705    and only if it fits to the int type.  If this is not the case, or the
3706    bound  on the number of iterations of LOOP could not be derived, returns
3707    chrec_dont_know.  */
3708
3709 static tree
3710 max_stmt_executions_tree (class loop *loop)
3711 {
3712   widest_int nit;
3713
3714   if (!max_stmt_executions (loop, &nit))
3715     return chrec_dont_know;
3716
3717   if (!wi::fits_to_tree_p (nit, unsigned_type_node))
3718     return chrec_dont_know;
3719
3720   return wide_int_to_tree (unsigned_type_node, nit);
3721 }
3722
3723 /* Determine whether the CHREC is always positive/negative.  If the expression
3724    cannot be statically analyzed, return false, otherwise set the answer into
3725    VALUE.  */
3726
3727 static bool
3728 chrec_is_positive (tree chrec, bool *value)
3729 {
3730   bool value0, value1, value2;
3731   tree end_value, nb_iter;
3732
3733   switch (TREE_CODE (chrec))
3734     {
3735     case POLYNOMIAL_CHREC:
3736       if (!chrec_is_positive (CHREC_LEFT (chrec), &value0)
3737           || !chrec_is_positive (CHREC_RIGHT (chrec), &value1))
3738         return false;
3739
3740       /* FIXME -- overflows.  */
3741       if (value0 == value1)
3742         {
3743           *value = value0;
3744           return true;
3745         }
3746
3747       /* Otherwise the chrec is under the form: "{-197, +, 2}_1",
3748          and the proof consists in showing that the sign never
3749          changes during the execution of the loop, from 0 to
3750          loop->nb_iterations.  */
3751       if (!evolution_function_is_affine_p (chrec))
3752         return false;
3753
3754       nb_iter = number_of_latch_executions (get_chrec_loop (chrec));
3755       if (chrec_contains_undetermined (nb_iter))
3756         return false;
3757
3758 #if 0
3759       /* TODO -- If the test is after the exit, we may decrease the number of
3760          iterations by one.  */
3761       if (after_exit)
3762         nb_iter = chrec_fold_minus (type, nb_iter, build_int_cst (type, 1));
3763 #endif
3764
3765       end_value = chrec_apply (CHREC_VARIABLE (chrec), chrec, nb_iter);
3766
3767       if (!chrec_is_positive (end_value, &value2))
3768         return false;
3769
3770       *value = value0;
3771       return value0 == value1;
3772
3773     case INTEGER_CST:
3774       switch (tree_int_cst_sgn (chrec))
3775         {
3776         case -1:
3777           *value = false;
3778           break;
3779         case 1:
3780           *value = true;
3781           break;
3782         default:
3783           return false;
3784         }
3785       return true;
3786
3787     default:
3788       return false;
3789     }
3790 }
3791
3792
3793 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
3794    constant, and CHREC_B is an affine function.  *OVERLAPS_A and
3795    *OVERLAPS_B are initialized to the functions that describe the
3796    relation between the elements accessed twice by CHREC_A and
3797    CHREC_B.  For k >= 0, the following property is verified:
3798
3799    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
3800
3801 static void
3802 analyze_siv_subscript_cst_affine (tree chrec_a,
3803                                   tree chrec_b,
3804                                   conflict_function **overlaps_a,
3805                                   conflict_function **overlaps_b,
3806                                   tree *last_conflicts)
3807 {
3808   bool value0, value1, value2;
3809   tree type, difference, tmp;
3810
3811   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
3812   chrec_a = chrec_convert (type, chrec_a, NULL);
3813   chrec_b = chrec_convert (type, chrec_b, NULL);
3814   difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
3815
3816   /* Special case overlap in the first iteration.  */
3817   if (integer_zerop (difference))
3818     {
3819       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3820       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3821       *last_conflicts = integer_one_node;
3822       return;
3823     }
3824
3825   if (!chrec_is_positive (initial_condition (difference), &value0))
3826     {
3827       if (dump_file && (dump_flags & TDF_DETAILS))
3828         fprintf (dump_file, "siv test failed: chrec is not positive.\n");
3829
3830       dependence_stats.num_siv_unimplemented++;
3831       *overlaps_a = conflict_fn_not_known ();
3832       *overlaps_b = conflict_fn_not_known ();
3833       *last_conflicts = chrec_dont_know;
3834       return;
3835     }
3836   else
3837     {
3838       if (value0 == false)
3839         {
3840           if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
3841               || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
3842             {
3843               if (dump_file && (dump_flags & TDF_DETAILS))
3844                 fprintf (dump_file, "siv test failed: chrec not positive.\n");
3845
3846               *overlaps_a = conflict_fn_not_known ();
3847               *overlaps_b = conflict_fn_not_known ();
3848               *last_conflicts = chrec_dont_know;
3849               dependence_stats.num_siv_unimplemented++;
3850               return;
3851             }
3852           else
3853             {
3854               if (value1 == true)
3855                 {
3856                   /* Example:
3857                      chrec_a = 12
3858                      chrec_b = {10, +, 1}
3859                   */
3860
3861                   if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
3862                     {
3863                       HOST_WIDE_INT numiter;
3864                       class loop *loop = get_chrec_loop (chrec_b);
3865
3866                       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3867                       tmp = fold_build2 (EXACT_DIV_EXPR, type,
3868                                          fold_build1 (ABS_EXPR, type, difference),
3869                                          CHREC_RIGHT (chrec_b));
3870                       *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
3871                       *last_conflicts = integer_one_node;
3872
3873
3874                       /* Perform weak-zero siv test to see if overlap is
3875                          outside the loop bounds.  */
3876                       numiter = max_stmt_executions_int (loop);
3877
3878                       if (numiter >= 0
3879                           && compare_tree_int (tmp, numiter) > 0)
3880                         {
3881                           free_conflict_function (*overlaps_a);
3882                           free_conflict_function (*overlaps_b);
3883                           *overlaps_a = conflict_fn_no_dependence ();
3884                           *overlaps_b = conflict_fn_no_dependence ();
3885                           *last_conflicts = integer_zero_node;
3886                           dependence_stats.num_siv_independent++;
3887                           return;
3888                         }
3889                       dependence_stats.num_siv_dependent++;
3890                       return;
3891                     }
3892
3893                   /* When the step does not divide the difference, there are
3894                      no overlaps.  */
3895                   else
3896                     {
3897                       *overlaps_a = conflict_fn_no_dependence ();
3898                       *overlaps_b = conflict_fn_no_dependence ();
3899                       *last_conflicts = integer_zero_node;
3900                       dependence_stats.num_siv_independent++;
3901                       return;
3902                     }
3903                 }
3904
3905               else
3906                 {
3907                   /* Example:
3908                      chrec_a = 12
3909                      chrec_b = {10, +, -1}
3910
3911                      In this case, chrec_a will not overlap with chrec_b.  */
3912                   *overlaps_a = conflict_fn_no_dependence ();
3913                   *overlaps_b = conflict_fn_no_dependence ();
3914                   *last_conflicts = integer_zero_node;
3915                   dependence_stats.num_siv_independent++;
3916                   return;
3917                 }
3918             }
3919         }
3920       else
3921         {
3922           if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
3923               || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
3924             {
3925               if (dump_file && (dump_flags & TDF_DETAILS))
3926                 fprintf (dump_file, "siv test failed: chrec not positive.\n");
3927
3928               *overlaps_a = conflict_fn_not_known ();
3929               *overlaps_b = conflict_fn_not_known ();
3930               *last_conflicts = chrec_dont_know;
3931               dependence_stats.num_siv_unimplemented++;
3932               return;
3933             }
3934           else
3935             {
3936               if (value2 == false)
3937                 {
3938                   /* Example:
3939                      chrec_a = 3
3940                      chrec_b = {10, +, -1}
3941                   */
3942                   if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
3943                     {
3944                       HOST_WIDE_INT numiter;
3945                       class loop *loop = get_chrec_loop (chrec_b);
3946
3947                       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3948                       tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
3949                                          CHREC_RIGHT (chrec_b));
3950                       *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
3951                       *last_conflicts = integer_one_node;
3952
3953                       /* Perform weak-zero siv test to see if overlap is
3954                          outside the loop bounds.  */
3955                       numiter = max_stmt_executions_int (loop);
3956
3957                       if (numiter >= 0
3958                           && compare_tree_int (tmp, numiter) > 0)
3959                         {
3960                           free_conflict_function (*overlaps_a);
3961                           free_conflict_function (*overlaps_b);
3962                           *overlaps_a = conflict_fn_no_dependence ();
3963                           *overlaps_b = conflict_fn_no_dependence ();
3964                           *last_conflicts = integer_zero_node;
3965                           dependence_stats.num_siv_independent++;
3966                           return;
3967                         }
3968                       dependence_stats.num_siv_dependent++;
3969                       return;
3970                     }
3971
3972                   /* When the step does not divide the difference, there
3973                      are no overlaps.  */
3974                   else
3975                     {
3976                       *overlaps_a = conflict_fn_no_dependence ();
3977                       *overlaps_b = conflict_fn_no_dependence ();
3978                       *last_conflicts = integer_zero_node;
3979                       dependence_stats.num_siv_independent++;
3980                       return;
3981                     }
3982                 }
3983               else
3984                 {
3985                   /* Example:
3986                      chrec_a = 3
3987                      chrec_b = {4, +, 1}
3988
3989                      In this case, chrec_a will not overlap with chrec_b.  */
3990                   *overlaps_a = conflict_fn_no_dependence ();
3991                   *overlaps_b = conflict_fn_no_dependence ();
3992                   *last_conflicts = integer_zero_node;
3993                   dependence_stats.num_siv_independent++;
3994                   return;
3995                 }
3996             }
3997         }
3998     }
3999 }
4000
4001 /* Helper recursive function for initializing the matrix A.  Returns
4002    the initial value of CHREC.  */
4003
4004 static tree
4005 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
4006 {
4007   gcc_assert (chrec);
4008
4009   switch (TREE_CODE (chrec))
4010     {
4011     case POLYNOMIAL_CHREC:
4012       HOST_WIDE_INT chrec_right;
4013       if (!cst_and_fits_in_hwi (CHREC_RIGHT (chrec)))
4014         return chrec_dont_know;
4015       chrec_right = int_cst_value (CHREC_RIGHT (chrec));
4016       /* We want to be able to negate without overflow.  */
4017       if (chrec_right == HOST_WIDE_INT_MIN)
4018         return chrec_dont_know;
4019       A[index][0] = mult * chrec_right;
4020       return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
4021
4022     case PLUS_EXPR:
4023     case MULT_EXPR:
4024     case MINUS_EXPR:
4025       {
4026         tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
4027         tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
4028
4029         return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
4030       }
4031
4032     CASE_CONVERT:
4033       {
4034         tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
4035         return chrec_convert (chrec_type (chrec), op, NULL);
4036       }
4037
4038     case BIT_NOT_EXPR:
4039       {
4040         /* Handle ~X as -1 - X.  */
4041         tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
4042         return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
4043                               build_int_cst (TREE_TYPE (chrec), -1), op);
4044       }
4045
4046     case INTEGER_CST:
4047       return chrec;
4048
4049     default:
4050       gcc_unreachable ();
4051       return NULL_TREE;
4052     }
4053 }
4054
4055 #define FLOOR_DIV(x,y) ((x) / (y))
4056
4057 /* Solves the special case of the Diophantine equation:
4058    | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
4059
4060    Computes the descriptions OVERLAPS_A and OVERLAPS_B.  NITER is the
4061    number of iterations that loops X and Y run.  The overlaps will be
4062    constructed as evolutions in dimension DIM.  */
4063
4064 static void
4065 compute_overlap_steps_for_affine_univar (HOST_WIDE_INT niter,
4066                                          HOST_WIDE_INT step_a,
4067                                          HOST_WIDE_INT step_b,
4068                                          affine_fn *overlaps_a,
4069                                          affine_fn *overlaps_b,
4070                                          tree *last_conflicts, int dim)
4071 {
4072   if (((step_a > 0 && step_b > 0)
4073        || (step_a < 0 && step_b < 0)))
4074     {
4075       HOST_WIDE_INT step_overlaps_a, step_overlaps_b;
4076       HOST_WIDE_INT gcd_steps_a_b, last_conflict, tau2;
4077
4078       gcd_steps_a_b = gcd (step_a, step_b);
4079       step_overlaps_a = step_b / gcd_steps_a_b;
4080       step_overlaps_b = step_a / gcd_steps_a_b;
4081
4082       if (niter > 0)
4083         {
4084           tau2 = FLOOR_DIV (niter, step_overlaps_a);
4085           tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
4086           last_conflict = tau2;
4087           *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
4088         }
4089       else
4090         *last_conflicts = chrec_dont_know;
4091
4092       *overlaps_a = affine_fn_univar (integer_zero_node, dim,
4093                                       build_int_cst (NULL_TREE,
4094                                                      step_overlaps_a));
4095       *overlaps_b = affine_fn_univar (integer_zero_node, dim,
4096                                       build_int_cst (NULL_TREE,
4097                                                      step_overlaps_b));
4098     }
4099
4100   else
4101     {
4102       *overlaps_a = affine_fn_cst (integer_zero_node);
4103       *overlaps_b = affine_fn_cst (integer_zero_node);
4104       *last_conflicts = integer_zero_node;
4105     }
4106 }
4107
4108 /* Solves the special case of a Diophantine equation where CHREC_A is
4109    an affine bivariate function, and CHREC_B is an affine univariate
4110    function.  For example,
4111
4112    | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
4113
4114    has the following overlapping functions:
4115
4116    | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
4117    | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
4118    | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
4119
4120    FORNOW: This is a specialized implementation for a case occurring in
4121    a common benchmark.  Implement the general algorithm.  */
4122
4123 static void
4124 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
4125                                       conflict_function **overlaps_a,
4126                                       conflict_function **overlaps_b,
4127                                       tree *last_conflicts)
4128 {
4129   bool xz_p, yz_p, xyz_p;
4130   HOST_WIDE_INT step_x, step_y, step_z;
4131   HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
4132   affine_fn overlaps_a_xz, overlaps_b_xz;
4133   affine_fn overlaps_a_yz, overlaps_b_yz;
4134   affine_fn overlaps_a_xyz, overlaps_b_xyz;
4135   affine_fn ova1, ova2, ovb;
4136   tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
4137
4138   step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
4139   step_y = int_cst_value (CHREC_RIGHT (chrec_a));
4140   step_z = int_cst_value (CHREC_RIGHT (chrec_b));
4141
4142   niter_x = max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)));
4143   niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a));
4144   niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b));
4145
4146   if (niter_x < 0 || niter_y < 0 || niter_z < 0)
4147     {
4148       if (dump_file && (dump_flags & TDF_DETAILS))
4149         fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
4150
4151       *overlaps_a = conflict_fn_not_known ();
4152       *overlaps_b = conflict_fn_not_known ();
4153       *last_conflicts = chrec_dont_know;
4154       return;
4155     }
4156
4157   niter = MIN (niter_x, niter_z);
4158   compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
4159                                            &overlaps_a_xz,
4160                                            &overlaps_b_xz,
4161                                            &last_conflicts_xz, 1);
4162   niter = MIN (niter_y, niter_z);
4163   compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
4164                                            &overlaps_a_yz,
4165                                            &overlaps_b_yz,
4166                                            &last_conflicts_yz, 2);
4167   niter = MIN (niter_x, niter_z);
4168   niter = MIN (niter_y, niter);
4169   compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
4170                                            &overlaps_a_xyz,
4171                                            &overlaps_b_xyz,
4172                                            &last_conflicts_xyz, 3);
4173
4174   xz_p = !integer_zerop (last_conflicts_xz);
4175   yz_p = !integer_zerop (last_conflicts_yz);
4176   xyz_p = !integer_zerop (last_conflicts_xyz);
4177
4178   if (xz_p || yz_p || xyz_p)
4179     {
4180       ova1 = affine_fn_cst (integer_zero_node);
4181       ova2 = affine_fn_cst (integer_zero_node);
4182       ovb = affine_fn_cst (integer_zero_node);
4183       if (xz_p)
4184         {
4185           affine_fn t0 = ova1;
4186           affine_fn t2 = ovb;
4187
4188           ova1 = affine_fn_plus (ova1, overlaps_a_xz);
4189           ovb = affine_fn_plus (ovb, overlaps_b_xz);
4190           affine_fn_free (t0);
4191           affine_fn_free (t2);
4192           *last_conflicts = last_conflicts_xz;
4193         }
4194       if (yz_p)
4195         {
4196           affine_fn t0 = ova2;
4197           affine_fn t2 = ovb;
4198
4199           ova2 = affine_fn_plus (ova2, overlaps_a_yz);
4200           ovb = affine_fn_plus (ovb, overlaps_b_yz);
4201           affine_fn_free (t0);
4202           affine_fn_free (t2);
4203           *last_conflicts = last_conflicts_yz;
4204         }
4205       if (xyz_p)
4206         {
4207           affine_fn t0 = ova1;
4208           affine_fn t2 = ova2;
4209           affine_fn t4 = ovb;
4210
4211           ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
4212           ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
4213           ovb = affine_fn_plus (ovb, overlaps_b_xyz);
4214           affine_fn_free (t0);
4215           affine_fn_free (t2);
4216           affine_fn_free (t4);
4217           *last_conflicts = last_conflicts_xyz;
4218         }
4219       *overlaps_a = conflict_fn (2, ova1, ova2);
4220       *overlaps_b = conflict_fn (1, ovb);
4221     }
4222   else
4223     {
4224       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4225       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4226       *last_conflicts = integer_zero_node;
4227     }
4228
4229   affine_fn_free (overlaps_a_xz);
4230   affine_fn_free (overlaps_b_xz);
4231   affine_fn_free (overlaps_a_yz);
4232   affine_fn_free (overlaps_b_yz);
4233   affine_fn_free (overlaps_a_xyz);
4234   affine_fn_free (overlaps_b_xyz);
4235 }
4236
4237 /* Copy the elements of vector VEC1 with length SIZE to VEC2.  */
4238
4239 static void
4240 lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
4241                     int size)
4242 {
4243   memcpy (vec2, vec1, size * sizeof (*vec1));
4244 }
4245
4246 /* Copy the elements of M x N matrix MAT1 to MAT2.  */
4247
4248 static void
4249 lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
4250                     int m, int n)
4251 {
4252   int i;
4253
4254   for (i = 0; i < m; i++)
4255     lambda_vector_copy (mat1[i], mat2[i], n);
4256 }
4257
4258 /* Store the N x N identity matrix in MAT.  */
4259
4260 static void
4261 lambda_matrix_id (lambda_matrix mat, int size)
4262 {
4263   int i, j;
4264
4265   for (i = 0; i < size; i++)
4266     for (j = 0; j < size; j++)
4267       mat[i][j] = (i == j) ? 1 : 0;
4268 }
4269
4270 /* Return the index of the first nonzero element of vector VEC1 between
4271    START and N.  We must have START <= N.
4272    Returns N if VEC1 is the zero vector.  */
4273
4274 static int
4275 lambda_vector_first_nz (lambda_vector vec1, int n, int start)
4276 {
4277   int j = start;
4278   while (j < n && vec1[j] == 0)
4279     j++;
4280   return j;
4281 }
4282
4283 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
4284    R2 = R2 + CONST1 * R1.  */
4285
4286 static bool
4287 lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2,
4288                        lambda_int const1)
4289 {
4290   int i;
4291
4292   if (const1 == 0)
4293     return true;
4294
4295   for (i = 0; i < n; i++)
4296     {
4297       bool ovf;
4298       lambda_int tem = mul_hwi (mat[r1][i], const1, &ovf);
4299       if (ovf)
4300         return false;
4301       lambda_int tem2 = add_hwi (mat[r2][i], tem, &ovf);
4302       if (ovf || tem2 == HOST_WIDE_INT_MIN)
4303         return false;
4304       mat[r2][i] = tem2;
4305     }
4306
4307   return true;
4308 }
4309
4310 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
4311    and store the result in VEC2.  */
4312
4313 static void
4314 lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
4315                           int size, lambda_int const1)
4316 {
4317   int i;
4318
4319   if (const1 == 0)
4320     lambda_vector_clear (vec2, size);
4321   else
4322     for (i = 0; i < size; i++)
4323       vec2[i] = const1 * vec1[i];
4324 }
4325
4326 /* Negate vector VEC1 with length SIZE and store it in VEC2.  */
4327
4328 static void
4329 lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
4330                       int size)
4331 {
4332   lambda_vector_mult_const (vec1, vec2, size, -1);
4333 }
4334
4335 /* Negate row R1 of matrix MAT which has N columns.  */
4336
4337 static void
4338 lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
4339 {
4340   lambda_vector_negate (mat[r1], mat[r1], n);
4341 }
4342
4343 /* Return true if two vectors are equal.  */
4344
4345 static bool
4346 lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
4347 {
4348   int i;
4349   for (i = 0; i < size; i++)
4350     if (vec1[i] != vec2[i])
4351       return false;
4352   return true;
4353 }
4354
4355 /* Given an M x N integer matrix A, this function determines an M x
4356    M unimodular matrix U, and an M x N echelon matrix S such that
4357    "U.A = S".  This decomposition is also known as "right Hermite".
4358
4359    Ref: Algorithm 2.1 page 33 in "Loop Transformations for
4360    Restructuring Compilers" Utpal Banerjee.  */
4361
4362 static bool
4363 lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
4364                              lambda_matrix S, lambda_matrix U)
4365 {
4366   int i, j, i0 = 0;
4367
4368   lambda_matrix_copy (A, S, m, n);
4369   lambda_matrix_id (U, m);
4370
4371   for (j = 0; j < n; j++)
4372     {
4373       if (lambda_vector_first_nz (S[j], m, i0) < m)
4374         {
4375           ++i0;
4376           for (i = m - 1; i >= i0; i--)
4377             {
4378               while (S[i][j] != 0)
4379                 {
4380                   lambda_int factor, a, b;
4381
4382                   a = S[i-1][j];
4383                   b = S[i][j];
4384                   gcc_assert (a != HOST_WIDE_INT_MIN);
4385                   factor = a / b;
4386
4387                   if (!lambda_matrix_row_add (S, n, i, i-1, -factor))
4388                     return false;
4389                   std::swap (S[i], S[i-1]);
4390
4391                   if (!lambda_matrix_row_add (U, m, i, i-1, -factor))
4392                     return false;
4393                   std::swap (U[i], U[i-1]);
4394                 }
4395             }
4396         }
4397     }
4398
4399   return true;
4400 }
4401
4402 /* Determines the overlapping elements due to accesses CHREC_A and
4403    CHREC_B, that are affine functions.  This function cannot handle
4404    symbolic evolution functions, ie. when initial conditions are
4405    parameters, because it uses lambda matrices of integers.  */
4406
4407 static void
4408 analyze_subscript_affine_affine (tree chrec_a,
4409                                  tree chrec_b,
4410                                  conflict_function **overlaps_a,
4411                                  conflict_function **overlaps_b,
4412                                  tree *last_conflicts)
4413 {
4414   unsigned nb_vars_a, nb_vars_b, dim;
4415   lambda_int gamma, gcd_alpha_beta;
4416   lambda_matrix A, U, S;
4417   struct obstack scratch_obstack;
4418
4419   if (eq_evolutions_p (chrec_a, chrec_b))
4420     {
4421       /* The accessed index overlaps for each iteration in the
4422          loop.  */
4423       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4424       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4425       *last_conflicts = chrec_dont_know;
4426       return;
4427     }
4428   if (dump_file && (dump_flags & TDF_DETAILS))
4429     fprintf (dump_file, "(analyze_subscript_affine_affine \n");
4430
4431   /* For determining the initial intersection, we have to solve a
4432      Diophantine equation.  This is the most time consuming part.
4433
4434      For answering to the question: "Is there a dependence?" we have
4435      to prove that there exists a solution to the Diophantine
4436      equation, and that the solution is in the iteration domain,
4437      i.e. the solution is positive or zero, and that the solution
4438      happens before the upper bound loop.nb_iterations.  Otherwise
4439      there is no dependence.  This function outputs a description of
4440      the iterations that hold the intersections.  */
4441
4442   nb_vars_a = nb_vars_in_chrec (chrec_a);
4443   nb_vars_b = nb_vars_in_chrec (chrec_b);
4444
4445   gcc_obstack_init (&scratch_obstack);
4446
4447   dim = nb_vars_a + nb_vars_b;
4448   U = lambda_matrix_new (dim, dim, &scratch_obstack);
4449   A = lambda_matrix_new (dim, 1, &scratch_obstack);
4450   S = lambda_matrix_new (dim, 1, &scratch_obstack);
4451
4452   tree init_a = initialize_matrix_A (A, chrec_a, 0, 1);
4453   tree init_b = initialize_matrix_A (A, chrec_b, nb_vars_a, -1);
4454   if (init_a == chrec_dont_know
4455       || init_b == chrec_dont_know)
4456     {
4457       if (dump_file && (dump_flags & TDF_DETAILS))
4458         fprintf (dump_file, "affine-affine test failed: "
4459                  "representation issue.\n");
4460       *overlaps_a = conflict_fn_not_known ();
4461       *overlaps_b = conflict_fn_not_known ();
4462       *last_conflicts = chrec_dont_know;
4463       goto end_analyze_subs_aa;
4464     }
4465   gamma = int_cst_value (init_b) - int_cst_value (init_a);
4466
4467   /* Don't do all the hard work of solving the Diophantine equation
4468      when we already know the solution: for example,
4469      | {3, +, 1}_1
4470      | {3, +, 4}_2
4471      | gamma = 3 - 3 = 0.
4472      Then the first overlap occurs during the first iterations:
4473      | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
4474   */
4475   if (gamma == 0)
4476     {
4477       if (nb_vars_a == 1 && nb_vars_b == 1)
4478         {
4479           HOST_WIDE_INT step_a, step_b;
4480           HOST_WIDE_INT niter, niter_a, niter_b;
4481           affine_fn ova, ovb;
4482
4483           niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a));
4484           niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b));
4485           niter = MIN (niter_a, niter_b);
4486           step_a = int_cst_value (CHREC_RIGHT (chrec_a));
4487           step_b = int_cst_value (CHREC_RIGHT (chrec_b));
4488
4489           compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
4490                                                    &ova, &ovb,
4491                                                    last_conflicts, 1);
4492           *overlaps_a = conflict_fn (1, ova);
4493           *overlaps_b = conflict_fn (1, ovb);
4494         }
4495
4496       else if (nb_vars_a == 2 && nb_vars_b == 1)
4497         compute_overlap_steps_for_affine_1_2
4498           (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
4499
4500       else if (nb_vars_a == 1 && nb_vars_b == 2)
4501         compute_overlap_steps_for_affine_1_2
4502           (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
4503
4504       else
4505         {
4506           if (dump_file && (dump_flags & TDF_DETAILS))
4507             fprintf (dump_file, "affine-affine test failed: too many variables.\n");
4508           *overlaps_a = conflict_fn_not_known ();
4509           *overlaps_b = conflict_fn_not_known ();
4510           *last_conflicts = chrec_dont_know;
4511         }
4512       goto end_analyze_subs_aa;
4513     }
4514
4515   /* U.A = S */
4516   if (!lambda_matrix_right_hermite (A, dim, 1, S, U))
4517     {
4518       *overlaps_a = conflict_fn_not_known ();
4519       *overlaps_b = conflict_fn_not_known ();
4520       *last_conflicts = chrec_dont_know;
4521       goto end_analyze_subs_aa;
4522     }
4523
4524   if (S[0][0] < 0)
4525     {
4526       S[0][0] *= -1;
4527       lambda_matrix_row_negate (U, dim, 0);
4528     }
4529   gcd_alpha_beta = S[0][0];
4530
4531   /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
4532      but that is a quite strange case.  Instead of ICEing, answer
4533      don't know.  */
4534   if (gcd_alpha_beta == 0)
4535     {
4536       *overlaps_a = conflict_fn_not_known ();
4537       *overlaps_b = conflict_fn_not_known ();
4538       *last_conflicts = chrec_dont_know;
4539       goto end_analyze_subs_aa;
4540     }
4541
4542   /* The classic "gcd-test".  */
4543   if (!int_divides_p (gcd_alpha_beta, gamma))
4544     {
4545       /* The "gcd-test" has determined that there is no integer
4546          solution, i.e. there is no dependence.  */
4547       *overlaps_a = conflict_fn_no_dependence ();
4548       *overlaps_b = conflict_fn_no_dependence ();
4549       *last_conflicts = integer_zero_node;
4550     }
4551
4552   /* Both access functions are univariate.  This includes SIV and MIV cases.  */
4553   else if (nb_vars_a == 1 && nb_vars_b == 1)
4554     {
4555       /* Both functions should have the same evolution sign.  */
4556       if (((A[0][0] > 0 && -A[1][0] > 0)
4557            || (A[0][0] < 0 && -A[1][0] < 0)))
4558         {
4559           /* The solutions are given by:
4560              |
4561              | [GAMMA/GCD_ALPHA_BETA  t].[u11 u12]  = [x0]
4562              |                           [u21 u22]    [y0]
4563
4564              For a given integer t.  Using the following variables,
4565
4566              | i0 = u11 * gamma / gcd_alpha_beta
4567              | j0 = u12 * gamma / gcd_alpha_beta
4568              | i1 = u21
4569              | j1 = u22
4570
4571              the solutions are:
4572
4573              | x0 = i0 + i1 * t,
4574              | y0 = j0 + j1 * t.  */
4575           HOST_WIDE_INT i0, j0, i1, j1;
4576
4577           i0 = U[0][0] * gamma / gcd_alpha_beta;
4578           j0 = U[0][1] * gamma / gcd_alpha_beta;
4579           i1 = U[1][0];
4580           j1 = U[1][1];
4581
4582           if ((i1 == 0 && i0 < 0)
4583               || (j1 == 0 && j0 < 0))
4584             {
4585               /* There is no solution.
4586                  FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
4587                  falls in here, but for the moment we don't look at the
4588                  upper bound of the iteration domain.  */
4589               *overlaps_a = conflict_fn_no_dependence ();
4590               *overlaps_b = conflict_fn_no_dependence ();
4591               *last_conflicts = integer_zero_node;
4592               goto end_analyze_subs_aa;
4593             }
4594
4595           if (i1 > 0 && j1 > 0)
4596             {
4597               HOST_WIDE_INT niter_a
4598                 = max_stmt_executions_int (get_chrec_loop (chrec_a));
4599               HOST_WIDE_INT niter_b
4600                 = max_stmt_executions_int (get_chrec_loop (chrec_b));
4601               HOST_WIDE_INT niter = MIN (niter_a, niter_b);
4602
4603               /* (X0, Y0) is a solution of the Diophantine equation:
4604                  "chrec_a (X0) = chrec_b (Y0)".  */
4605               HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
4606                                         CEIL (-j0, j1));
4607               HOST_WIDE_INT x0 = i1 * tau1 + i0;
4608               HOST_WIDE_INT y0 = j1 * tau1 + j0;
4609
4610               /* (X1, Y1) is the smallest positive solution of the eq
4611                  "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
4612                  first conflict occurs.  */
4613               HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
4614               HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
4615               HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
4616
4617               if (niter > 0)
4618                 {
4619                   /* If the overlap occurs outside of the bounds of the
4620                      loop, there is no dependence.  */
4621                   if (x1 >= niter_a || y1 >= niter_b)
4622                     {
4623                       *overlaps_a = conflict_fn_no_dependence ();
4624                       *overlaps_b = conflict_fn_no_dependence ();
4625                       *last_conflicts = integer_zero_node;
4626                       goto end_analyze_subs_aa;
4627                     }
4628
4629                   /* max stmt executions can get quite large, avoid
4630                      overflows by using wide ints here.  */
4631                   widest_int tau2
4632                     = wi::smin (wi::sdiv_floor (wi::sub (niter_a, i0), i1),
4633                                 wi::sdiv_floor (wi::sub (niter_b, j0), j1));
4634                   widest_int last_conflict = wi::sub (tau2, (x1 - i0)/i1);
4635                   if (wi::min_precision (last_conflict, SIGNED)
4636                       <= TYPE_PRECISION (integer_type_node))
4637                     *last_conflicts
4638                        = build_int_cst (integer_type_node,
4639                                         last_conflict.to_shwi ());
4640                   else
4641                     *last_conflicts = chrec_dont_know;
4642                 }
4643               else
4644                 *last_conflicts = chrec_dont_know;
4645
4646               *overlaps_a
4647                 = conflict_fn (1,
4648                                affine_fn_univar (build_int_cst (NULL_TREE, x1),
4649                                                  1,
4650                                                  build_int_cst (NULL_TREE, i1)));
4651               *overlaps_b
4652                 = conflict_fn (1,
4653                                affine_fn_univar (build_int_cst (NULL_TREE, y1),
4654                                                  1,
4655                                                  build_int_cst (NULL_TREE, j1)));
4656             }
4657           else
4658             {
4659               /* FIXME: For the moment, the upper bound of the
4660                  iteration domain for i and j is not checked.  */
4661               if (dump_file && (dump_flags & TDF_DETAILS))
4662                 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
4663               *overlaps_a = conflict_fn_not_known ();
4664               *overlaps_b = conflict_fn_not_known ();
4665               *last_conflicts = chrec_dont_know;
4666             }
4667         }
4668       else
4669         {
4670           if (dump_file && (dump_flags & TDF_DETAILS))
4671             fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
4672           *overlaps_a = conflict_fn_not_known ();
4673           *overlaps_b = conflict_fn_not_known ();
4674           *last_conflicts = chrec_dont_know;
4675         }
4676     }
4677   else
4678     {
4679       if (dump_file && (dump_flags & TDF_DETAILS))
4680         fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
4681       *overlaps_a = conflict_fn_not_known ();
4682       *overlaps_b = conflict_fn_not_known ();
4683       *last_conflicts = chrec_dont_know;
4684     }
4685
4686 end_analyze_subs_aa:
4687   obstack_free (&scratch_obstack, NULL);
4688   if (dump_file && (dump_flags & TDF_DETAILS))
4689     {
4690       fprintf (dump_file, "  (overlaps_a = ");
4691       dump_conflict_function (dump_file, *overlaps_a);
4692       fprintf (dump_file, ")\n  (overlaps_b = ");
4693       dump_conflict_function (dump_file, *overlaps_b);
4694       fprintf (dump_file, "))\n");
4695     }
4696 }
4697
4698 /* Returns true when analyze_subscript_affine_affine can be used for
4699    determining the dependence relation between chrec_a and chrec_b,
4700    that contain symbols.  This function modifies chrec_a and chrec_b
4701    such that the analysis result is the same, and such that they don't
4702    contain symbols, and then can safely be passed to the analyzer.
4703
4704    Example: The analysis of the following tuples of evolutions produce
4705    the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
4706    vs. {0, +, 1}_1
4707
4708    {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
4709    {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
4710 */
4711
4712 static bool
4713 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
4714 {
4715   tree diff, type, left_a, left_b, right_b;
4716
4717   if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
4718       || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
4719     /* FIXME: For the moment not handled.  Might be refined later.  */
4720     return false;
4721
4722   type = chrec_type (*chrec_a);
4723   left_a = CHREC_LEFT (*chrec_a);
4724   left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
4725   diff = chrec_fold_minus (type, left_a, left_b);
4726
4727   if (!evolution_function_is_constant_p (diff))
4728     return false;
4729
4730   if (dump_file && (dump_flags & TDF_DETAILS))
4731     fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
4732
4733   *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
4734                                      diff, CHREC_RIGHT (*chrec_a));
4735   right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
4736   *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
4737                                      build_int_cst (type, 0),
4738                                      right_b);
4739   return true;
4740 }
4741
4742 /* Analyze a SIV (Single Index Variable) subscript.  *OVERLAPS_A and
4743    *OVERLAPS_B are initialized to the functions that describe the
4744    relation between the elements accessed twice by CHREC_A and
4745    CHREC_B.  For k >= 0, the following property is verified:
4746
4747    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
4748
4749 static void
4750 analyze_siv_subscript (tree chrec_a,
4751                        tree chrec_b,
4752                        conflict_function **overlaps_a,
4753                        conflict_function **overlaps_b,
4754                        tree *last_conflicts,
4755                        int loop_nest_num)
4756 {
4757   dependence_stats.num_siv++;
4758
4759   if (dump_file && (dump_flags & TDF_DETAILS))
4760     fprintf (dump_file, "(analyze_siv_subscript \n");
4761
4762   if (evolution_function_is_constant_p (chrec_a)
4763       && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
4764     analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
4765                                       overlaps_a, overlaps_b, last_conflicts);
4766
4767   else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
4768            && evolution_function_is_constant_p (chrec_b))
4769     analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
4770                                       overlaps_b, overlaps_a, last_conflicts);
4771
4772   else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
4773            && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
4774     {
4775       if (!chrec_contains_symbols (chrec_a)
4776           && !chrec_contains_symbols (chrec_b))
4777         {
4778           analyze_subscript_affine_affine (chrec_a, chrec_b,
4779                                            overlaps_a, overlaps_b,
4780                                            last_conflicts);
4781
4782           if (CF_NOT_KNOWN_P (*overlaps_a)
4783               || CF_NOT_KNOWN_P (*overlaps_b))
4784             dependence_stats.num_siv_unimplemented++;
4785           else if (CF_NO_DEPENDENCE_P (*overlaps_a)
4786                    || CF_NO_DEPENDENCE_P (*overlaps_b))
4787             dependence_stats.num_siv_independent++;
4788           else
4789             dependence_stats.num_siv_dependent++;
4790         }
4791       else if (can_use_analyze_subscript_affine_affine (&chrec_a,
4792                                                         &chrec_b))
4793         {
4794           analyze_subscript_affine_affine (chrec_a, chrec_b,
4795                                            overlaps_a, overlaps_b,
4796                                            last_conflicts);
4797
4798           if (CF_NOT_KNOWN_P (*overlaps_a)
4799               || CF_NOT_KNOWN_P (*overlaps_b))
4800             dependence_stats.num_siv_unimplemented++;
4801           else if (CF_NO_DEPENDENCE_P (*overlaps_a)
4802                    || CF_NO_DEPENDENCE_P (*overlaps_b))
4803             dependence_stats.num_siv_independent++;
4804           else
4805             dependence_stats.num_siv_dependent++;
4806         }
4807       else
4808         goto siv_subscript_dontknow;
4809     }
4810
4811   else
4812     {
4813     siv_subscript_dontknow:;
4814       if (dump_file && (dump_flags & TDF_DETAILS))
4815         fprintf (dump_file, "  siv test failed: unimplemented");
4816       *overlaps_a = conflict_fn_not_known ();
4817       *overlaps_b = conflict_fn_not_known ();
4818       *last_conflicts = chrec_dont_know;
4819       dependence_stats.num_siv_unimplemented++;
4820     }
4821
4822   if (dump_file && (dump_flags & TDF_DETAILS))
4823     fprintf (dump_file, ")\n");
4824 }
4825
4826 /* Returns false if we can prove that the greatest common divisor of the steps
4827    of CHREC does not divide CST, false otherwise.  */
4828
4829 static bool
4830 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
4831 {
4832   HOST_WIDE_INT cd = 0, val;
4833   tree step;
4834
4835   if (!tree_fits_shwi_p (cst))
4836     return true;
4837   val = tree_to_shwi (cst);
4838
4839   while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
4840     {
4841       step = CHREC_RIGHT (chrec);
4842       if (!tree_fits_shwi_p (step))
4843         return true;
4844       cd = gcd (cd, tree_to_shwi (step));
4845       chrec = CHREC_LEFT (chrec);
4846     }
4847
4848   return val % cd == 0;
4849 }
4850
4851 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
4852    LOOP_NEST.  *OVERLAPS_A and *OVERLAPS_B are initialized to the
4853    functions that describe the relation between the elements accessed
4854    twice by CHREC_A and CHREC_B.  For k >= 0, the following property
4855    is verified:
4856
4857    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
4858
4859 static void
4860 analyze_miv_subscript (tree chrec_a,
4861                        tree chrec_b,
4862                        conflict_function **overlaps_a,
4863                        conflict_function **overlaps_b,
4864                        tree *last_conflicts,
4865                        class loop *loop_nest)
4866 {
4867   tree type, difference;
4868
4869   dependence_stats.num_miv++;
4870   if (dump_file && (dump_flags & TDF_DETAILS))
4871     fprintf (dump_file, "(analyze_miv_subscript \n");
4872
4873   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
4874   chrec_a = chrec_convert (type, chrec_a, NULL);
4875   chrec_b = chrec_convert (type, chrec_b, NULL);
4876   difference = chrec_fold_minus (type, chrec_a, chrec_b);
4877
4878   if (eq_evolutions_p (chrec_a, chrec_b))
4879     {
4880       /* Access functions are the same: all the elements are accessed
4881          in the same order.  */
4882       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4883       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4884       *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
4885       dependence_stats.num_miv_dependent++;
4886     }
4887
4888   else if (evolution_function_is_constant_p (difference)
4889            && evolution_function_is_affine_multivariate_p (chrec_a,
4890                                                            loop_nest->num)
4891            && !gcd_of_steps_may_divide_p (chrec_a, difference))
4892     {
4893       /* testsuite/.../ssa-chrec-33.c
4894          {{21, +, 2}_1, +, -2}_2  vs.  {{20, +, 2}_1, +, -2}_2
4895
4896          The difference is 1, and all the evolution steps are multiples
4897          of 2, consequently there are no overlapping elements.  */
4898       *overlaps_a = conflict_fn_no_dependence ();
4899       *overlaps_b = conflict_fn_no_dependence ();
4900       *last_conflicts = integer_zero_node;
4901       dependence_stats.num_miv_independent++;
4902     }
4903
4904   else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest->num)
4905            && !chrec_contains_symbols (chrec_a, loop_nest)
4906            && evolution_function_is_affine_in_loop (chrec_b, loop_nest->num)
4907            && !chrec_contains_symbols (chrec_b, loop_nest))
4908     {
4909       /* testsuite/.../ssa-chrec-35.c
4910          {0, +, 1}_2  vs.  {0, +, 1}_3
4911          the overlapping elements are respectively located at iterations:
4912          {0, +, 1}_x and {0, +, 1}_x,
4913          in other words, we have the equality:
4914          {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
4915
4916          Other examples:
4917          {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
4918          {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
4919
4920          {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
4921          {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
4922       */
4923       analyze_subscript_affine_affine (chrec_a, chrec_b,
4924                                        overlaps_a, overlaps_b, last_conflicts);
4925
4926       if (CF_NOT_KNOWN_P (*overlaps_a)
4927           || CF_NOT_KNOWN_P (*overlaps_b))
4928         dependence_stats.num_miv_unimplemented++;
4929       else if (CF_NO_DEPENDENCE_P (*overlaps_a)
4930                || CF_NO_DEPENDENCE_P (*overlaps_b))
4931         dependence_stats.num_miv_independent++;
4932       else
4933         dependence_stats.num_miv_dependent++;
4934     }
4935
4936   else
4937     {
4938       /* When the analysis is too difficult, answer "don't know".  */
4939       if (dump_file && (dump_flags & TDF_DETAILS))
4940         fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
4941
4942       *overlaps_a = conflict_fn_not_known ();
4943       *overlaps_b = conflict_fn_not_known ();
4944       *last_conflicts = chrec_dont_know;
4945       dependence_stats.num_miv_unimplemented++;
4946     }
4947
4948   if (dump_file && (dump_flags & TDF_DETAILS))
4949     fprintf (dump_file, ")\n");
4950 }
4951
4952 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
4953    with respect to LOOP_NEST.  OVERLAP_ITERATIONS_A and
4954    OVERLAP_ITERATIONS_B are initialized with two functions that
4955    describe the iterations that contain conflicting elements.
4956
4957    Remark: For an integer k >= 0, the following equality is true:
4958
4959    CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
4960 */
4961
4962 static void
4963 analyze_overlapping_iterations (tree chrec_a,
4964                                 tree chrec_b,
4965                                 conflict_function **overlap_iterations_a,
4966                                 conflict_function **overlap_iterations_b,
4967                                 tree *last_conflicts, class loop *loop_nest)
4968 {
4969   unsigned int lnn = loop_nest->num;
4970
4971   dependence_stats.num_subscript_tests++;
4972
4973   if (dump_file && (dump_flags & TDF_DETAILS))
4974     {
4975       fprintf (dump_file, "(analyze_overlapping_iterations \n");
4976       fprintf (dump_file, "  (chrec_a = ");
4977       print_generic_expr (dump_file, chrec_a);
4978       fprintf (dump_file, ")\n  (chrec_b = ");
4979       print_generic_expr (dump_file, chrec_b);
4980       fprintf (dump_file, ")\n");
4981     }
4982
4983   if (chrec_a == NULL_TREE
4984       || chrec_b == NULL_TREE
4985       || chrec_contains_undetermined (chrec_a)
4986       || chrec_contains_undetermined (chrec_b))
4987     {
4988       dependence_stats.num_subscript_undetermined++;
4989
4990       *overlap_iterations_a = conflict_fn_not_known ();
4991       *overlap_iterations_b = conflict_fn_not_known ();
4992     }
4993
4994   /* If they are the same chrec, and are affine, they overlap
4995      on every iteration.  */
4996   else if (eq_evolutions_p (chrec_a, chrec_b)
4997            && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
4998                || operand_equal_p (chrec_a, chrec_b, 0)))
4999     {
5000       dependence_stats.num_same_subscript_function++;
5001       *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
5002       *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
5003       *last_conflicts = chrec_dont_know;
5004     }
5005
5006   /* If they aren't the same, and aren't affine, we can't do anything
5007      yet.  */
5008   else if ((chrec_contains_symbols (chrec_a)
5009             || chrec_contains_symbols (chrec_b))
5010            && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
5011                || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
5012     {
5013       dependence_stats.num_subscript_undetermined++;
5014       *overlap_iterations_a = conflict_fn_not_known ();
5015       *overlap_iterations_b = conflict_fn_not_known ();
5016     }
5017
5018   else if (ziv_subscript_p (chrec_a, chrec_b))
5019     analyze_ziv_subscript (chrec_a, chrec_b,
5020                            overlap_iterations_a, overlap_iterations_b,
5021                            last_conflicts);
5022
5023   else if (siv_subscript_p (chrec_a, chrec_b))
5024     analyze_siv_subscript (chrec_a, chrec_b,
5025                            overlap_iterations_a, overlap_iterations_b,
5026                            last_conflicts, lnn);
5027
5028   else
5029     analyze_miv_subscript (chrec_a, chrec_b,
5030                            overlap_iterations_a, overlap_iterations_b,
5031                            last_conflicts, loop_nest);
5032
5033   if (dump_file && (dump_flags & TDF_DETAILS))
5034     {
5035       fprintf (dump_file, "  (overlap_iterations_a = ");
5036       dump_conflict_function (dump_file, *overlap_iterations_a);
5037       fprintf (dump_file, ")\n  (overlap_iterations_b = ");
5038       dump_conflict_function (dump_file, *overlap_iterations_b);
5039       fprintf (dump_file, "))\n");
5040     }
5041 }
5042
5043 /* Helper function for uniquely inserting distance vectors.  */
5044
5045 static void
5046 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
5047 {
5048   for (lambda_vector v : DDR_DIST_VECTS (ddr))
5049     if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
5050       return;
5051
5052   DDR_DIST_VECTS (ddr).safe_push (dist_v);
5053 }
5054
5055 /* Helper function for uniquely inserting direction vectors.  */
5056
5057 static void
5058 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
5059 {
5060   for (lambda_vector v : DDR_DIR_VECTS (ddr))
5061     if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
5062       return;
5063
5064   DDR_DIR_VECTS (ddr).safe_push (dir_v);
5065 }
5066
5067 /* Add a distance of 1 on all the loops outer than INDEX.  If we
5068    haven't yet determined a distance for this outer loop, push a new
5069    distance vector composed of the previous distance, and a distance
5070    of 1 for this outer loop.  Example:
5071
5072    | loop_1
5073    |   loop_2
5074    |     A[10]
5075    |   endloop_2
5076    | endloop_1
5077
5078    Saved vectors are of the form (dist_in_1, dist_in_2).  First, we
5079    save (0, 1), then we have to save (1, 0).  */
5080
5081 static void
5082 add_outer_distances (struct data_dependence_relation *ddr,
5083                      lambda_vector dist_v, int index)
5084 {
5085   /* For each outer loop where init_v is not set, the accesses are
5086      in dependence of distance 1 in the loop.  */
5087   while (--index >= 0)
5088     {
5089       lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5090       lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
5091       save_v[index] = 1;
5092       save_dist_v (ddr, save_v);
5093     }
5094 }
5095
5096 /* Return false when fail to represent the data dependence as a
5097    distance vector.  A_INDEX is the index of the first reference
5098    (0 for DDR_A, 1 for DDR_B) and B_INDEX is the index of the
5099    second reference.  INIT_B is set to true when a component has been
5100    added to the distance vector DIST_V.  INDEX_CARRY is then set to
5101    the index in DIST_V that carries the dependence.  */
5102
5103 static bool
5104 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
5105                              unsigned int a_index, unsigned int b_index,
5106                              lambda_vector dist_v, bool *init_b,
5107                              int *index_carry)
5108 {
5109   unsigned i;
5110   lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5111   class loop *loop = DDR_LOOP_NEST (ddr)[0];
5112
5113   for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
5114     {
5115       tree access_fn_a, access_fn_b;
5116       struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
5117
5118       if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
5119         {
5120           non_affine_dependence_relation (ddr);
5121           return false;
5122         }
5123
5124       access_fn_a = SUB_ACCESS_FN (subscript, a_index);
5125       access_fn_b = SUB_ACCESS_FN (subscript, b_index);
5126
5127       if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
5128           && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
5129         {
5130           HOST_WIDE_INT dist;
5131           int index;
5132           int var_a = CHREC_VARIABLE (access_fn_a);
5133           int var_b = CHREC_VARIABLE (access_fn_b);
5134
5135           if (var_a != var_b
5136               || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
5137             {
5138               non_affine_dependence_relation (ddr);
5139               return false;
5140             }
5141
5142           /* When data references are collected in a loop while data
5143              dependences are analyzed in loop nest nested in the loop, we
5144              would have more number of access functions than number of
5145              loops.  Skip access functions of loops not in the loop nest.
5146
5147              See PR89725 for more information.  */
5148           if (flow_loop_nested_p (get_loop (cfun, var_a), loop))
5149             continue;
5150
5151           dist = int_cst_value (SUB_DISTANCE (subscript));
5152           index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
5153           *index_carry = MIN (index, *index_carry);
5154
5155           /* This is the subscript coupling test.  If we have already
5156              recorded a distance for this loop (a distance coming from
5157              another subscript), it should be the same.  For example,
5158              in the following code, there is no dependence:
5159
5160              | loop i = 0, N, 1
5161              |   T[i+1][i] = ...
5162              |   ... = T[i][i]
5163              | endloop
5164           */
5165           if (init_v[index] != 0 && dist_v[index] != dist)
5166             {
5167               finalize_ddr_dependent (ddr, chrec_known);
5168               return false;
5169             }
5170
5171           dist_v[index] = dist;
5172           init_v[index] = 1;
5173           *init_b = true;
5174         }
5175       else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
5176         {
5177           /* This can be for example an affine vs. constant dependence
5178              (T[i] vs. T[3]) that is not an affine dependence and is
5179              not representable as a distance vector.  */
5180           non_affine_dependence_relation (ddr);
5181           return false;
5182         }
5183       else
5184         *init_b = true;
5185     }
5186
5187   return true;
5188 }
5189
5190 /* Return true when the DDR contains only invariant access functions wrto. loop
5191    number LNUM.  */
5192
5193 static bool
5194 invariant_access_functions (const struct data_dependence_relation *ddr,
5195                             int lnum)
5196 {
5197   for (subscript *sub : DDR_SUBSCRIPTS (ddr))
5198     if (!evolution_function_is_invariant_p (SUB_ACCESS_FN (sub, 0), lnum)
5199         || !evolution_function_is_invariant_p (SUB_ACCESS_FN (sub, 1), lnum))
5200       return false;
5201
5202   return true;
5203 }
5204
5205 /* Helper function for the case where DDR_A and DDR_B are the same
5206    multivariate access function with a constant step.  For an example
5207    see pr34635-1.c.  */
5208
5209 static void
5210 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
5211 {
5212   int x_1, x_2;
5213   tree c_1 = CHREC_LEFT (c_2);
5214   tree c_0 = CHREC_LEFT (c_1);
5215   lambda_vector dist_v;
5216   HOST_WIDE_INT v1, v2, cd;
5217
5218   /* Polynomials with more than 2 variables are not handled yet.  When
5219      the evolution steps are parameters, it is not possible to
5220      represent the dependence using classical distance vectors.  */
5221   if (TREE_CODE (c_0) != INTEGER_CST
5222       || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
5223       || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
5224     {
5225       DDR_AFFINE_P (ddr) = false;
5226       return;
5227     }
5228
5229   x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
5230   x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
5231
5232   /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2).  */
5233   dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5234   v1 = int_cst_value (CHREC_RIGHT (c_1));
5235   v2 = int_cst_value (CHREC_RIGHT (c_2));
5236   cd = gcd (v1, v2);
5237   v1 /= cd;
5238   v2 /= cd;
5239
5240   if (v2 < 0)
5241     {
5242       v2 = -v2;
5243       v1 = -v1;
5244     }
5245
5246   dist_v[x_1] = v2;
5247   dist_v[x_2] = -v1;
5248   save_dist_v (ddr, dist_v);
5249
5250   add_outer_distances (ddr, dist_v, x_1);
5251 }
5252
5253 /* Helper function for the case where DDR_A and DDR_B are the same
5254    access functions.  */
5255
5256 static void
5257 add_other_self_distances (struct data_dependence_relation *ddr)
5258 {
5259   lambda_vector dist_v;
5260   unsigned i;
5261   int index_carry = DDR_NB_LOOPS (ddr);
5262   subscript *sub;
5263   class loop *loop = DDR_LOOP_NEST (ddr)[0];
5264
5265   FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
5266     {
5267       tree access_fun = SUB_ACCESS_FN (sub, 0);
5268
5269       if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
5270         {
5271           if (!evolution_function_is_univariate_p (access_fun, loop->num))
5272             {
5273               if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
5274                 {
5275                   DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
5276                   return;
5277                 }
5278
5279               access_fun = SUB_ACCESS_FN (DDR_SUBSCRIPT (ddr, 0), 0);
5280
5281               if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
5282                 add_multivariate_self_dist (ddr, access_fun);
5283               else
5284                 /* The evolution step is not constant: it varies in
5285                    the outer loop, so this cannot be represented by a
5286                    distance vector.  For example in pr34635.c the
5287                    evolution is {0, +, {0, +, 4}_1}_2.  */
5288                 DDR_AFFINE_P (ddr) = false;
5289
5290               return;
5291             }
5292
5293           /* When data references are collected in a loop while data
5294              dependences are analyzed in loop nest nested in the loop, we
5295              would have more number of access functions than number of
5296              loops.  Skip access functions of loops not in the loop nest.
5297
5298              See PR89725 for more information.  */
5299           if (flow_loop_nested_p (get_loop (cfun, CHREC_VARIABLE (access_fun)),
5300                                   loop))
5301             continue;
5302
5303           index_carry = MIN (index_carry,
5304                              index_in_loop_nest (CHREC_VARIABLE (access_fun),
5305                                                  DDR_LOOP_NEST (ddr)));
5306         }
5307     }
5308
5309   dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5310   add_outer_distances (ddr, dist_v, index_carry);
5311 }
5312
5313 static void
5314 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
5315 {
5316   lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5317
5318   dist_v[0] = 1;
5319   save_dist_v (ddr, dist_v);
5320 }
5321
5322 /* Adds a unit distance vector to DDR when there is a 0 overlap.  This
5323    is the case for example when access functions are the same and
5324    equal to a constant, as in:
5325
5326    | loop_1
5327    |   A[3] = ...
5328    |   ... = A[3]
5329    | endloop_1
5330
5331    in which case the distance vectors are (0) and (1).  */
5332
5333 static void
5334 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
5335 {
5336   unsigned i, j;
5337
5338   for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
5339     {
5340       subscript_p sub = DDR_SUBSCRIPT (ddr, i);
5341       conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
5342       conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
5343
5344       for (j = 0; j < ca->n; j++)
5345         if (affine_function_zero_p (ca->fns[j]))
5346           {
5347             insert_innermost_unit_dist_vector (ddr);
5348             return;
5349           }
5350
5351       for (j = 0; j < cb->n; j++)
5352         if (affine_function_zero_p (cb->fns[j]))
5353           {
5354             insert_innermost_unit_dist_vector (ddr);
5355             return;
5356           }
5357     }
5358 }
5359
5360 /* Return true when the DDR contains two data references that have the
5361    same access functions.  */
5362
5363 static inline bool
5364 same_access_functions (const struct data_dependence_relation *ddr)
5365 {
5366   for (subscript *sub : DDR_SUBSCRIPTS (ddr))
5367     if (!eq_evolutions_p (SUB_ACCESS_FN (sub, 0),
5368                           SUB_ACCESS_FN (sub, 1)))
5369       return false;
5370
5371   return true;
5372 }
5373
5374 /* Compute the classic per loop distance vector.  DDR is the data
5375    dependence relation to build a vector from.  Return false when fail
5376    to represent the data dependence as a distance vector.  */
5377
5378 static bool
5379 build_classic_dist_vector (struct data_dependence_relation *ddr,
5380                            class loop *loop_nest)
5381 {
5382   bool init_b = false;
5383   int index_carry = DDR_NB_LOOPS (ddr);
5384   lambda_vector dist_v;
5385
5386   if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
5387     return false;
5388
5389   if (same_access_functions (ddr))
5390     {
5391       /* Save the 0 vector.  */
5392       dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5393       save_dist_v (ddr, dist_v);
5394
5395       if (invariant_access_functions (ddr, loop_nest->num))
5396         add_distance_for_zero_overlaps (ddr);
5397
5398       if (DDR_NB_LOOPS (ddr) > 1)
5399         add_other_self_distances (ddr);
5400
5401       return true;
5402     }
5403
5404   dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5405   if (!build_classic_dist_vector_1 (ddr, 0, 1, dist_v, &init_b, &index_carry))
5406     return false;
5407
5408   /* Save the distance vector if we initialized one.  */
5409   if (init_b)
5410     {
5411       /* Verify a basic constraint: classic distance vectors should
5412          always be lexicographically positive.
5413
5414          Data references are collected in the order of execution of
5415          the program, thus for the following loop
5416
5417          | for (i = 1; i < 100; i++)
5418          |   for (j = 1; j < 100; j++)
5419          |     {
5420          |       t = T[j+1][i-1];  // A
5421          |       T[j][i] = t + 2;  // B
5422          |     }
5423
5424          references are collected following the direction of the wind:
5425          A then B.  The data dependence tests are performed also
5426          following this order, such that we're looking at the distance
5427          separating the elements accessed by A from the elements later
5428          accessed by B.  But in this example, the distance returned by
5429          test_dep (A, B) is lexicographically negative (-1, 1), that
5430          means that the access A occurs later than B with respect to
5431          the outer loop, ie. we're actually looking upwind.  In this
5432          case we solve test_dep (B, A) looking downwind to the
5433          lexicographically positive solution, that returns the
5434          distance vector (1, -1).  */
5435       if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
5436         {
5437           lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5438           if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
5439             return false;
5440           compute_subscript_distance (ddr);
5441           if (!build_classic_dist_vector_1 (ddr, 1, 0, save_v, &init_b,
5442                                             &index_carry))
5443             return false;
5444           save_dist_v (ddr, save_v);
5445           DDR_REVERSED_P (ddr) = true;
5446
5447           /* In this case there is a dependence forward for all the
5448              outer loops:
5449
5450              | for (k = 1; k < 100; k++)
5451              |  for (i = 1; i < 100; i++)
5452              |   for (j = 1; j < 100; j++)
5453              |     {
5454              |       t = T[j+1][i-1];  // A
5455              |       T[j][i] = t + 2;  // B
5456              |     }
5457
5458              the vectors are:
5459              (0,  1, -1)
5460              (1,  1, -1)
5461              (1, -1,  1)
5462           */
5463           if (DDR_NB_LOOPS (ddr) > 1)
5464             {
5465               add_outer_distances (ddr, save_v, index_carry);
5466               add_outer_distances (ddr, dist_v, index_carry);
5467             }
5468         }
5469       else
5470         {
5471           lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5472           lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
5473
5474           if (DDR_NB_LOOPS (ddr) > 1)
5475             {
5476               lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5477
5478               if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
5479                 return false;
5480               compute_subscript_distance (ddr);
5481               if (!build_classic_dist_vector_1 (ddr, 1, 0, opposite_v, &init_b,
5482                                                 &index_carry))
5483                 return false;
5484
5485               save_dist_v (ddr, save_v);
5486               add_outer_distances (ddr, dist_v, index_carry);
5487               add_outer_distances (ddr, opposite_v, index_carry);
5488             }
5489           else
5490             save_dist_v (ddr, save_v);
5491         }
5492     }
5493   else
5494     {
5495       /* There is a distance of 1 on all the outer loops: Example:
5496          there is a dependence of distance 1 on loop_1 for the array A.
5497
5498          | loop_1
5499          |   A[5] = ...
5500          | endloop
5501       */
5502       add_outer_distances (ddr, dist_v,
5503                            lambda_vector_first_nz (dist_v,
5504                                                    DDR_NB_LOOPS (ddr), 0));
5505     }
5506
5507   if (dump_file && (dump_flags & TDF_DETAILS))
5508     {
5509       unsigned i;
5510
5511       fprintf (dump_file, "(build_classic_dist_vector\n");
5512       for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
5513         {
5514           fprintf (dump_file, "  dist_vector = (");
5515           print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
5516                                DDR_NB_LOOPS (ddr));
5517           fprintf (dump_file, "  )\n");
5518         }
5519       fprintf (dump_file, ")\n");
5520     }
5521
5522   return true;
5523 }
5524
5525 /* Return the direction for a given distance.
5526    FIXME: Computing dir this way is suboptimal, since dir can catch
5527    cases that dist is unable to represent.  */
5528
5529 static inline enum data_dependence_direction
5530 dir_from_dist (int dist)
5531 {
5532   if (dist > 0)
5533     return dir_positive;
5534   else if (dist < 0)
5535     return dir_negative;
5536   else
5537     return dir_equal;
5538 }
5539
5540 /* Compute the classic per loop direction vector.  DDR is the data
5541    dependence relation to build a vector from.  */
5542
5543 static void
5544 build_classic_dir_vector (struct data_dependence_relation *ddr)
5545 {
5546   unsigned i, j;
5547   lambda_vector dist_v;
5548
5549   FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
5550     {
5551       lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5552
5553       for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
5554         dir_v[j] = dir_from_dist (dist_v[j]);
5555
5556       save_dir_v (ddr, dir_v);
5557     }
5558 }
5559
5560 /* Helper function.  Returns true when there is a dependence between the
5561    data references.  A_INDEX is the index of the first reference (0 for
5562    DDR_A, 1 for DDR_B) and B_INDEX is the index of the second reference.  */
5563
5564 static bool
5565 subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
5566                                unsigned int a_index, unsigned int b_index,
5567                                class loop *loop_nest)
5568 {
5569   unsigned int i;
5570   tree last_conflicts;
5571   struct subscript *subscript;
5572   tree res = NULL_TREE;
5573
5574   for (i = 0; DDR_SUBSCRIPTS (ddr).iterate (i, &subscript); i++)
5575     {
5576       conflict_function *overlaps_a, *overlaps_b;
5577
5578       analyze_overlapping_iterations (SUB_ACCESS_FN (subscript, a_index),
5579                                       SUB_ACCESS_FN (subscript, b_index),
5580                                       &overlaps_a, &overlaps_b,
5581                                       &last_conflicts, loop_nest);
5582
5583       if (SUB_CONFLICTS_IN_A (subscript))
5584         free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
5585       if (SUB_CONFLICTS_IN_B (subscript))
5586         free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
5587
5588       SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
5589       SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
5590       SUB_LAST_CONFLICT (subscript) = last_conflicts;
5591
5592       /* If there is any undetermined conflict function we have to
5593          give a conservative answer in case we cannot prove that
5594          no dependence exists when analyzing another subscript.  */
5595       if (CF_NOT_KNOWN_P (overlaps_a)
5596           || CF_NOT_KNOWN_P (overlaps_b))
5597         {
5598           res = chrec_dont_know;
5599           continue;
5600         }
5601
5602       /* When there is a subscript with no dependence we can stop.  */
5603       else if (CF_NO_DEPENDENCE_P (overlaps_a)
5604                || CF_NO_DEPENDENCE_P (overlaps_b))
5605         {
5606           res = chrec_known;
5607           break;
5608         }
5609     }
5610
5611   if (res == NULL_TREE)
5612     return true;
5613
5614   if (res == chrec_known)
5615     dependence_stats.num_dependence_independent++;
5616   else
5617     dependence_stats.num_dependence_undetermined++;
5618   finalize_ddr_dependent (ddr, res);
5619   return false;
5620 }
5621
5622 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR.  */
5623
5624 static void
5625 subscript_dependence_tester (struct data_dependence_relation *ddr,
5626                              class loop *loop_nest)
5627 {
5628   if (subscript_dependence_tester_1 (ddr, 0, 1, loop_nest))
5629     dependence_stats.num_dependence_dependent++;
5630
5631   compute_subscript_distance (ddr);
5632   if (build_classic_dist_vector (ddr, loop_nest))
5633     build_classic_dir_vector (ddr);
5634 }
5635
5636 /* Returns true when all the access functions of A are affine or
5637    constant with respect to LOOP_NEST.  */
5638
5639 static bool
5640 access_functions_are_affine_or_constant_p (const struct data_reference *a,
5641                                            const class loop *loop_nest)
5642 {
5643   vec<tree> fns = DR_ACCESS_FNS (a);
5644   for (tree t : fns)
5645     if (!evolution_function_is_invariant_p (t, loop_nest->num)
5646         && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
5647       return false;
5648
5649   return true;
5650 }
5651
5652 /* This computes the affine dependence relation between A and B with
5653    respect to LOOP_NEST.  CHREC_KNOWN is used for representing the
5654    independence between two accesses, while CHREC_DONT_KNOW is used
5655    for representing the unknown relation.
5656
5657    Note that it is possible to stop the computation of the dependence
5658    relation the first time we detect a CHREC_KNOWN element for a given
5659    subscript.  */
5660
5661 void
5662 compute_affine_dependence (struct data_dependence_relation *ddr,
5663                            class loop *loop_nest)
5664 {
5665   struct data_reference *dra = DDR_A (ddr);
5666   struct data_reference *drb = DDR_B (ddr);
5667
5668   if (dump_file && (dump_flags & TDF_DETAILS))
5669     {
5670       fprintf (dump_file, "(compute_affine_dependence\n");
5671       fprintf (dump_file, "  ref_a: ");
5672       print_generic_expr (dump_file, DR_REF (dra));
5673       fprintf (dump_file, ", stmt_a: ");
5674       print_gimple_stmt (dump_file, DR_STMT (dra), 0, TDF_SLIM);
5675       fprintf (dump_file, "  ref_b: ");
5676       print_generic_expr (dump_file, DR_REF (drb));
5677       fprintf (dump_file, ", stmt_b: ");
5678       print_gimple_stmt (dump_file, DR_STMT (drb), 0, TDF_SLIM);
5679     }
5680
5681   /* Analyze only when the dependence relation is not yet known.  */
5682   if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
5683     {
5684       dependence_stats.num_dependence_tests++;
5685
5686       if (access_functions_are_affine_or_constant_p (dra, loop_nest)
5687           && access_functions_are_affine_or_constant_p (drb, loop_nest))
5688         subscript_dependence_tester (ddr, loop_nest);
5689
5690       /* As a last case, if the dependence cannot be determined, or if
5691          the dependence is considered too difficult to determine, answer
5692          "don't know".  */
5693       else
5694         {
5695           dependence_stats.num_dependence_undetermined++;
5696
5697           if (dump_file && (dump_flags & TDF_DETAILS))
5698             {
5699               fprintf (dump_file, "Data ref a:\n");
5700               dump_data_reference (dump_file, dra);
5701               fprintf (dump_file, "Data ref b:\n");
5702               dump_data_reference (dump_file, drb);
5703               fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
5704             }
5705           finalize_ddr_dependent (ddr, chrec_dont_know);
5706         }
5707     }
5708
5709   if (dump_file && (dump_flags & TDF_DETAILS))
5710     {
5711       if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
5712         fprintf (dump_file, ") -> no dependence\n");
5713       else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
5714         fprintf (dump_file, ") -> dependence analysis failed\n");
5715       else
5716         fprintf (dump_file, ")\n");
5717     }
5718 }
5719
5720 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
5721    the data references in DATAREFS, in the LOOP_NEST.  When
5722    COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
5723    relations.  Return true when successful, i.e. data references number
5724    is small enough to be handled.  */
5725
5726 bool
5727 compute_all_dependences (const vec<data_reference_p> &datarefs,
5728                          vec<ddr_p> *dependence_relations,
5729                          const vec<loop_p> &loop_nest,
5730                          bool compute_self_and_rr)
5731 {
5732   struct data_dependence_relation *ddr;
5733   struct data_reference *a, *b;
5734   unsigned int i, j;
5735
5736   if ((int) datarefs.length ()
5737       > param_loop_max_datarefs_for_datadeps)
5738     {
5739       struct data_dependence_relation *ddr;
5740
5741       /* Insert a single relation into dependence_relations:
5742          chrec_dont_know.  */
5743       ddr = initialize_data_dependence_relation (NULL, NULL, loop_nest);
5744       dependence_relations->safe_push (ddr);
5745       return false;
5746     }
5747
5748   FOR_EACH_VEC_ELT (datarefs, i, a)
5749     for (j = i + 1; datarefs.iterate (j, &b); j++)
5750       if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
5751         {
5752           ddr = initialize_data_dependence_relation (a, b, loop_nest);
5753           dependence_relations->safe_push (ddr);
5754           if (loop_nest.exists ())
5755             compute_affine_dependence (ddr, loop_nest[0]);
5756         }
5757
5758   if (compute_self_and_rr)
5759     FOR_EACH_VEC_ELT (datarefs, i, a)
5760       {
5761         ddr = initialize_data_dependence_relation (a, a, loop_nest);
5762         dependence_relations->safe_push (ddr);
5763         if (loop_nest.exists ())
5764           compute_affine_dependence (ddr, loop_nest[0]);
5765       }
5766
5767   return true;
5768 }
5769
5770 /* Describes a location of a memory reference.  */
5771
5772 struct data_ref_loc
5773 {
5774   /* The memory reference.  */
5775   tree ref;
5776
5777   /* True if the memory reference is read.  */
5778   bool is_read;
5779
5780   /* True if the data reference is conditional within the containing
5781      statement, i.e. if it might not occur even when the statement
5782      is executed and runs to completion.  */
5783   bool is_conditional_in_stmt;
5784 };
5785
5786
5787 /* Stores the locations of memory references in STMT to REFERENCES.  Returns
5788    true if STMT clobbers memory, false otherwise.  */
5789
5790 static bool
5791 get_references_in_stmt (gimple *stmt, vec<data_ref_loc, va_heap> *references)
5792 {
5793   bool clobbers_memory = false;
5794   data_ref_loc ref;
5795   tree op0, op1;
5796   enum gimple_code stmt_code = gimple_code (stmt);
5797
5798   /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
5799      As we cannot model data-references to not spelled out
5800      accesses give up if they may occur.  */
5801   if (stmt_code == GIMPLE_CALL
5802       && !(gimple_call_flags (stmt) & ECF_CONST))
5803     {
5804       /* Allow IFN_GOMP_SIMD_LANE in their own loops.  */
5805       if (gimple_call_internal_p (stmt))
5806         switch (gimple_call_internal_fn (stmt))
5807           {
5808           case IFN_GOMP_SIMD_LANE:
5809             {
5810               class loop *loop = gimple_bb (stmt)->loop_father;
5811               tree uid = gimple_call_arg (stmt, 0);
5812               gcc_assert (TREE_CODE (uid) == SSA_NAME);
5813               if (loop == NULL
5814                   || loop->simduid != SSA_NAME_VAR (uid))
5815                 clobbers_memory = true;
5816               break;
5817             }
5818           case IFN_MASK_LOAD:
5819           case IFN_MASK_STORE:
5820           break;
5821           case IFN_MASK_CALL:
5822             {
5823               tree orig_fndecl
5824                 = gimple_call_addr_fndecl (gimple_call_arg (stmt, 0));
5825               if (!orig_fndecl
5826                   || (flags_from_decl_or_type (orig_fndecl) & ECF_CONST) == 0)
5827                 clobbers_memory = true;
5828             }
5829             break;
5830           default:
5831             clobbers_memory = true;
5832             break;
5833           }
5834       else
5835         clobbers_memory = true;
5836     }
5837   else if (stmt_code == GIMPLE_ASM
5838            && (gimple_asm_volatile_p (as_a <gasm *> (stmt))
5839                || gimple_vuse (stmt)))
5840     clobbers_memory = true;
5841
5842   if (!gimple_vuse (stmt))
5843     return clobbers_memory;
5844
5845   if (stmt_code == GIMPLE_ASSIGN)
5846     {
5847       tree base;
5848       op0 = gimple_assign_lhs (stmt);
5849       op1 = gimple_assign_rhs1 (stmt);
5850
5851       if (DECL_P (op1)
5852           || (REFERENCE_CLASS_P (op1)
5853               && (base = get_base_address (op1))
5854               && TREE_CODE (base) != SSA_NAME
5855               && !is_gimple_min_invariant (base)))
5856         {
5857           ref.ref = op1;
5858           ref.is_read = true;
5859           ref.is_conditional_in_stmt = false;
5860           references->safe_push (ref);
5861         }
5862     }
5863   else if (stmt_code == GIMPLE_CALL)
5864     {
5865       unsigned i = 0, n;
5866       tree ptr, type;
5867       unsigned int align;
5868
5869       ref.is_read = false;
5870       if (gimple_call_internal_p (stmt))
5871         switch (gimple_call_internal_fn (stmt))
5872           {
5873           case IFN_MASK_LOAD:
5874             if (gimple_call_lhs (stmt) == NULL_TREE)
5875               break;
5876             ref.is_read = true;
5877             /* FALLTHRU */
5878           case IFN_MASK_STORE:
5879             ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)), 0);
5880             align = tree_to_shwi (gimple_call_arg (stmt, 1));
5881             if (ref.is_read)
5882               type = TREE_TYPE (gimple_call_lhs (stmt));
5883             else
5884               type = TREE_TYPE (gimple_call_arg (stmt, 3));
5885             if (TYPE_ALIGN (type) != align)
5886               type = build_aligned_type (type, align);
5887             ref.is_conditional_in_stmt = true;
5888             ref.ref = fold_build2 (MEM_REF, type, gimple_call_arg (stmt, 0),
5889                                    ptr);
5890             references->safe_push (ref);
5891             return false;
5892           case IFN_MASK_CALL:
5893             i = 1;
5894             gcc_fallthrough ();
5895           default:
5896             break;
5897           }
5898
5899       op0 = gimple_call_lhs (stmt);
5900       n = gimple_call_num_args (stmt);
5901       for (; i < n; i++)
5902         {
5903           op1 = gimple_call_arg (stmt, i);
5904
5905           if (DECL_P (op1)
5906               || (REFERENCE_CLASS_P (op1) && get_base_address (op1)))
5907             {
5908               ref.ref = op1;
5909               ref.is_read = true;
5910               ref.is_conditional_in_stmt = false;
5911               references->safe_push (ref);
5912             }
5913         }
5914     }
5915   else
5916     return clobbers_memory;
5917
5918   if (op0
5919       && (DECL_P (op0)
5920           || (REFERENCE_CLASS_P (op0) && get_base_address (op0))))
5921     {
5922       ref.ref = op0;
5923       ref.is_read = false;
5924       ref.is_conditional_in_stmt = false;
5925       references->safe_push (ref);
5926     }
5927   return clobbers_memory;
5928 }
5929
5930
5931 /* Returns true if the loop-nest has any data reference.  */
5932
5933 bool
5934 loop_nest_has_data_refs (loop_p loop)
5935 {
5936   basic_block *bbs = get_loop_body (loop);
5937   auto_vec<data_ref_loc, 3> references;
5938
5939   for (unsigned i = 0; i < loop->num_nodes; i++)
5940     {
5941       basic_block bb = bbs[i];
5942       gimple_stmt_iterator bsi;
5943
5944       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5945         {
5946           gimple *stmt = gsi_stmt (bsi);
5947           get_references_in_stmt (stmt, &references);
5948           if (references.length ())
5949             {
5950               free (bbs);
5951               return true;
5952             }
5953         }
5954     }
5955   free (bbs);
5956   return false;
5957 }
5958
5959 /* Stores the data references in STMT to DATAREFS.  If there is an unanalyzable
5960    reference, returns false, otherwise returns true.  NEST is the outermost
5961    loop of the loop nest in which the references should be analyzed.  */
5962
5963 opt_result
5964 find_data_references_in_stmt (class loop *nest, gimple *stmt,
5965                               vec<data_reference_p> *datarefs)
5966 {
5967   auto_vec<data_ref_loc, 2> references;
5968   data_reference_p dr;
5969
5970   if (get_references_in_stmt (stmt, &references))
5971     return opt_result::failure_at (stmt, "statement clobbers memory: %G",
5972                                    stmt);
5973
5974   for (const data_ref_loc &ref : references)
5975     {
5976       dr = create_data_ref (nest ? loop_preheader_edge (nest) : NULL,
5977                             loop_containing_stmt (stmt), ref.ref,
5978                             stmt, ref.is_read, ref.is_conditional_in_stmt);
5979       gcc_assert (dr != NULL);
5980       datarefs->safe_push (dr);
5981     }
5982
5983   return opt_result::success ();
5984 }
5985
5986 /* Stores the data references in STMT to DATAREFS.  If there is an
5987    unanalyzable reference, returns false, otherwise returns true.
5988    NEST is the outermost loop of the loop nest in which the references
5989    should be instantiated, LOOP is the loop in which the references
5990    should be analyzed.  */
5991
5992 bool
5993 graphite_find_data_references_in_stmt (edge nest, loop_p loop, gimple *stmt,
5994                                        vec<data_reference_p> *datarefs)
5995 {
5996   auto_vec<data_ref_loc, 2> references;
5997   bool ret = true;
5998   data_reference_p dr;
5999
6000   if (get_references_in_stmt (stmt, &references))
6001     return false;
6002
6003   for (const data_ref_loc &ref : references)
6004     {
6005       dr = create_data_ref (nest, loop, ref.ref, stmt, ref.is_read,
6006                             ref.is_conditional_in_stmt);
6007       gcc_assert (dr != NULL);
6008       datarefs->safe_push (dr);
6009     }
6010
6011   return ret;
6012 }
6013
6014 /* Search the data references in LOOP, and record the information into
6015    DATAREFS.  Returns chrec_dont_know when failing to analyze a
6016    difficult case, returns NULL_TREE otherwise.  */
6017
6018 tree
6019 find_data_references_in_bb (class loop *loop, basic_block bb,
6020                             vec<data_reference_p> *datarefs)
6021 {
6022   gimple_stmt_iterator bsi;
6023
6024   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
6025     {
6026       gimple *stmt = gsi_stmt (bsi);
6027
6028       if (!find_data_references_in_stmt (loop, stmt, datarefs))
6029         {
6030           struct data_reference *res;
6031           res = XCNEW (struct data_reference);
6032           datarefs->safe_push (res);
6033
6034           return chrec_dont_know;
6035         }
6036     }
6037
6038   return NULL_TREE;
6039 }
6040
6041 /* Search the data references in LOOP, and record the information into
6042    DATAREFS.  Returns chrec_dont_know when failing to analyze a
6043    difficult case, returns NULL_TREE otherwise.
6044
6045    TODO: This function should be made smarter so that it can handle address
6046    arithmetic as if they were array accesses, etc.  */
6047
6048 tree
6049 find_data_references_in_loop (class loop *loop,
6050                               vec<data_reference_p> *datarefs)
6051 {
6052   basic_block bb, *bbs;
6053   unsigned int i;
6054
6055   bbs = get_loop_body_in_dom_order (loop);
6056
6057   for (i = 0; i < loop->num_nodes; i++)
6058     {
6059       bb = bbs[i];
6060
6061       if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
6062         {
6063           free (bbs);
6064           return chrec_dont_know;
6065         }
6066     }
6067   free (bbs);
6068
6069   return NULL_TREE;
6070 }
6071
6072 /* Return the alignment in bytes that DRB is guaranteed to have at all
6073    times.  */
6074
6075 unsigned int
6076 dr_alignment (innermost_loop_behavior *drb)
6077 {
6078   /* Get the alignment of BASE_ADDRESS + INIT.  */
6079   unsigned int alignment = drb->base_alignment;
6080   unsigned int misalignment = (drb->base_misalignment
6081                                + TREE_INT_CST_LOW (drb->init));
6082   if (misalignment != 0)
6083     alignment = MIN (alignment, misalignment & -misalignment);
6084
6085   /* Cap it to the alignment of OFFSET.  */
6086   if (!integer_zerop (drb->offset))
6087     alignment = MIN (alignment, drb->offset_alignment);
6088
6089   /* Cap it to the alignment of STEP.  */
6090   if (!integer_zerop (drb->step))
6091     alignment = MIN (alignment, drb->step_alignment);
6092
6093   return alignment;
6094 }
6095
6096 /* If BASE is a pointer-typed SSA name, try to find the object that it
6097    is based on.  Return this object X on success and store the alignment
6098    in bytes of BASE - &X in *ALIGNMENT_OUT.  */
6099
6100 static tree
6101 get_base_for_alignment_1 (tree base, unsigned int *alignment_out)
6102 {
6103   if (TREE_CODE (base) != SSA_NAME || !POINTER_TYPE_P (TREE_TYPE (base)))
6104     return NULL_TREE;
6105
6106   gimple *def = SSA_NAME_DEF_STMT (base);
6107   base = analyze_scalar_evolution (loop_containing_stmt (def), base);
6108
6109   /* Peel chrecs and record the minimum alignment preserved by
6110      all steps.  */
6111   unsigned int alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
6112   while (TREE_CODE (base) == POLYNOMIAL_CHREC)
6113     {
6114       unsigned int step_alignment = highest_pow2_factor (CHREC_RIGHT (base));
6115       alignment = MIN (alignment, step_alignment);
6116       base = CHREC_LEFT (base);
6117     }
6118
6119   /* Punt if the expression is too complicated to handle.  */
6120   if (tree_contains_chrecs (base, NULL) || !POINTER_TYPE_P (TREE_TYPE (base)))
6121     return NULL_TREE;
6122
6123   /* The only useful cases are those for which a dereference folds to something
6124      other than an INDIRECT_REF.  */
6125   tree ref_type = TREE_TYPE (TREE_TYPE (base));
6126   tree ref = fold_indirect_ref_1 (UNKNOWN_LOCATION, ref_type, base);
6127   if (!ref)
6128     return NULL_TREE;
6129
6130   /* Analyze the base to which the steps we peeled were applied.  */
6131   poly_int64 bitsize, bitpos, bytepos;
6132   machine_mode mode;
6133   int unsignedp, reversep, volatilep;
6134   tree offset;
6135   base = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
6136                               &unsignedp, &reversep, &volatilep);
6137   if (!base || !multiple_p (bitpos, BITS_PER_UNIT, &bytepos))
6138     return NULL_TREE;
6139
6140   /* Restrict the alignment to that guaranteed by the offsets.  */
6141   unsigned int bytepos_alignment = known_alignment (bytepos);
6142   if (bytepos_alignment != 0)
6143     alignment = MIN (alignment, bytepos_alignment);
6144   if (offset)
6145     {
6146       unsigned int offset_alignment = highest_pow2_factor (offset);
6147       alignment = MIN (alignment, offset_alignment);
6148     }
6149
6150   *alignment_out = alignment;
6151   return base;
6152 }
6153
6154 /* Return the object whose alignment would need to be changed in order
6155    to increase the alignment of ADDR.  Store the maximum achievable
6156    alignment in *MAX_ALIGNMENT.  */
6157
6158 tree
6159 get_base_for_alignment (tree addr, unsigned int *max_alignment)
6160 {
6161   tree base = get_base_for_alignment_1 (addr, max_alignment);
6162   if (base)
6163     return base;
6164
6165   if (TREE_CODE (addr) == ADDR_EXPR)
6166     addr = TREE_OPERAND (addr, 0);
6167   *max_alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
6168   return addr;
6169 }
6170
6171 /* Recursive helper function.  */
6172
6173 static bool
6174 find_loop_nest_1 (class loop *loop, vec<loop_p> *loop_nest)
6175 {
6176   /* Inner loops of the nest should not contain siblings.  Example:
6177      when there are two consecutive loops,
6178
6179      | loop_0
6180      |   loop_1
6181      |     A[{0, +, 1}_1]
6182      |   endloop_1
6183      |   loop_2
6184      |     A[{0, +, 1}_2]
6185      |   endloop_2
6186      | endloop_0
6187
6188      the dependence relation cannot be captured by the distance
6189      abstraction.  */
6190   if (loop->next)
6191     return false;
6192
6193   loop_nest->safe_push (loop);
6194   if (loop->inner)
6195     return find_loop_nest_1 (loop->inner, loop_nest);
6196   return true;
6197 }
6198
6199 /* Return false when the LOOP is not well nested.  Otherwise return
6200    true and insert in LOOP_NEST the loops of the nest.  LOOP_NEST will
6201    contain the loops from the outermost to the innermost, as they will
6202    appear in the classic distance vector.  */
6203
6204 bool
6205 find_loop_nest (class loop *loop, vec<loop_p> *loop_nest)
6206 {
6207   loop_nest->safe_push (loop);
6208   if (loop->inner)
6209     return find_loop_nest_1 (loop->inner, loop_nest);
6210   return true;
6211 }
6212
6213 /* Returns true when the data dependences have been computed, false otherwise.
6214    Given a loop nest LOOP, the following vectors are returned:
6215    DATAREFS is initialized to all the array elements contained in this loop,
6216    DEPENDENCE_RELATIONS contains the relations between the data references.
6217    Compute read-read and self relations if
6218    COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE.  */
6219
6220 bool
6221 compute_data_dependences_for_loop (class loop *loop,
6222                                    bool compute_self_and_read_read_dependences,
6223                                    vec<loop_p> *loop_nest,
6224                                    vec<data_reference_p> *datarefs,
6225                                    vec<ddr_p> *dependence_relations)
6226 {
6227   bool res = true;
6228
6229   memset (&dependence_stats, 0, sizeof (dependence_stats));
6230
6231   /* If the loop nest is not well formed, or one of the data references
6232      is not computable, give up without spending time to compute other
6233      dependences.  */
6234   if (!loop
6235       || !find_loop_nest (loop, loop_nest)
6236       || find_data_references_in_loop (loop, datarefs) == chrec_dont_know
6237       || !compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
6238                                    compute_self_and_read_read_dependences))
6239     res = false;
6240
6241   if (dump_file && (dump_flags & TDF_STATS))
6242     {
6243       fprintf (dump_file, "Dependence tester statistics:\n");
6244
6245       fprintf (dump_file, "Number of dependence tests: %d\n",
6246                dependence_stats.num_dependence_tests);
6247       fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
6248                dependence_stats.num_dependence_dependent);
6249       fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
6250                dependence_stats.num_dependence_independent);
6251       fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
6252                dependence_stats.num_dependence_undetermined);
6253
6254       fprintf (dump_file, "Number of subscript tests: %d\n",
6255                dependence_stats.num_subscript_tests);
6256       fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
6257                dependence_stats.num_subscript_undetermined);
6258       fprintf (dump_file, "Number of same subscript function: %d\n",
6259                dependence_stats.num_same_subscript_function);
6260
6261       fprintf (dump_file, "Number of ziv tests: %d\n",
6262                dependence_stats.num_ziv);
6263       fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
6264                dependence_stats.num_ziv_dependent);
6265       fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
6266                dependence_stats.num_ziv_independent);
6267       fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
6268                dependence_stats.num_ziv_unimplemented);
6269
6270       fprintf (dump_file, "Number of siv tests: %d\n",
6271                dependence_stats.num_siv);
6272       fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
6273                dependence_stats.num_siv_dependent);
6274       fprintf (dump_file, "Number of siv tests returning independent: %d\n",
6275                dependence_stats.num_siv_independent);
6276       fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
6277                dependence_stats.num_siv_unimplemented);
6278
6279       fprintf (dump_file, "Number of miv tests: %d\n",
6280                dependence_stats.num_miv);
6281       fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
6282                dependence_stats.num_miv_dependent);
6283       fprintf (dump_file, "Number of miv tests returning independent: %d\n",
6284                dependence_stats.num_miv_independent);
6285       fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
6286                dependence_stats.num_miv_unimplemented);
6287     }
6288
6289   return res;
6290 }
6291
6292 /* Free the memory used by a data dependence relation DDR.  */
6293
6294 void
6295 free_dependence_relation (struct data_dependence_relation *ddr)
6296 {
6297   if (ddr == NULL)
6298     return;
6299
6300   if (DDR_SUBSCRIPTS (ddr).exists ())
6301     free_subscripts (DDR_SUBSCRIPTS (ddr));
6302   DDR_DIST_VECTS (ddr).release ();
6303   DDR_DIR_VECTS (ddr).release ();
6304
6305   free (ddr);
6306 }
6307
6308 /* Free the memory used by the data dependence relations from
6309    DEPENDENCE_RELATIONS.  */
6310
6311 void
6312 free_dependence_relations (vec<ddr_p>& dependence_relations)
6313 {
6314   for (data_dependence_relation *ddr : dependence_relations)
6315     if (ddr)
6316       free_dependence_relation (ddr);
6317
6318   dependence_relations.release ();
6319 }
6320
6321 /* Free the memory used by the data references from DATAREFS.  */
6322
6323 void
6324 free_data_refs (vec<data_reference_p>& datarefs)
6325 {
6326   for (data_reference *dr : datarefs)
6327     free_data_ref (dr);
6328   datarefs.release ();
6329 }
6330
6331 /* Common routine implementing both dr_direction_indicator and
6332    dr_zero_step_indicator.  Return USEFUL_MIN if the indicator is known
6333    to be >= USEFUL_MIN and -1 if the indicator is known to be negative.
6334    Return the step as the indicator otherwise.  */
6335
6336 static tree
6337 dr_step_indicator (struct data_reference *dr, int useful_min)
6338 {
6339   tree step = DR_STEP (dr);
6340   if (!step)
6341     return NULL_TREE;
6342   STRIP_NOPS (step);
6343   /* Look for cases where the step is scaled by a positive constant
6344      integer, which will often be the access size.  If the multiplication
6345      doesn't change the sign (due to overflow effects) then we can
6346      test the unscaled value instead.  */
6347   if (TREE_CODE (step) == MULT_EXPR
6348       && TREE_CODE (TREE_OPERAND (step, 1)) == INTEGER_CST
6349       && tree_int_cst_sgn (TREE_OPERAND (step, 1)) > 0)
6350     {
6351       tree factor = TREE_OPERAND (step, 1);
6352       step = TREE_OPERAND (step, 0);
6353
6354       /* Strip widening and truncating conversions as well as nops.  */
6355       if (CONVERT_EXPR_P (step)
6356           && INTEGRAL_TYPE_P (TREE_TYPE (TREE_OPERAND (step, 0))))
6357         step = TREE_OPERAND (step, 0);
6358       tree type = TREE_TYPE (step);
6359
6360       /* Get the range of step values that would not cause overflow.  */
6361       widest_int minv = (wi::to_widest (TYPE_MIN_VALUE (ssizetype))
6362                          / wi::to_widest (factor));
6363       widest_int maxv = (wi::to_widest (TYPE_MAX_VALUE (ssizetype))
6364                          / wi::to_widest (factor));
6365
6366       /* Get the range of values that the unconverted step actually has.  */
6367       wide_int step_min, step_max;
6368       value_range vr;
6369       if (TREE_CODE (step) != SSA_NAME
6370           || !get_range_query (cfun)->range_of_expr (vr, step)
6371           || vr.undefined_p ())
6372         {
6373           step_min = wi::to_wide (TYPE_MIN_VALUE (type));
6374           step_max = wi::to_wide (TYPE_MAX_VALUE (type));
6375         }
6376       else
6377         {
6378           step_min = vr.lower_bound ();
6379           step_max = vr.upper_bound ();
6380         }
6381
6382       /* Check whether the unconverted step has an acceptable range.  */
6383       signop sgn = TYPE_SIGN (type);
6384       if (wi::les_p (minv, widest_int::from (step_min, sgn))
6385           && wi::ges_p (maxv, widest_int::from (step_max, sgn)))
6386         {
6387           if (wi::ge_p (step_min, useful_min, sgn))
6388             return ssize_int (useful_min);
6389           else if (wi::lt_p (step_max, 0, sgn))
6390             return ssize_int (-1);
6391           else
6392             return fold_convert (ssizetype, step);
6393         }
6394     }
6395   return DR_STEP (dr);
6396 }
6397
6398 /* Return a value that is negative iff DR has a negative step.  */
6399
6400 tree
6401 dr_direction_indicator (struct data_reference *dr)
6402 {
6403   return dr_step_indicator (dr, 0);
6404 }
6405
6406 /* Return a value that is zero iff DR has a zero step.  */
6407
6408 tree
6409 dr_zero_step_indicator (struct data_reference *dr)
6410 {
6411   return dr_step_indicator (dr, 1);
6412 }
6413
6414 /* Return true if DR is known to have a nonnegative (but possibly zero)
6415    step.  */
6416
6417 bool
6418 dr_known_forward_stride_p (struct data_reference *dr)
6419 {
6420   tree indicator = dr_direction_indicator (dr);
6421   tree neg_step_val = fold_binary (LT_EXPR, boolean_type_node,
6422                                    fold_convert (ssizetype, indicator),
6423                                    ssize_int (0));
6424   return neg_step_val && integer_zerop (neg_step_val);
6425 }