/* Data references and dependences detectors.
   Copyright (C) 2003-2022 Free Software Foundation, Inc.
   Contributed by Sebastian Pop <pop@cri.ensmp.fr>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass walks a given loop structure searching for array
   references.  The information about the array accesses is recorded
   in DATA_REFERENCE structures.

   The basic test for determining the dependences is:
   given two access functions chrec1 and chrec2 to the same array, and
   x and y two vectors from the iteration domain, the same element of
   the array is accessed twice at iterations x and y if and only if:
   | chrec1 (x) == chrec2 (y).

   The goals of this analysis are:

   - to determine the independence: the relation between two
     independent accesses is qualified with the chrec_known (this
     information allows a loop parallelization),

   - when two data references access the same data, to qualify the
     dependence relation with classic dependence representations:

       - distance vectors
       - direction vectors
       - loop carried level dependence
       - polyhedron dependence
     or with the chains of recurrences based representation,

   - to define a knowledge base for storing the data dependence
     information,

   - to define an interface to access this data.

   Definitions:

   - subscript: given two array accesses a subscript is the tuple
   composed of the access functions for a given dimension.  Example:
   Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
   (f1, g1), (f2, g2), (f3, g3).

   - Diophantine equation: an equation whose coefficients and
   solutions are integer constants, for example the equation
   | 3*x + 2*y = 1
   has an integer solution x = 1 and y = -1.
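   Such an equation a*x + b*y = c has an integer solution exactly when
   gcd (a, b) divides c; here gcd (3, 2) = 1 divides 1, which is why
   the solution above exists.  This divisibility condition is the core
   of the GCD-style dependence tests described in the references below.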

   References:

   - "Advanced Compilation for High Performance Computing" by Randy
   Allen and Ken Kennedy.
   http://citeseer.ist.psu.edu/goff91practical.html

   - "Loop Transformations for Restructuring Compilers - The Foundations"
   by Utpal Banerjee.
*/

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "gimple-pretty-print.h"
#include "alias.h"
#include "fold-const.h"
#include "expr.h"
#include "gimple-iterator.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "dumpfile.h"
#include "tree-affine.h"
#include "builtins.h"
#include "tree-eh.h"
#include "ssa.h"
#include "internal-fn.h"
#include "vr-values.h"
#include "range-op.h"
#include "tree-ssa-loop-ivopts.h"

static struct datadep_stats
{
  int num_dependence_tests;
  int num_dependence_dependent;
  int num_dependence_independent;
  int num_dependence_undetermined;

  int num_subscript_tests;
  int num_subscript_undetermined;
  int num_same_subscript_function;

  int num_ziv;
  int num_ziv_independent;
  int num_ziv_dependent;
  int num_ziv_unimplemented;

  int num_siv;
  int num_siv_independent;
  int num_siv_dependent;
  int num_siv_unimplemented;

  int num_miv;
  int num_miv_independent;
  int num_miv_dependent;
  int num_miv_unimplemented;
} dependence_stats;

static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
                                           unsigned int, unsigned int,
                                           class loop *);
/* Returns true iff A divides B.  */

static inline bool
tree_fold_divides_p (const_tree a, const_tree b)
{
  gcc_assert (TREE_CODE (a) == INTEGER_CST);
  gcc_assert (TREE_CODE (b) == INTEGER_CST);
  return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
}

/* Returns true iff A divides B.  */

static inline bool
int_divides_p (lambda_int a, lambda_int b)
{
  return ((b % a) == 0);
}
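/* For example, int_divides_p (3, 9) is true because 9 % 3 == 0, while
   int_divides_p (2, 9) is false; tree_fold_divides_p is the
   INTEGER_CST tree counterpart of the same test.  */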

/* Return true if reference REF contains a union access.  */

static bool
ref_contains_union_access_p (tree ref)
{
  while (handled_component_p (ref))
    {
      ref = TREE_OPERAND (ref, 0);
      if (TREE_CODE (TREE_TYPE (ref)) == UNION_TYPE
          || TREE_CODE (TREE_TYPE (ref)) == QUAL_UNION_TYPE)
        return true;
    }
  return false;
}


/* Dump into FILE all the data references from DATAREFS.  */

static void
dump_data_references (FILE *file, vec<data_reference_p> datarefs)
{
  for (data_reference *dr : datarefs)
    dump_data_reference (file, dr);
}

/* Unified dump into FILE all the data references from DATAREFS.  */

DEBUG_FUNCTION void
debug (vec<data_reference_p> &ref)
{
  dump_data_references (stderr, ref);
}

DEBUG_FUNCTION void
debug (vec<data_reference_p> *ptr)
{
  if (ptr)
    debug (*ptr);
  else
    fprintf (stderr, "<nil>\n");
}


/* Dump into STDERR all the data references from DATAREFS.  */

DEBUG_FUNCTION void
debug_data_references (vec<data_reference_p> datarefs)
{
  dump_data_references (stderr, datarefs);
}

/* Print to STDERR the data_reference DR.  */

DEBUG_FUNCTION void
debug_data_reference (struct data_reference *dr)
{
  dump_data_reference (stderr, dr);
}

/* Dump function for a DATA_REFERENCE structure.  */

void
dump_data_reference (FILE *outf,
                     struct data_reference *dr)
{
  unsigned int i;

  fprintf (outf, "#(Data Ref: \n");
  fprintf (outf, "#  bb: %d \n", gimple_bb (DR_STMT (dr))->index);
  fprintf (outf, "#  stmt: ");
  print_gimple_stmt (outf, DR_STMT (dr), 0);
  fprintf (outf, "#  ref: ");
  print_generic_stmt (outf, DR_REF (dr));
  fprintf (outf, "#  base_object: ");
  print_generic_stmt (outf, DR_BASE_OBJECT (dr));

  for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
    {
      fprintf (outf, "#  Access function %d: ", i);
      print_generic_stmt (outf, DR_ACCESS_FN (dr, i));
    }
  fprintf (outf, "#)\n");
}

/* Unified dump function for a DATA_REFERENCE structure.  */

DEBUG_FUNCTION void
debug (data_reference &ref)
{
  dump_data_reference (stderr, &ref);
}

DEBUG_FUNCTION void
debug (data_reference *ptr)
{
  if (ptr)
    debug (*ptr);
  else
    fprintf (stderr, "<nil>\n");
}


/* Dumps the affine function described by FN to the file OUTF.  */

DEBUG_FUNCTION void
dump_affine_function (FILE *outf, affine_fn fn)
{
  unsigned i;
  tree coef;

  print_generic_expr (outf, fn[0], TDF_SLIM);
  for (i = 1; fn.iterate (i, &coef); i++)
    {
      fprintf (outf, " + ");
      print_generic_expr (outf, coef, TDF_SLIM);
      fprintf (outf, " * x_%u", i);
    }
}

/* Dumps the conflict function CF to the file OUTF.  */

DEBUG_FUNCTION void
dump_conflict_function (FILE *outf, conflict_function *cf)
{
  unsigned i;

  if (cf->n == NO_DEPENDENCE)
    fprintf (outf, "no dependence");
  else if (cf->n == NOT_KNOWN)
    fprintf (outf, "not known");
  else
    {
      for (i = 0; i < cf->n; i++)
        {
          if (i != 0)
            fprintf (outf, " ");
          fprintf (outf, "[");
          dump_affine_function (outf, cf->fns[i]);
          fprintf (outf, "]");
        }
    }
}

/* Dump function for a SUBSCRIPT structure.  */

DEBUG_FUNCTION void
dump_subscript (FILE *outf, struct subscript *subscript)
{
  conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);

  fprintf (outf, "\n (subscript \n");
  fprintf (outf, "  iterations_that_access_an_element_twice_in_A: ");
  dump_conflict_function (outf, cf);
  if (CF_NONTRIVIAL_P (cf))
    {
      tree last_iteration = SUB_LAST_CONFLICT (subscript);
      fprintf (outf, "\n  last_conflict: ");
      print_generic_expr (outf, last_iteration);
    }

  cf = SUB_CONFLICTS_IN_B (subscript);
  fprintf (outf, "\n  iterations_that_access_an_element_twice_in_B: ");
  dump_conflict_function (outf, cf);
  if (CF_NONTRIVIAL_P (cf))
    {
      tree last_iteration = SUB_LAST_CONFLICT (subscript);
      fprintf (outf, "\n  last_conflict: ");
      print_generic_expr (outf, last_iteration);
    }

  fprintf (outf, "\n  (Subscript distance: ");
  print_generic_expr (outf, SUB_DISTANCE (subscript));
  fprintf (outf, " ))\n");
}

/* Print the classic direction vector DIRV to OUTF.  */

DEBUG_FUNCTION void
print_direction_vector (FILE *outf,
                        lambda_vector dirv,
                        int length)
{
  int eq;

  for (eq = 0; eq < length; eq++)
    {
      enum data_dependence_direction dir = ((enum data_dependence_direction)
                                            dirv[eq]);

      switch (dir)
        {
        case dir_positive:
          fprintf (outf, " +");
          break;
        case dir_negative:
          fprintf (outf, " -");
          break;
        case dir_equal:
          fprintf (outf, " =");
          break;
        case dir_positive_or_equal:
          fprintf (outf, " +=");
          break;
        case dir_positive_or_negative:
          fprintf (outf, " +-");
          break;
        case dir_negative_or_equal:
          fprintf (outf, " -=");
          break;
        case dir_star:
          fprintf (outf, " *");
          break;
        default:
          fprintf (outf, "indep");
          break;
        }
    }
  fprintf (outf, "\n");
}

/* Print a vector of direction vectors.  */

DEBUG_FUNCTION void
print_dir_vectors (FILE *outf, vec<lambda_vector> dir_vects,
                   int length)
{
  for (lambda_vector v : dir_vects)
    print_direction_vector (outf, v, length);
}

/* Print out a vector VEC of length N to OUTFILE.  */

DEBUG_FUNCTION void
print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
{
  int i;

  for (i = 0; i < n; i++)
    fprintf (outfile, HOST_WIDE_INT_PRINT_DEC " ", vector[i]);
  fprintf (outfile, "\n");
}

/* Print a vector of distance vectors.  */

DEBUG_FUNCTION void
print_dist_vectors (FILE *outf, vec<lambda_vector> dist_vects,
                    int length)
{
  for (lambda_vector v : dist_vects)
    print_lambda_vector (outf, v, length);
}

/* Dump function for a DATA_DEPENDENCE_RELATION structure.  */

DEBUG_FUNCTION void
dump_data_dependence_relation (FILE *outf, const data_dependence_relation *ddr)
{
  struct data_reference *dra, *drb;

  fprintf (outf, "(Data Dep: \n");

  if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    {
      if (ddr)
        {
          dra = DDR_A (ddr);
          drb = DDR_B (ddr);
          if (dra)
            dump_data_reference (outf, dra);
          else
            fprintf (outf, " (nil)\n");
          if (drb)
            dump_data_reference (outf, drb);
          else
            fprintf (outf, " (nil)\n");
        }
      fprintf (outf, " (don't know)\n)\n");
      return;
    }

  dra = DDR_A (ddr);
  drb = DDR_B (ddr);
  dump_data_reference (outf, dra);
  dump_data_reference (outf, drb);

  if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
    fprintf (outf, " (no dependence)\n");

  else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
    {
      unsigned int i;
      class loop *loopi;

      subscript *sub;
      FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
        {
          fprintf (outf, " access_fn_A: ");
          print_generic_stmt (outf, SUB_ACCESS_FN (sub, 0));
          fprintf (outf, " access_fn_B: ");
          print_generic_stmt (outf, SUB_ACCESS_FN (sub, 1));
          dump_subscript (outf, sub);
        }

      fprintf (outf, " loop nest: (");
      FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr), i, loopi)
        fprintf (outf, "%d ", loopi->num);
      fprintf (outf, ")\n");

      for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
        {
          fprintf (outf, " distance_vector: ");
          print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
                               DDR_NB_LOOPS (ddr));
        }

      for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
        {
          fprintf (outf, " direction_vector: ");
          print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
                                  DDR_NB_LOOPS (ddr));
        }
    }

  fprintf (outf, ")\n");
}

/* Debug version.  */

DEBUG_FUNCTION void
debug_data_dependence_relation (const struct data_dependence_relation *ddr)
{
  dump_data_dependence_relation (stderr, ddr);
}

/* Dump into FILE all the dependence relations from DDRS.  */

DEBUG_FUNCTION void
dump_data_dependence_relations (FILE *file, const vec<ddr_p> &ddrs)
{
  for (auto ddr : ddrs)
    dump_data_dependence_relation (file, ddr);
}

DEBUG_FUNCTION void
debug (vec<ddr_p> &ref)
{
  dump_data_dependence_relations (stderr, ref);
}

DEBUG_FUNCTION void
debug (vec<ddr_p> *ptr)
{
  if (ptr)
    debug (*ptr);
  else
    fprintf (stderr, "<nil>\n");
}


/* Dump to STDERR all the dependence relations from DDRS.  */

DEBUG_FUNCTION void
debug_data_dependence_relations (vec<ddr_p> ddrs)
{
  dump_data_dependence_relations (stderr, ddrs);
}

/* Dumps the distance and direction vectors in FILE.  DDRS contains
   the dependence relations, and VECT_SIZE is the size of the
   dependence vectors, or in other words the number of loops in the
   considered nest.  */

DEBUG_FUNCTION void
dump_dist_dir_vectors (FILE *file, vec<ddr_p> ddrs)
{
  for (data_dependence_relation *ddr : ddrs)
    if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
      {
        for (lambda_vector v : DDR_DIST_VECTS (ddr))
          {
            fprintf (file, "DISTANCE_V (");
            print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
            fprintf (file, ")\n");
          }

        for (lambda_vector v : DDR_DIR_VECTS (ddr))
          {
            fprintf (file, "DIRECTION_V (");
            print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
            fprintf (file, ")\n");
          }
      }

  fprintf (file, "\n\n");
}

/* Dumps the data dependence relations DDRS in FILE.  */

DEBUG_FUNCTION void
dump_ddrs (FILE *file, vec<ddr_p> ddrs)
{
  for (data_dependence_relation *ddr : ddrs)
    dump_data_dependence_relation (file, ddr);

  fprintf (file, "\n\n");
}

DEBUG_FUNCTION void
debug_ddrs (vec<ddr_p> ddrs)
{
  dump_ddrs (stderr, ddrs);
}

/* If RESULT_RANGE is nonnull, set *RESULT_RANGE to the range of
   OP0 CODE OP1, where:

   - OP0 CODE OP1 has integral type TYPE
   - the range of OP0 is given by OP0_RANGE and
   - the range of OP1 is given by OP1_RANGE.

   Independently of RESULT_RANGE, try to compute:

     DELTA = ((sizetype) OP0 CODE (sizetype) OP1)
             - (sizetype) (OP0 CODE OP1)

   as a constant and subtract DELTA from the ssizetype constant in *OFF.
   Return true on success, or false if DELTA is not known at compile time.

   Truncation and sign changes are known to distribute over CODE, i.e.

     (itype) (A CODE B) == (itype) A CODE (itype) B

   for any integral type ITYPE whose precision is no greater than the
   precision of A and B.  */
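/* A worked example of DELTA: if TYPE is unsigned char, CODE is
   PLUS_EXPR, OP0 == 200 and OP1 == 100, then

     (sizetype) OP0 CODE (sizetype) OP1 == 300
     (sizetype) (OP0 CODE OP1) == (sizetype) (unsigned char) 300 == 44

   so DELTA == 256: the 8-bit result wrapped exactly once, and 256 is
   subtracted from *OFF to compensate.  */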

static bool
compute_distributive_range (tree type, value_range &op0_range,
                            tree_code code, value_range &op1_range,
                            tree *off, value_range *result_range)
{
  gcc_assert (INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type));
  if (result_range)
    {
      range_op_handler op (code, type);
      op.fold_range (*result_range, type, op0_range, op1_range);
    }

  /* The distributive property guarantees that if TYPE is no narrower
     than SIZETYPE,

       (sizetype) (OP0 CODE OP1) == (sizetype) OP0 CODE (sizetype) OP1

     and so we can treat DELTA as zero.  */
  if (TYPE_PRECISION (type) >= TYPE_PRECISION (sizetype))
    return true;

  /* If overflow is undefined, we can assume that:

       X == (ssizetype) OP0 CODE (ssizetype) OP1

     is within the range of TYPE, i.e.:

       X == (ssizetype) (TYPE) X

     Distributing the (TYPE) truncation over X gives:

       X == (ssizetype) (OP0 CODE OP1)

     Casting both sides to sizetype and distributing the sizetype cast
     over X gives:

       (sizetype) OP0 CODE (sizetype) OP1 == (sizetype) (OP0 CODE OP1)

     and so we can treat DELTA as zero.  */
  if (TYPE_OVERFLOW_UNDEFINED (type))
    return true;

  /* Compute the range of:

       (ssizetype) OP0 CODE (ssizetype) OP1

     The distributive property guarantees that this has the same bitpattern as:

       (sizetype) OP0 CODE (sizetype) OP1

     but its range is more conducive to analysis.  */
  range_cast (op0_range, ssizetype);
  range_cast (op1_range, ssizetype);
  value_range wide_range;
  range_op_handler op (code, ssizetype);
  bool saved_flag_wrapv = flag_wrapv;
  flag_wrapv = 1;
  op.fold_range (wide_range, ssizetype, op0_range, op1_range);
  flag_wrapv = saved_flag_wrapv;
  if (wide_range.num_pairs () != 1 || !range_int_cst_p (&wide_range))
    return false;

  wide_int lb = wide_range.lower_bound ();
  wide_int ub = wide_range.upper_bound ();

  /* Calculate the number of times that each end of the range overflows or
     underflows TYPE.  We can only calculate DELTA if the numbers match.  */
  unsigned int precision = TYPE_PRECISION (type);
  if (!TYPE_UNSIGNED (type))
    {
      wide_int type_min = wi::mask (precision - 1, true, lb.get_precision ());
      lb -= type_min;
      ub -= type_min;
    }
  wide_int upper_bits = wi::mask (precision, true, lb.get_precision ());
  lb &= upper_bits;
  ub &= upper_bits;
  if (lb != ub)
    return false;

  /* OP0 CODE OP1 overflows exactly arshift (LB, PRECISION) times, with
     negative values indicating underflow.  The low PRECISION bits of LB
     are clear, so DELTA is therefore LB (== UB).  */
  *off = wide_int_to_tree (ssizetype, wi::to_wide (*off) - lb);
  return true;
}

/* Return true if (sizetype) OP == (sizetype) (TO_TYPE) OP,
   given that OP has type FROM_TYPE and range RANGE.  Both TO_TYPE and
   FROM_TYPE are integral types.  */

static bool
nop_conversion_for_offset_p (tree to_type, tree from_type, value_range &range)
{
  gcc_assert (INTEGRAL_TYPE_P (to_type)
              && INTEGRAL_TYPE_P (from_type)
              && !TYPE_OVERFLOW_TRAPS (to_type)
              && !TYPE_OVERFLOW_TRAPS (from_type));

  /* Converting to something no narrower than sizetype and then to sizetype
     is equivalent to converting directly to sizetype.  */
  if (TYPE_PRECISION (to_type) >= TYPE_PRECISION (sizetype))
    return true;

  /* Check whether TO_TYPE can represent all values that FROM_TYPE can.  */
  if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type)
      && (TYPE_UNSIGNED (from_type) || !TYPE_UNSIGNED (to_type)))
    return true;

  /* For narrowing conversions, we could in principle test whether
     the bits in FROM_TYPE but not in TO_TYPE have a fixed value
     and apply a constant adjustment.

     For other conversions (which involve a sign change) we could
     check that the signs are always equal, and apply a constant
     adjustment if the signs are negative.

     However, both cases should be rare.  */
  return range_fits_type_p (&range, TYPE_PRECISION (to_type),
                            TYPE_SIGN (to_type));
}
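/* For example, if TO_TYPE is a 16-bit unsigned short, FROM_TYPE a
   32-bit int and RANGE is known to be [0, 100], every value in RANGE
   survives the narrowing unchanged, so the conversion can be treated
   as a nop when computing offsets.  */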

static void
split_constant_offset (tree type, tree *var, tree *off,
                       value_range *result_range,
                       hash_map<tree, std::pair<tree, tree> > &cache,
                       unsigned *limit);

/* Helper function for split_constant_offset.  If TYPE is a pointer type,
   try to express OP0 CODE OP1 as:

     POINTER_PLUS <*VAR, (sizetype) *OFF>

   where:

   - *VAR has type TYPE
   - *OFF is a constant of type ssizetype.

   If TYPE is an integral type, try to express (sizetype) (OP0 CODE OP1) as:

     *VAR + (sizetype) *OFF

   where:

   - *VAR has type sizetype
   - *OFF is a constant of type ssizetype.

   In both cases, OP0 CODE OP1 has type TYPE.

   Return true on success.  A false return value indicates that we can't
   do better than set *OFF to zero.

   When returning true, set RESULT_RANGE to the range of OP0 CODE OP1,
   if RESULT_RANGE is nonnull and if we can do better than assume VR_VARYING.

   CACHE caches {*VAR, *OFF} pairs for SSA names that we've previously
   visited.  LIMIT counts down the number of SSA names that we are
   allowed to process before giving up.  */

static bool
split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
                         tree *var, tree *off, value_range *result_range,
                         hash_map<tree, std::pair<tree, tree> > &cache,
                         unsigned *limit)
{
  tree var0, var1;
  tree off0, off1;
  value_range op0_range, op1_range;

  *var = NULL_TREE;
  *off = NULL_TREE;

  if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type))
    return false;

  switch (code)
    {
    case INTEGER_CST:
      *var = size_int (0);
      *off = fold_convert (ssizetype, op0);
      if (result_range)
        result_range->set (op0, op0);
      return true;

    case POINTER_PLUS_EXPR:
      split_constant_offset (op0, &var0, &off0, nullptr, cache, limit);
      split_constant_offset (op1, &var1, &off1, nullptr, cache, limit);
      *var = fold_build2 (POINTER_PLUS_EXPR, type, var0, var1);
      *off = size_binop (PLUS_EXPR, off0, off1);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      split_constant_offset (op0, &var0, &off0, &op0_range, cache, limit);
      split_constant_offset (op1, &var1, &off1, &op1_range, cache, limit);
      *off = size_binop (code, off0, off1);
      if (!compute_distributive_range (type, op0_range, code, op1_range,
                                       off, result_range))
        return false;
      *var = fold_build2 (code, sizetype, var0, var1);
      return true;

    case MULT_EXPR:
      if (TREE_CODE (op1) != INTEGER_CST)
        return false;

      split_constant_offset (op0, &var0, &off0, &op0_range, cache, limit);
      op1_range.set (op1, op1);
      *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
      if (!compute_distributive_range (type, op0_range, code, op1_range,
                                       off, result_range))
        return false;
      *var = fold_build2 (MULT_EXPR, sizetype, var0,
                          fold_convert (sizetype, op1));
      return true;

    case ADDR_EXPR:
      {
        tree base, poffset;
        poly_int64 pbitsize, pbitpos, pbytepos;
        machine_mode pmode;
        int punsignedp, preversep, pvolatilep;

        op0 = TREE_OPERAND (op0, 0);
        base
          = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset, &pmode,
                                 &punsignedp, &preversep, &pvolatilep);

        if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
          return false;
        base = build_fold_addr_expr (base);
        off0 = ssize_int (pbytepos);

        if (poffset)
          {
            split_constant_offset (poffset, &poffset, &off1, nullptr,
                                   cache, limit);
            off0 = size_binop (PLUS_EXPR, off0, off1);
            base = fold_build_pointer_plus (base, poffset);
          }

        var0 = fold_convert (type, base);

        /* If variable length types are involved, punt, otherwise casts
           might be converted into ARRAY_REFs in gimplify_conversion.
           To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
           possibly no longer appears in current GIMPLE, might resurface.
           This perhaps could run
           if (CONVERT_EXPR_P (var0))
             {
               gimplify_conversion (&var0);
               // Attempt to fill in any within var0 found ARRAY_REF's
               // element size from corresponding op embedded ARRAY_REF,
               // if unsuccessful, just punt.
             }  */
        while (POINTER_TYPE_P (type))
          type = TREE_TYPE (type);
        if (int_size_in_bytes (type) < 0)
          return false;

        *var = var0;
        *off = off0;
        return true;
      }

    case SSA_NAME:
      {
        if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op0))
          return false;

        gimple *def_stmt = SSA_NAME_DEF_STMT (op0);
        enum tree_code subcode;

        if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
          return false;

        subcode = gimple_assign_rhs_code (def_stmt);

        /* We are using a cache to avoid un-CSEing large amounts of code.  */
        bool use_cache = false;
        if (!has_single_use (op0)
            && (subcode == POINTER_PLUS_EXPR
                || subcode == PLUS_EXPR
                || subcode == MINUS_EXPR
                || subcode == MULT_EXPR
                || subcode == ADDR_EXPR
                || CONVERT_EXPR_CODE_P (subcode)))
          {
            use_cache = true;
            bool existed;
            std::pair<tree, tree> &e = cache.get_or_insert (op0, &existed);
            if (existed)
              {
                if (integer_zerop (e.second))
                  return false;
                *var = e.first;
                *off = e.second;
                /* The caller sets the range in this case.  */
                return true;
              }
            e = std::make_pair (op0, ssize_int (0));
          }

        if (*limit == 0)
          return false;
        --*limit;

        var0 = gimple_assign_rhs1 (def_stmt);
        var1 = gimple_assign_rhs2 (def_stmt);

        bool res = split_constant_offset_1 (type, var0, subcode, var1,
                                            var, off, nullptr, cache, limit);
        if (res && use_cache)
          *cache.get (op0) = std::make_pair (*var, *off);
        /* The caller sets the range in this case.  */
        return res;
      }
    CASE_CONVERT:
      {
        /* We can only handle the following conversions:

           - Conversions from one pointer type to another pointer type.

           - Conversions from one non-trapping integral type to another
             non-trapping integral type.  In this case, the recursive
             call makes sure that:

               (sizetype) OP0

             can be expressed as a sizetype operation involving VAR and OFF,
             and all we need to do is check whether:

               (sizetype) OP0 == (sizetype) (TYPE) OP0

           - Conversions from a non-trapping sizetype-size integral type to
             a like-sized pointer type.  In this case, the recursive call
             makes sure that:

               (sizetype) OP0 == *VAR + (sizetype) *OFF

             and we can convert that to:

               POINTER_PLUS <(TYPE) *VAR, (sizetype) *OFF>

           - Conversions from a sizetype-sized pointer type to a like-sized
             non-trapping integral type.  In this case, the recursive call
             makes sure that:

               OP0 == POINTER_PLUS <*VAR, (sizetype) *OFF>

             where the POINTER_PLUS and *VAR have the same precision as
             TYPE (and the same precision as sizetype).  Then:

               (sizetype) (TYPE) OP0 == (sizetype) *VAR + (sizetype) *OFF.  */
        tree itype = TREE_TYPE (op0);
        if ((POINTER_TYPE_P (itype)
             || (INTEGRAL_TYPE_P (itype) && !TYPE_OVERFLOW_TRAPS (itype)))
            && (POINTER_TYPE_P (type)
                || (INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type)))
            && (POINTER_TYPE_P (type) == POINTER_TYPE_P (itype)
                || (TYPE_PRECISION (type) == TYPE_PRECISION (sizetype)
                    && TYPE_PRECISION (itype) == TYPE_PRECISION (sizetype))))
          {
            if (POINTER_TYPE_P (type))
              {
                split_constant_offset (op0, var, off, nullptr, cache, limit);
                *var = fold_convert (type, *var);
              }
            else if (POINTER_TYPE_P (itype))
              {
                split_constant_offset (op0, var, off, nullptr, cache, limit);
                *var = fold_convert (sizetype, *var);
              }
            else
              {
                split_constant_offset (op0, var, off, &op0_range,
                                       cache, limit);
                if (!nop_conversion_for_offset_p (type, itype, op0_range))
                  return false;
                if (result_range)
                  {
                    *result_range = op0_range;
                    range_cast (*result_range, type);
                  }
              }
            return true;
          }
        return false;
      }

    default:
      return false;
    }
}

/* If EXP has pointer type, try to express it as:

     POINTER_PLUS <*VAR, (sizetype) *OFF>

   where:

   - *VAR has the same type as EXP
   - *OFF is a constant of type ssizetype.

   If EXP has an integral type, try to express (sizetype) EXP as:

     *VAR + (sizetype) *OFF

   where:

   - *VAR has type sizetype
   - *OFF is a constant of type ssizetype.

   If EXP_RANGE is nonnull, set it to the range of EXP.

   CACHE caches {*VAR, *OFF} pairs for SSA names that we've previously
   visited.  LIMIT counts down the number of SSA names that we are
   allowed to process before giving up.  */

static void
split_constant_offset (tree exp, tree *var, tree *off, value_range *exp_range,
                       hash_map<tree, std::pair<tree, tree> > &cache,
                       unsigned *limit)
{
  tree type = TREE_TYPE (exp), op0, op1;
  enum tree_code code;

  code = TREE_CODE (exp);
  if (exp_range)
    {
      *exp_range = type;
      if (code == SSA_NAME)
        {
          value_range vr;
          get_range_query (cfun)->range_of_expr (vr, exp);
          if (vr.undefined_p ())
            vr.set_varying (TREE_TYPE (exp));
          wide_int var_min = wi::to_wide (vr.min ());
          wide_int var_max = wi::to_wide (vr.max ());
          value_range_kind vr_kind = vr.kind ();
          wide_int var_nonzero = get_nonzero_bits (exp);
          vr_kind = intersect_range_with_nonzero_bits (vr_kind,
                                                       &var_min, &var_max,
                                                       var_nonzero,
                                                       TYPE_SIGN (type));
          /* This check for VR_VARYING is here because the old code
             using get_range_info would return VR_RANGE for the entire
             domain, instead of VR_VARYING.  The new code normalizes
             full-domain ranges to VR_VARYING.  */
          if (vr_kind == VR_RANGE || vr_kind == VR_VARYING)
            *exp_range = value_range (type, var_min, var_max);
        }
    }

  if (!tree_is_chrec (exp)
      && get_gimple_rhs_class (TREE_CODE (exp)) != GIMPLE_TERNARY_RHS)
    {
      extract_ops_from_tree (exp, &code, &op0, &op1);
      if (split_constant_offset_1 (type, op0, code, op1, var, off,
                                   exp_range, cache, limit))
        return;
    }

  *var = exp;
  if (INTEGRAL_TYPE_P (type))
    *var = fold_convert (sizetype, *var);
  *off = ssize_int (0);

  value_range r;
  if (exp_range && code != SSA_NAME
      && get_range_query (cfun)->range_of_expr (r, exp)
      && !r.undefined_p ())
    *exp_range = r;
}

/* Expresses EXP as VAR + OFF, where OFF is a constant.  VAR has the same
   type as EXP while OFF has type ssizetype.  */

void
split_constant_offset (tree exp, tree *var, tree *off)
{
  unsigned limit = param_ssa_name_def_chain_limit;
  static hash_map<tree, std::pair<tree, tree> > *cache;
  if (!cache)
    cache = new hash_map<tree, std::pair<tree, tree> > (37);
  split_constant_offset (exp, var, off, nullptr, *cache, &limit);
  *var = fold_convert (TREE_TYPE (exp), *var);
  cache->empty ();
}
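/* For instance, for an integral EXP such as i_1 + 12 this returns
   *VAR = i_1 and *OFF = 12, and for a pointer EXP such as &a[4] with
   4-byte array elements it returns *VAR = &a and *OFF = 16.  */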

/* Returns the address ADDR of an object in a canonical shape (without nop
   casts, and with type of pointer to the object).  */

static tree
canonicalize_base_object_address (tree addr)
{
  tree orig = addr;

  STRIP_NOPS (addr);

  /* The base address may be obtained by casting from integer, in that case
     keep the cast.  */
  if (!POINTER_TYPE_P (TREE_TYPE (addr)))
    return orig;

  if (TREE_CODE (addr) != ADDR_EXPR)
    return addr;

  return build_fold_addr_expr (TREE_OPERAND (addr, 0));
}

/* Analyze the behavior of memory reference REF within STMT.
   There are two modes:

   - BB analysis.  In this case we simply split the address into base,
     init and offset components, without reference to any containing loop.
     The resulting base and offset are general expressions and they can
     vary arbitrarily from one iteration of the containing loop to the next.
     The step is always zero.

   - loop analysis.  In this case we analyze the reference both wrt LOOP
     and on the basis that the reference occurs (is "used") in LOOP;
     see the comment above analyze_scalar_evolution_in_loop for more
     information about this distinction.  The base, init, offset and
     step fields are all invariant in LOOP.

   Perform BB analysis if LOOP is null, or if LOOP is the function's
   dummy outermost loop.  In other cases perform loop analysis.

   Return true if the analysis succeeded and store the results in DRB if so.
   BB analysis can only fail for bitfield or reversed-storage accesses.  */

opt_result
dr_analyze_innermost (innermost_loop_behavior *drb, tree ref,
                      class loop *loop, const gimple *stmt)
{
  poly_int64 pbitsize, pbitpos;
  tree base, poffset;
  machine_mode pmode;
  int punsignedp, preversep, pvolatilep;
  affine_iv base_iv, offset_iv;
  tree init, dinit, step;
  bool in_loop = (loop && loop->num);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "analyze_innermost: ");

  base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset, &pmode,
                              &punsignedp, &preversep, &pvolatilep);
  gcc_assert (base != NULL_TREE);

  poly_int64 pbytepos;
  if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
    return opt_result::failure_at (stmt,
                                   "failed: bit offset alignment.\n");

  if (preversep)
    return opt_result::failure_at (stmt,
                                   "failed: reverse storage order.\n");

  /* Calculate the alignment and misalignment for the inner reference.  */
  unsigned HOST_WIDE_INT bit_base_misalignment;
  unsigned int bit_base_alignment;
  get_object_alignment_1 (base, &bit_base_alignment, &bit_base_misalignment);

  /* There are no bitfield references remaining in BASE, so the values
     we got back must be whole bytes.  */
  gcc_assert (bit_base_alignment % BITS_PER_UNIT == 0
              && bit_base_misalignment % BITS_PER_UNIT == 0);
  unsigned int base_alignment = bit_base_alignment / BITS_PER_UNIT;
  poly_int64 base_misalignment = bit_base_misalignment / BITS_PER_UNIT;

  if (TREE_CODE (base) == MEM_REF)
    {
      if (!integer_zerop (TREE_OPERAND (base, 1)))
        {
          /* Subtract MOFF from the base and add it to POFFSET instead.
             Adjust the misalignment to reflect the amount we subtracted.  */
          poly_offset_int moff = mem_ref_offset (base);
          base_misalignment -= moff.force_shwi ();
          tree mofft = wide_int_to_tree (sizetype, moff);
          if (!poffset)
            poffset = mofft;
          else
            poffset = size_binop (PLUS_EXPR, poffset, mofft);
        }
      base = TREE_OPERAND (base, 0);
    }
  else
    base = build_fold_addr_expr (base);

  if (in_loop)
    {
      if (!simple_iv (loop, loop, base, &base_iv, true))
        return opt_result::failure_at
          (stmt, "failed: evolution of base is not affine.\n");
    }
  else
    {
      base_iv.base = base;
      base_iv.step = ssize_int (0);
      base_iv.no_overflow = true;
    }

  if (!poffset)
    {
      offset_iv.base = ssize_int (0);
      offset_iv.step = ssize_int (0);
    }
  else
    {
      if (!in_loop)
        {
          offset_iv.base = poffset;
          offset_iv.step = ssize_int (0);
        }
      else if (!simple_iv (loop, loop, poffset, &offset_iv, true))
        return opt_result::failure_at
          (stmt, "failed: evolution of offset is not affine.\n");
    }

  init = ssize_int (pbytepos);

  /* Subtract any constant component from the base and add it to INIT instead.
     Adjust the misalignment to reflect the amount we subtracted.  */
  split_constant_offset (base_iv.base, &base_iv.base, &dinit);
  init = size_binop (PLUS_EXPR, init, dinit);
  base_misalignment -= TREE_INT_CST_LOW (dinit);

  split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
  init = size_binop (PLUS_EXPR, init, dinit);

  step = size_binop (PLUS_EXPR,
                     fold_convert (ssizetype, base_iv.step),
                     fold_convert (ssizetype, offset_iv.step));

  base = canonicalize_base_object_address (base_iv.base);

  /* See if get_pointer_alignment can guarantee a higher alignment than
     the one we calculated above.  */
  unsigned HOST_WIDE_INT alt_misalignment;
  unsigned int alt_alignment;
  get_pointer_alignment_1 (base, &alt_alignment, &alt_misalignment);

  /* As above, these values must be whole bytes.  */
  gcc_assert (alt_alignment % BITS_PER_UNIT == 0
              && alt_misalignment % BITS_PER_UNIT == 0);
  alt_alignment /= BITS_PER_UNIT;
  alt_misalignment /= BITS_PER_UNIT;

  if (base_alignment < alt_alignment)
    {
      base_alignment = alt_alignment;
      base_misalignment = alt_misalignment;
    }

  drb->base_address = base;
  drb->offset = fold_convert (ssizetype, offset_iv.base);
  drb->init = init;
  drb->step = step;
  if (known_misalignment (base_misalignment, base_alignment,
                          &drb->base_misalignment))
    drb->base_alignment = base_alignment;
  else
    {
      drb->base_alignment = known_alignment (base_misalignment);
      drb->base_misalignment = 0;
    }
  drb->offset_alignment = highest_pow2_factor (offset_iv.base);
  drb->step_alignment = highest_pow2_factor (step);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "success.\n");

  return opt_result::success ();
}
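/* As an illustration, for a reference a[i + 4] to an array of 4-byte
   ints, analyzed in a loop where I steps by 1, the decomposition above
   yields roughly base_address &a, offset 0, init 16 and step 4: the
   compile-time constant distance ends up in INIT and the per-iteration
   advance in STEP.  */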

/* Return true if OP is a valid component reference for a DR access
   function.  This accepts a subset of what handled_component_p accepts.  */

static bool
access_fn_component_p (tree op)
{
  switch (TREE_CODE (op))
    {
    case REALPART_EXPR:
    case IMAGPART_EXPR:
    case ARRAY_REF:
      return true;

    case COMPONENT_REF:
      return TREE_CODE (TREE_TYPE (TREE_OPERAND (op, 0))) == RECORD_TYPE;

    default:
      return false;
    }
}

/* Returns whether BASE can have an access_fn_component_p with BASE
   as base.  */

static bool
base_supports_access_fn_components_p (tree base)
{
  switch (TREE_CODE (TREE_TYPE (base)))
    {
    case COMPLEX_TYPE:
    case ARRAY_TYPE:
    case RECORD_TYPE:
      return true;
    default:
      return false;
    }
}

/* Determines the base object and the list of indices of memory reference
   DR, analyzed in LOOP and instantiated before NEST.  */

static void
dr_analyze_indices (struct indices *dri, tree ref, edge nest, loop_p loop)
{
  /* If analyzing a basic-block there are no indices to analyze
     and thus no access functions.  */
  if (!nest)
    {
      dri->base_object = ref;
      dri->access_fns.create (0);
      return;
    }

  vec<tree> access_fns = vNULL;

  /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
     into a two element array with a constant index.  The base is
     then just the immediate underlying object.  */
  if (TREE_CODE (ref) == REALPART_EXPR)
    {
      ref = TREE_OPERAND (ref, 0);
      access_fns.safe_push (integer_zero_node);
    }
  else if (TREE_CODE (ref) == IMAGPART_EXPR)
    {
      ref = TREE_OPERAND (ref, 0);
      access_fns.safe_push (integer_one_node);
    }

  /* Analyze access functions of dimensions we know to be independent.
     The list of component references handled here should be kept in
     sync with access_fn_component_p.  */
  while (handled_component_p (ref))
    {
      if (TREE_CODE (ref) == ARRAY_REF)
        {
          tree op = TREE_OPERAND (ref, 1);
          tree access_fn = analyze_scalar_evolution (loop, op);
          access_fn = instantiate_scev (nest, loop, access_fn);
          access_fns.safe_push (access_fn);
        }
      else if (TREE_CODE (ref) == COMPONENT_REF
               && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) == RECORD_TYPE)
        {
          /* For COMPONENT_REFs of records (but not unions!) use the
             FIELD_DECL offset as constant access function so we can
             disambiguate a[i].f1 and a[i].f2.  */
          tree off = component_ref_field_offset (ref);
          off = size_binop (PLUS_EXPR,
                            size_binop (MULT_EXPR,
                                        fold_convert (bitsizetype, off),
                                        bitsize_int (BITS_PER_UNIT)),
                            DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref, 1)));
          access_fns.safe_push (off);
        }
      else
        /* If we have an unhandled component we could not translate
           to an access function stop analyzing.  We have determined
           our base object in this case.  */
        break;

      ref = TREE_OPERAND (ref, 0);
    }

  /* If the address operand of a MEM_REF base has an evolution in the
     analyzed nest, add it as an additional independent access-function.  */
  if (TREE_CODE (ref) == MEM_REF)
    {
      tree op = TREE_OPERAND (ref, 0);
      tree access_fn = analyze_scalar_evolution (loop, op);
      access_fn = instantiate_scev (nest, loop, access_fn);
      STRIP_NOPS (access_fn);
      if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
        {
          tree memoff = TREE_OPERAND (ref, 1);
          tree base = initial_condition (access_fn);
          tree orig_type = TREE_TYPE (base);
          STRIP_USELESS_TYPE_CONVERSION (base);
          tree off;
          split_constant_offset (base, &base, &off);
          STRIP_USELESS_TYPE_CONVERSION (base);
          /* Fold the MEM_REF offset into the evolutions initial
             value to make more bases comparable.  */
          if (!integer_zerop (memoff))
            {
              off = size_binop (PLUS_EXPR, off,
                                fold_convert (ssizetype, memoff));
              memoff = build_int_cst (TREE_TYPE (memoff), 0);
            }
          /* Adjust the offset so it is a multiple of the access type
             size and thus we separate bases that can possibly be used
             to produce partial overlaps (which the access_fn machinery
             cannot handle).  */
          wide_int rem;
          if (TYPE_SIZE_UNIT (TREE_TYPE (ref))
              && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (ref))) == INTEGER_CST
              && !integer_zerop (TYPE_SIZE_UNIT (TREE_TYPE (ref))))
            rem = wi::mod_trunc
              (wi::to_wide (off),
               wi::to_wide (TYPE_SIZE_UNIT (TREE_TYPE (ref))),
               SIGNED);
          else
            /* If we can't compute the remainder simply force the initial
               condition to zero.  */
            rem = wi::to_wide (off);
          off = wide_int_to_tree (ssizetype, wi::to_wide (off) - rem);
          memoff = wide_int_to_tree (TREE_TYPE (memoff), rem);
          /* And finally replace the initial condition.  */
          access_fn = chrec_replace_initial_condition
            (access_fn, fold_convert (orig_type, off));
          /* ??? This is still not a suitable base object for
             dr_may_alias_p - the base object needs to be an
             access that covers the object as whole.  With
             an evolution in the pointer this cannot be
             guaranteed.
             As a band-aid, mark the access so we can special-case
             it in dr_may_alias_p.  */
          tree old = ref;
          ref = fold_build2_loc (EXPR_LOCATION (ref),
                                 MEM_REF, TREE_TYPE (ref),
                                 base, memoff);
          MR_DEPENDENCE_CLIQUE (ref) = MR_DEPENDENCE_CLIQUE (old);
          MR_DEPENDENCE_BASE (ref) = MR_DEPENDENCE_BASE (old);
          dri->unconstrained_base = true;
          access_fns.safe_push (access_fn);
        }
    }
  else if (DECL_P (ref))
    {
      /* Canonicalize DR_BASE_OBJECT to MEM_REF form.  */
      ref = build2 (MEM_REF, TREE_TYPE (ref),
                    build_fold_addr_expr (ref),
                    build_int_cst (reference_alias_ptr_type (ref), 0));
    }

  dri->base_object = ref;
  dri->access_fns = access_fns;
}
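/* For instance, for a[i][j] the subscripts are pushed as they are
   peeled off the reference, so access function 0 is the innermost
   subscript j and access function 1 is i.  */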

/* Extracts the alias analysis information from the memory reference DR.  */

static void
dr_analyze_alias (struct data_reference *dr)
{
  tree ref = DR_REF (dr);
  tree base = get_base_address (ref), addr;

  if (INDIRECT_REF_P (base)
      || TREE_CODE (base) == MEM_REF)
    {
      addr = TREE_OPERAND (base, 0);
      if (TREE_CODE (addr) == SSA_NAME)
        DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
    }
}

/* Frees data reference DR.  */

void
free_data_ref (data_reference_p dr)
{
  DR_ACCESS_FNS (dr).release ();
  if (dr->alt_indices.base_object)
    dr->alt_indices.access_fns.release ();
  free (dr);
}

/* Analyze memory reference MEMREF, which is accessed in STMT.
   The reference is a read if IS_READ is true, otherwise it is a write.
   IS_CONDITIONAL_IN_STMT indicates that the reference is conditional
   within STMT, i.e. that it might not occur even if STMT is executed
   and runs to completion.

   Return the data_reference description of MEMREF.  NEST is the outermost
   loop in which the reference should be instantiated, LOOP is the loop
   in which the data reference should be analyzed.  */

struct data_reference *
create_data_ref (edge nest, loop_p loop, tree memref, gimple *stmt,
                 bool is_read, bool is_conditional_in_stmt)
{
  struct data_reference *dr;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Creating dr for ");
      print_generic_expr (dump_file, memref, TDF_SLIM);
      fprintf (dump_file, "\n");
    }

  dr = XCNEW (struct data_reference);
  DR_STMT (dr) = stmt;
  DR_REF (dr) = memref;
  DR_IS_READ (dr) = is_read;
  DR_IS_CONDITIONAL_IN_STMT (dr) = is_conditional_in_stmt;

  dr_analyze_innermost (&DR_INNERMOST (dr), memref,
                        nest != NULL ? loop : NULL, stmt);
  dr_analyze_indices (&dr->indices, DR_REF (dr), nest, loop);
  dr_analyze_alias (dr);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      unsigned i;
      fprintf (dump_file, "\tbase_address: ");
      print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
      fprintf (dump_file, "\n\toffset from base address: ");
      print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
      fprintf (dump_file, "\n\tconstant offset from base address: ");
      print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
      fprintf (dump_file, "\n\tstep: ");
      print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
      fprintf (dump_file, "\n\tbase alignment: %d", DR_BASE_ALIGNMENT (dr));
      fprintf (dump_file, "\n\tbase misalignment: %d",
               DR_BASE_MISALIGNMENT (dr));
      fprintf (dump_file, "\n\toffset alignment: %d",
               DR_OFFSET_ALIGNMENT (dr));
      fprintf (dump_file, "\n\tstep alignment: %d", DR_STEP_ALIGNMENT (dr));
      fprintf (dump_file, "\n\tbase_object: ");
      print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
      fprintf (dump_file, "\n");
      for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
        {
          fprintf (dump_file, "\tAccess function %d: ", i);
          print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
        }
    }

  return dr;
}

/* A helper function that computes the order between two tree expressions
   T1 and T2.  This is used in comparator functions sorting objects based
   on the order of tree expressions.  The function returns -1, 0, or 1.  */

int
data_ref_compare_tree (tree t1, tree t2)
{
  int i, cmp;
  enum tree_code code;
  char tclass;

  if (t1 == t2)
    return 0;
  if (t1 == NULL)
    return -1;
  if (t2 == NULL)
    return 1;

  STRIP_USELESS_TYPE_CONVERSION (t1);
  STRIP_USELESS_TYPE_CONVERSION (t2);
  if (t1 == t2)
    return 0;

  if (TREE_CODE (t1) != TREE_CODE (t2)
      && ! (CONVERT_EXPR_P (t1) && CONVERT_EXPR_P (t2)))
    return TREE_CODE (t1) < TREE_CODE (t2) ? -1 : 1;

  code = TREE_CODE (t1);
  switch (code)
    {
    case INTEGER_CST:
      return tree_int_cst_compare (t1, t2);

    case STRING_CST:
      if (TREE_STRING_LENGTH (t1) != TREE_STRING_LENGTH (t2))
        return TREE_STRING_LENGTH (t1) < TREE_STRING_LENGTH (t2) ? -1 : 1;
      return memcmp (TREE_STRING_POINTER (t1), TREE_STRING_POINTER (t2),
                     TREE_STRING_LENGTH (t1));

    case SSA_NAME:
      if (SSA_NAME_VERSION (t1) != SSA_NAME_VERSION (t2))
        return SSA_NAME_VERSION (t1) < SSA_NAME_VERSION (t2) ? -1 : 1;
      break;

    default:
      if (POLY_INT_CST_P (t1))
        return compare_sizes_for_sort (wi::to_poly_widest (t1),
                                       wi::to_poly_widest (t2));

      tclass = TREE_CODE_CLASS (code);

      /* For decls, compare their UIDs.  */
      if (tclass == tcc_declaration)
        {
          if (DECL_UID (t1) != DECL_UID (t2))
            return DECL_UID (t1) < DECL_UID (t2) ? -1 : 1;
          break;
        }
      /* For expressions, compare their operands recursively.  */
      else if (IS_EXPR_CODE_CLASS (tclass))
        {
          for (i = TREE_OPERAND_LENGTH (t1) - 1; i >= 0; --i)
            {
              cmp = data_ref_compare_tree (TREE_OPERAND (t1, i),
                                           TREE_OPERAND (t2, i));
              if (cmp != 0)
                return cmp;
            }
        }
      else
        gcc_unreachable ();
    }

  return 0;
}

/* Return TRUE if it's possible to resolve data dependence DDR by runtime
   alias check.  */

opt_result
runtime_alias_check_p (ddr_p ddr, class loop *loop, bool speed_p)
{
  if (dump_enabled_p ())
    dump_printf (MSG_NOTE,
                 "consider run-time aliasing test between %T and %T\n",
                 DR_REF (DDR_A (ddr)), DR_REF (DDR_B (ddr)));

  if (!speed_p)
    return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
                                   "runtime alias check not supported when"
                                   " optimizing for size.\n");

  /* FORNOW: We don't support versioning with outer-loop in either
     vectorization or loop distribution.  */
  if (loop != NULL && loop->inner != NULL)
    return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
                                   "runtime alias check not supported for"
                                   " outer loop.\n");

  return opt_result::success ();
}

/* Operator == between two dr_with_seg_len objects.

   This equality operator is used to make sure two data refs
   are the same one so that we will consider combining the
   aliasing checks of those two pairs of data dependent data
   refs.  */

static bool
operator == (const dr_with_seg_len& d1,
             const dr_with_seg_len& d2)
{
  return (operand_equal_p (DR_BASE_ADDRESS (d1.dr),
                           DR_BASE_ADDRESS (d2.dr), 0)
          && data_ref_compare_tree (DR_OFFSET (d1.dr), DR_OFFSET (d2.dr)) == 0
          && data_ref_compare_tree (DR_INIT (d1.dr), DR_INIT (d2.dr)) == 0
          && data_ref_compare_tree (d1.seg_len, d2.seg_len) == 0
          && known_eq (d1.access_size, d2.access_size)
          && d1.align == d2.align);
}

/* Comparison function for sorting objects of dr_with_seg_len_pair_t
   so that we can combine aliasing checks in one scan.  */

static int
comp_dr_with_seg_len_pair (const void *pa_, const void *pb_)
{
  const dr_with_seg_len_pair_t* pa = (const dr_with_seg_len_pair_t *) pa_;
  const dr_with_seg_len_pair_t* pb = (const dr_with_seg_len_pair_t *) pb_;
  const dr_with_seg_len &a1 = pa->first, &a2 = pa->second;
  const dr_with_seg_len &b1 = pb->first, &b2 = pb->second;

  /* For DR pairs (a, b) and (c, d), we only consider merging the alias
     checks if a and c have the same base address and step, and b and d
     have the same address and step.  Therefore, if any a&c or b&d don't
     have the same address and step, we don't care about the order of
     those two pairs after sorting.  */
  int comp_res;

  if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a1.dr),
                                         DR_BASE_ADDRESS (b1.dr))) != 0)
    return comp_res;
  if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a2.dr),
                                         DR_BASE_ADDRESS (b2.dr))) != 0)
    return comp_res;
  if ((comp_res = data_ref_compare_tree (DR_STEP (a1.dr),
                                         DR_STEP (b1.dr))) != 0)
    return comp_res;
  if ((comp_res = data_ref_compare_tree (DR_STEP (a2.dr),
                                         DR_STEP (b2.dr))) != 0)
    return comp_res;
  if ((comp_res = data_ref_compare_tree (DR_OFFSET (a1.dr),
                                         DR_OFFSET (b1.dr))) != 0)
    return comp_res;
  if ((comp_res = data_ref_compare_tree (DR_INIT (a1.dr),
                                         DR_INIT (b1.dr))) != 0)
    return comp_res;
  if ((comp_res = data_ref_compare_tree (DR_OFFSET (a2.dr),
                                         DR_OFFSET (b2.dr))) != 0)
    return comp_res;
  if ((comp_res = data_ref_compare_tree (DR_INIT (a2.dr),
                                         DR_INIT (b2.dr))) != 0)
    return comp_res;

  return 0;
}

/* Dump information about ALIAS_PAIR, indenting each line by INDENT.  */

static void
dump_alias_pair (dr_with_seg_len_pair_t *alias_pair, const char *indent)
{
  dump_printf (MSG_NOTE, "%sreference: %T vs. %T\n", indent,
               DR_REF (alias_pair->first.dr),
               DR_REF (alias_pair->second.dr));

  dump_printf (MSG_NOTE, "%ssegment length: %T", indent,
               alias_pair->first.seg_len);
  if (!operand_equal_p (alias_pair->first.seg_len,
                        alias_pair->second.seg_len, 0))
    dump_printf (MSG_NOTE, " vs. %T", alias_pair->second.seg_len);

  dump_printf (MSG_NOTE, "\n%saccess size: ", indent);
  dump_dec (MSG_NOTE, alias_pair->first.access_size);
  if (maybe_ne (alias_pair->first.access_size, alias_pair->second.access_size))
    {
      dump_printf (MSG_NOTE, " vs. ");
      dump_dec (MSG_NOTE, alias_pair->second.access_size);
    }

  dump_printf (MSG_NOTE, "\n%salignment: %d", indent,
               alias_pair->first.align);
  if (alias_pair->first.align != alias_pair->second.align)
    dump_printf (MSG_NOTE, " vs. %d", alias_pair->second.align);

  dump_printf (MSG_NOTE, "\n%sflags: ", indent);
  if (alias_pair->flags & DR_ALIAS_RAW)
    dump_printf (MSG_NOTE, " RAW");
  if (alias_pair->flags & DR_ALIAS_WAR)
    dump_printf (MSG_NOTE, " WAR");
  if (alias_pair->flags & DR_ALIAS_WAW)
    dump_printf (MSG_NOTE, " WAW");
  if (alias_pair->flags & DR_ALIAS_ARBITRARY)
    dump_printf (MSG_NOTE, " ARBITRARY");
  if (alias_pair->flags & DR_ALIAS_SWAPPED)
    dump_printf (MSG_NOTE, " SWAPPED");
  if (alias_pair->flags & DR_ALIAS_UNSWAPPED)
    dump_printf (MSG_NOTE, " UNSWAPPED");
  if (alias_pair->flags & DR_ALIAS_MIXED_STEPS)
    dump_printf (MSG_NOTE, " MIXED_STEPS");
  if (alias_pair->flags == 0)
    dump_printf (MSG_NOTE, " <none>");
  dump_printf (MSG_NOTE, "\n");
}

/* Merge alias checks recorded in ALIAS_PAIRS and remove redundant ones.
   FACTOR is the number of iterations that each data reference is accessed.

   Basically, for each pair of dependent data refs store_ptr_0 & load_ptr_0,
   we create an expression:

   ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
   || (load_ptr_0 + load_segment_length_0) <= store_ptr_0))

   for aliasing checks.  However, in some cases we can decrease the number
   of checks by combining two checks into one.  For example, suppose we have
   another pair of data refs store_ptr_0 & load_ptr_1, and if the following
   condition is satisfied:

   load_ptr_0 < load_ptr_1 &&
   load_ptr_1 - load_ptr_0 - load_segment_length_0 < store_segment_length_0

   (this condition means, in each iteration of the vectorized loop, the
   accessed memory of store_ptr_0 cannot be between the memory of load_ptr_0
   and load_ptr_1.)

   we can then use only the following expression to finish the aliasing
   checks between store_ptr_0 & load_ptr_0 and store_ptr_0 & load_ptr_1:

   ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
   || (load_ptr_1 + load_segment_length_1 <= store_ptr_0))

   Note that we only consider the case in which load_ptr_0 and load_ptr_1
   have the same base address.  */
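/* For instance, if one pair checks a[0..15] against b and another
   checks a[4..19] against the same b, the loop below merges them into
   a single check of a[0..19] against b, halving the number of runtime
   comparisons.  */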
1781 void
1782 prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs,
1783 poly_uint64)
1785 if (alias_pairs->is_empty ())
1786 return;
1788 /* Canonicalize each pair so that the base components are ordered wrt
1789 data_ref_compare_tree. This allows the loop below to merge more
1790 cases. */
1791 unsigned int i;
1792 dr_with_seg_len_pair_t *alias_pair;
1793 FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
1795 data_reference_p dr_a = alias_pair->first.dr;
1796 data_reference_p dr_b = alias_pair->second.dr;
1797 int comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (dr_a),
1798 DR_BASE_ADDRESS (dr_b));
1799 if (comp_res == 0)
1800 comp_res = data_ref_compare_tree (DR_OFFSET (dr_a), DR_OFFSET (dr_b));
1801 if (comp_res == 0)
1802 comp_res = data_ref_compare_tree (DR_INIT (dr_a), DR_INIT (dr_b));
1803 if (comp_res > 0)
1805 std::swap (alias_pair->first, alias_pair->second);
1806 alias_pair->flags |= DR_ALIAS_SWAPPED;
1808 else
1809 alias_pair->flags |= DR_ALIAS_UNSWAPPED;
1812 /* Sort the collected data ref pairs so that we can scan them once to
1813 combine all possible aliasing checks. */
1814 alias_pairs->qsort (comp_dr_with_seg_len_pair);
1816 /* Scan the sorted dr pairs and check if we can combine alias checks
1817 of two neighboring dr pairs. */
1818 unsigned int last = 0;
1819 for (i = 1; i < alias_pairs->length (); ++i)
1821 /* Deal with two ddrs (dr_a1, dr_b1) and (dr_a2, dr_b2). */
1822 dr_with_seg_len_pair_t *alias_pair1 = &(*alias_pairs)[last];
1823 dr_with_seg_len_pair_t *alias_pair2 = &(*alias_pairs)[i];
1825 dr_with_seg_len *dr_a1 = &alias_pair1->first;
1826 dr_with_seg_len *dr_b1 = &alias_pair1->second;
1827 dr_with_seg_len *dr_a2 = &alias_pair2->first;
1828 dr_with_seg_len *dr_b2 = &alias_pair2->second;
1830 /* Remove duplicate data ref pairs. */
1831 if (*dr_a1 == *dr_a2 && *dr_b1 == *dr_b2)
1833 if (dump_enabled_p ())
1834 dump_printf (MSG_NOTE, "found equal ranges %T, %T and %T, %T\n",
1835 DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
1836 DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
1837 alias_pair1->flags |= alias_pair2->flags;
1838 continue;
1841 /* Assume that we won't be able to merge the pairs, then correct
1842 if we do. */
1843 last += 1;
1844 if (last != i)
1845 (*alias_pairs)[last] = (*alias_pairs)[i];
1847 if (*dr_a1 == *dr_a2 || *dr_b1 == *dr_b2)
1849 /* We consider the case that DR_B1 and DR_B2 are same memrefs,
1850 and DR_A1 and DR_A2 are two consecutive memrefs. */
1851 if (*dr_a1 == *dr_a2)
1853 std::swap (dr_a1, dr_b1);
1854 std::swap (dr_a2, dr_b2);
1857 poly_int64 init_a1, init_a2;
1858 /* Only consider cases in which the distance between the initial
1859 DR_A1 and the initial DR_A2 is known at compile time. */
1860 if (!operand_equal_p (DR_BASE_ADDRESS (dr_a1->dr),
1861 DR_BASE_ADDRESS (dr_a2->dr), 0)
1862 || !operand_equal_p (DR_OFFSET (dr_a1->dr),
1863 DR_OFFSET (dr_a2->dr), 0)
1864 || !poly_int_tree_p (DR_INIT (dr_a1->dr), &init_a1)
1865 || !poly_int_tree_p (DR_INIT (dr_a2->dr), &init_a2))
1866 continue;
1868 /* Don't combine if we can't tell which one comes first. */
1869 if (!ordered_p (init_a1, init_a2))
1870 continue;
1872 /* Work out what the segment length would be if we did combine
1873 DR_A1 and DR_A2:
1875 - If DR_A1 and DR_A2 have equal lengths, that length is
1876 also the combined length.
1878 - If DR_A1 and DR_A2 both have negative "lengths", the combined
1879 length is the lower bound on those lengths.
1881 - If DR_A1 and DR_A2 both have positive lengths, the combined
1882 length is the upper bound on those lengths.
1884 Other cases are unlikely to give a useful combination.
1886 The lengths both have sizetype, so the sign is taken from
1887 the step instead. */
1888 poly_uint64 new_seg_len = 0;
1889 bool new_seg_len_p = !operand_equal_p (dr_a1->seg_len,
1890 dr_a2->seg_len, 0);
1891 if (new_seg_len_p)
1893 poly_uint64 seg_len_a1, seg_len_a2;
1894 if (!poly_int_tree_p (dr_a1->seg_len, &seg_len_a1)
1895 || !poly_int_tree_p (dr_a2->seg_len, &seg_len_a2))
1896 continue;
1898 tree indicator_a = dr_direction_indicator (dr_a1->dr);
1899 if (TREE_CODE (indicator_a) != INTEGER_CST)
1900 continue;
1902 tree indicator_b = dr_direction_indicator (dr_a2->dr);
1903 if (TREE_CODE (indicator_b) != INTEGER_CST)
1904 continue;
1906 int sign_a = tree_int_cst_sgn (indicator_a);
1907 int sign_b = tree_int_cst_sgn (indicator_b);
1909 if (sign_a <= 0 && sign_b <= 0)
1910 new_seg_len = lower_bound (seg_len_a1, seg_len_a2);
1911 else if (sign_a >= 0 && sign_b >= 0)
1912 new_seg_len = upper_bound (seg_len_a1, seg_len_a2);
1913 else
1914 continue;
1916 /* At this point we're committed to merging the refs. */
1918 /* Make sure dr_a1 starts left of dr_a2. */
1919 if (maybe_gt (init_a1, init_a2))
1921 std::swap (*dr_a1, *dr_a2);
1922 std::swap (init_a1, init_a2);
1925 /* The DR_Bs are equal, so only the DR_As can introduce
1926 mixed steps. */
1927 if (!operand_equal_p (DR_STEP (dr_a1->dr), DR_STEP (dr_a2->dr), 0))
1928 alias_pair1->flags |= DR_ALIAS_MIXED_STEPS;
1930 if (new_seg_len_p)
1932 dr_a1->seg_len = build_int_cst (TREE_TYPE (dr_a1->seg_len),
1933 new_seg_len);
1934 dr_a1->align = MIN (dr_a1->align, known_alignment (new_seg_len));
1937 /* This is known to be nonnegative due to the swap above. */
1938 poly_uint64 diff = init_a2 - init_a1;
1940 /* The new check will start at DR_A1. Make sure that its access
1941 size encompasses the initial DR_A2. */
1942 if (maybe_lt (dr_a1->access_size, diff + dr_a2->access_size))
1944 dr_a1->access_size = upper_bound (dr_a1->access_size,
1945 diff + dr_a2->access_size);
1946 unsigned int new_align = known_alignment (dr_a1->access_size);
1947 dr_a1->align = MIN (dr_a1->align, new_align);
1949 if (dump_enabled_p ())
1950 dump_printf (MSG_NOTE, "merging ranges for %T, %T and %T, %T\n",
1951 DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
1952 DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
1953 alias_pair1->flags |= alias_pair2->flags;
1954 last -= 1;
1957 alias_pairs->truncate (last + 1);
1959 /* Try to restore the original dr_with_seg_len order within each
1960 dr_with_seg_len_pair_t. If we ended up combining swapped and
1961 unswapped pairs into the same check, we have to invalidate any
1962 RAW, WAR and WAW information for it. */
1963 if (dump_enabled_p ())
1964 dump_printf (MSG_NOTE, "merged alias checks:\n");
1965 FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
1967 unsigned int swap_mask = (DR_ALIAS_SWAPPED | DR_ALIAS_UNSWAPPED);
1968 unsigned int swapped = (alias_pair->flags & swap_mask);
1969 if (swapped == DR_ALIAS_SWAPPED)
1970 std::swap (alias_pair->first, alias_pair->second);
1971 else if (swapped != DR_ALIAS_UNSWAPPED)
1972 alias_pair->flags |= DR_ALIAS_ARBITRARY;
1973 alias_pair->flags &= ~swap_mask;
1974 if (dump_enabled_p ())
1975 dump_alias_pair (alias_pair, " ");
1979 /* A subroutine of create_intersect_range_checks, with a subset of the
1980 same arguments. Try to use IFN_CHECK_RAW_PTRS and IFN_CHECK_WAR_PTRS
1981 to optimize cases in which the references form a simple RAW, WAR or
1982 WAW dependence. */
1984 static bool
1985 create_ifn_alias_checks (tree *cond_expr,
1986 const dr_with_seg_len_pair_t &alias_pair)
1988 const dr_with_seg_len& dr_a = alias_pair.first;
1989 const dr_with_seg_len& dr_b = alias_pair.second;
1991 /* Check for cases in which:
1993 (a) we have a known RAW, WAR or WAW dependence;
1994 (b) the accesses are well-ordered in both the original and new code
1995 (see the comment above the DR_ALIAS_* flags for details); and
1996 (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR. */
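/* The flags test below enforces this: any bit outside DR_ALIAS_RAW,
   DR_ALIAS_WAR and DR_ALIAS_WAW (e.g. DR_ALIAS_ARBITRARY or
   DR_ALIAS_MIXED_STEPS) would contradict (b) or (c). */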
1997 if (alias_pair.flags & ~(DR_ALIAS_RAW | DR_ALIAS_WAR | DR_ALIAS_WAW))
1998 return false;
2000 /* Make sure that both DRs access the same pattern of bytes,
2001 with a constant length and step. */
2002 poly_uint64 seg_len;
2003 if (!operand_equal_p (dr_a.seg_len, dr_b.seg_len, 0)
2004 || !poly_int_tree_p (dr_a.seg_len, &seg_len)
2005 || maybe_ne (dr_a.access_size, dr_b.access_size)
2006 || !operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0)
2007 || !tree_fits_uhwi_p (DR_STEP (dr_a.dr)))
2008 return false;
2010 unsigned HOST_WIDE_INT bytes = tree_to_uhwi (DR_STEP (dr_a.dr));
2011 tree addr_a = DR_BASE_ADDRESS (dr_a.dr);
2012 tree addr_b = DR_BASE_ADDRESS (dr_b.dr);
2014 /* See whether the target supports what we want to do. WAW checks are
2015 equivalent to WAR checks here. */
2016 internal_fn ifn = (alias_pair.flags & DR_ALIAS_RAW
2017 ? IFN_CHECK_RAW_PTRS
2018 : IFN_CHECK_WAR_PTRS);
2019 unsigned int align = MIN (dr_a.align, dr_b.align);
2020 poly_uint64 full_length = seg_len + bytes;
2021 if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a),
2022 full_length, align))
2024 full_length = seg_len + dr_a.access_size;
2025 if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a),
2026 full_length, align))
2027 return false;
2030 /* Commit to using this form of test. */
2031 addr_a = fold_build_pointer_plus (addr_a, DR_OFFSET (dr_a.dr));
2032 addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr));
2034 addr_b = fold_build_pointer_plus (addr_b, DR_OFFSET (dr_b.dr));
2035 addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr));
2037 *cond_expr = build_call_expr_internal_loc (UNKNOWN_LOCATION,
2038 ifn, boolean_type_node,
2039 4, addr_a, addr_b,
2040 size_int (full_length),
2041 size_int (align));
2043 if (dump_enabled_p ())
2045 if (ifn == IFN_CHECK_RAW_PTRS)
2046 dump_printf (MSG_NOTE, "using an IFN_CHECK_RAW_PTRS test\n");
2047 else
2048 dump_printf (MSG_NOTE, "using an IFN_CHECK_WAR_PTRS test\n");
2050 return true;
2053 /* Try to generate a runtime condition that is true if ALIAS_PAIR is
2054 free of aliases, using a condition based on index values instead
2055 of a condition based on addresses. Return true on success,
2056 storing the condition in *COND_EXPR.
2058 This can only be done if the two data references in ALIAS_PAIR access
2059 the same array object and the index is the only difference. For example,
2060 if the two data references are DR_A and DR_B:
2062 DR_A DR_B
2063 data-ref arr[i] arr[j]
2064 base_object arr arr
2065 index {i_0, +, 1}_loop {j_0, +, 1}_loop
2067 The addresses and their indices look like:
2069 |<- ADDR_A ->| |<- ADDR_B ->|
2070 ------------------------------------------------------->
2071 | | | | | | | | | |
2072 ------------------------------------------------------->
2073 i_0 ... i_0+4 j_0 ... j_0+4
2075 We can create an expression based on the index rather than the address:
2077 (unsigned) (i_0 - j_0 + 3) <= 6
2079 i.e. the indices are less than 4 apart.
2081 Note that the evolution step of the index needs to be considered in the comparison. */
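/* To spell out the arithmetic of the example above: with segments
   covering 4 iterations each, the accesses overlap only if
   -3 <= i_0 - j_0 <= 3; adding 3 to both sides and using an unsigned
   comparison folds the two bounds into the single test
   (unsigned) (i_0 - j_0 + 3) <= 6. */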
2083 static bool
2084 create_intersect_range_checks_index (class loop *loop, tree *cond_expr,
2085 const dr_with_seg_len_pair_t &alias_pair)
2087 const dr_with_seg_len &dr_a = alias_pair.first;
2088 const dr_with_seg_len &dr_b = alias_pair.second;
2089 if ((alias_pair.flags & DR_ALIAS_MIXED_STEPS)
2090 || integer_zerop (DR_STEP (dr_a.dr))
2091 || integer_zerop (DR_STEP (dr_b.dr))
2092 || DR_NUM_DIMENSIONS (dr_a.dr) != DR_NUM_DIMENSIONS (dr_b.dr))
2093 return false;
2095 poly_uint64 seg_len1, seg_len2;
2096 if (!poly_int_tree_p (dr_a.seg_len, &seg_len1)
2097 || !poly_int_tree_p (dr_b.seg_len, &seg_len2))
2098 return false;
2100 if (!tree_fits_shwi_p (DR_STEP (dr_a.dr)))
2101 return false;
2103 if (!operand_equal_p (DR_BASE_OBJECT (dr_a.dr), DR_BASE_OBJECT (dr_b.dr), 0))
2104 return false;
2106 if (!operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0))
2107 return false;
2109 gcc_assert (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST);
2111 bool neg_step = tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0;
2112 unsigned HOST_WIDE_INT abs_step = tree_to_shwi (DR_STEP (dr_a.dr));
2113 if (neg_step)
2115 abs_step = -abs_step;
2116 seg_len1 = (-wi::to_poly_wide (dr_a.seg_len)).force_uhwi ();
2117 seg_len2 = (-wi::to_poly_wide (dr_b.seg_len)).force_uhwi ();
2120 /* Infer the number of iterations with which the memory segment is accessed
2121 by DR. In other words, an alias is checked if the memory segment accessed
2122 by DR_A in some iterations intersects with the memory segment accessed by
2123 DR_B in the same number of iterations.
2124 Note the segment length is a linear function of the number of iterations,
2125 with DR_STEP as the coefficient. */
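/* E.g. (illustrative values) a segment length of 17 bytes with
   abs_step == 4 gives niter_len == (17 + 4 - 1) / 4 == 5 iterations. */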
2126 poly_uint64 niter_len1, niter_len2;
2127 if (!can_div_trunc_p (seg_len1 + abs_step - 1, abs_step, &niter_len1)
2128 || !can_div_trunc_p (seg_len2 + abs_step - 1, abs_step, &niter_len2))
2129 return false;
2131 /* Divide each access size by the byte step, rounding up. */
2132 poly_uint64 niter_access1, niter_access2;
2133 if (!can_div_trunc_p (dr_a.access_size + abs_step - 1,
2134 abs_step, &niter_access1)
2135 || !can_div_trunc_p (dr_b.access_size + abs_step - 1,
2136 abs_step, &niter_access2))
2137 return false;
2139 bool waw_or_war_p = (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW)) == 0;
2141 int found = -1;
2142 for (unsigned int i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
2144 tree access1 = DR_ACCESS_FN (dr_a.dr, i);
2145 tree access2 = DR_ACCESS_FN (dr_b.dr, i);
2146 /* Two indices must be the same if they are not SCEVs, or are not SCEVs
2147 with respect to the current loop being vectorized. */
2148 if (TREE_CODE (access1) != POLYNOMIAL_CHREC
2149 || TREE_CODE (access2) != POLYNOMIAL_CHREC
2150 || CHREC_VARIABLE (access1) != (unsigned)loop->num
2151 || CHREC_VARIABLE (access2) != (unsigned)loop->num)
2153 if (operand_equal_p (access1, access2, 0))
2154 continue;
2156 return false;
2158 if (found >= 0)
2159 return false;
2160 found = i;
2163 /* Ought not to happen in practice, since if all accesses are equal then the
2164 alias should be decidable at compile time. */
2165 if (found < 0)
2166 return false;
2168 /* The two indices must have the same step. */
2169 tree access1 = DR_ACCESS_FN (dr_a.dr, found);
2170 tree access2 = DR_ACCESS_FN (dr_b.dr, found);
2171 if (!operand_equal_p (CHREC_RIGHT (access1), CHREC_RIGHT (access2), 0))
2172 return false;
2174 tree idx_step = CHREC_RIGHT (access1);
2175 /* Index must have const step, otherwise DR_STEP won't be constant. */
2176 gcc_assert (TREE_CODE (idx_step) == INTEGER_CST);
2177 /* Index must evaluate in the same direction as DR. */
2178 gcc_assert (!neg_step || tree_int_cst_sign_bit (idx_step) == 1);
2180 tree min1 = CHREC_LEFT (access1);
2181 tree min2 = CHREC_LEFT (access2);
2182 if (!types_compatible_p (TREE_TYPE (min1), TREE_TYPE (min2)))
2183 return false;
2185 /* Ideally, the alias could be checked against the loop's control IV, but
2186 we would need to prove a linear mapping between the control IV and the
2187 reference index. Although that should be true, we instead check against
2188 the (array) index of the data reference. Like the segment length, the
2189 index length is a linear function of the number of iterations, with
2190 idx_step as the coefficient, i.e. niter_len * idx_step. */
2191 offset_int abs_idx_step = offset_int::from (wi::to_wide (idx_step),
2192 SIGNED);
2193 if (neg_step)
2194 abs_idx_step = -abs_idx_step;
2195 poly_offset_int idx_len1 = abs_idx_step * niter_len1;
2196 poly_offset_int idx_len2 = abs_idx_step * niter_len2;
2197 poly_offset_int idx_access1 = abs_idx_step * niter_access1;
2198 poly_offset_int idx_access2 = abs_idx_step * niter_access2;
2200 gcc_assert (known_ge (idx_len1, 0)
2201 && known_ge (idx_len2, 0)
2202 && known_ge (idx_access1, 0)
2203 && known_ge (idx_access2, 0));
2205 /* Each access has the following pattern, with lengths measured
2206 in units of INDEX:
2208 <-- idx_len -->
2209 <--- A: -ve step --->
2210 +-----+-------+-----+-------+-----+
2211 | n-1 | ..... | 0 | ..... | n-1 |
2212 +-----+-------+-----+-------+-----+
2213 <--- B: +ve step --->
2214 <-- idx_len -->
2218 where "n" is the number of scalar iterations covered by the segment
2219 and where each access spans idx_access units.
2221 A is the range of bytes accessed when the step is negative,
2222 B is the range when the step is positive.
2224 When checking for general overlap, we need to test whether
2225 the range:
2227 [min1 + low_offset1, min1 + high_offset1 + idx_access1 - 1]
2229 overlaps:
2231 [min2 + low_offset2, min2 + high_offset2 + idx_access2 - 1]
2233 where:
2235 low_offsetN = +ve step ? 0 : -idx_lenN;
2236 high_offsetN = +ve step ? idx_lenN : 0;
2238 This is equivalent to testing whether:
2240 min1 + low_offset1 <= min2 + high_offset2 + idx_access2 - 1
2241 && min2 + low_offset2 <= min1 + high_offset1 + idx_access1 - 1
2243 Converting this into a single test, there is an overlap if:
2245 0 <= min2 - min1 + bias <= limit
2247 where bias = high_offset2 + idx_access2 - 1 - low_offset1
2248 limit = (high_offset1 - low_offset1 + idx_access1 - 1)
2249 + (high_offset2 - low_offset2 + idx_access2 - 1)
2250 i.e. limit = idx_len1 + idx_access1 - 1 + idx_len2 + idx_access2 - 1
2252 Combining the tests requires limit to be computable in an unsigned
2253 form of the index type; if it isn't, we fall back to the usual
2254 pointer-based checks.
2256 We can do better if DR_B is a write and if DR_A and DR_B are
2257 well-ordered in both the original and the new code (see the
2258 comment above the DR_ALIAS_* flags for details). In this case
2259 we know that for each i in [0, n-1], the write performed by
2260 access i of DR_B occurs after access numbers j<=i of DR_A in
2261 both the original and the new code. Any write or anti
2262 dependencies wrt those DR_A accesses are therefore maintained.
2264 We just need to make sure that each individual write in DR_B does not
2265 overlap any higher-indexed access in DR_A; such DR_A accesses happen
2266 after the DR_B access in the original code but happen before it in
2267 the new code.
2269 We know the steps for both accesses are equal, so by induction, we
2270 just need to test whether the first write of DR_B overlaps a later
2271 access of DR_A. In other words, we need to move min1 along by
2272 one iteration:
2274 min1' = min1 + idx_step
2276 and use the ranges:
2278 [min1' + low_offset1', min1' + high_offset1' + idx_access1 - 1]
2280 and:
2282 [min2, min2 + idx_access2 - 1]
2284 where:
2286 low_offset1' = +ve step ? 0 : -(idx_len1 - |idx_step|)
2287 high_offset1' = +ve step ? idx_len1 - |idx_step| : 0. */
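/* A worked example with hypothetical values: for a positive step with
   idx_len1 == idx_len2 == 8, idx_access1 == idx_access2 == 1 and
   !waw_or_war_p, we get low_offset1 == 0, high_offset2 == 8,
   bias == 8 + 1 - 1 - 0 == 8 and limit == 8 + 8 + 1 - 1 + 1 - 1 == 16,
   so there is an overlap iff 0 <= min2 - min1 + 8 <= 16, i.e. iff the
   two initial indices are at most 8 apart. */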
2288 if (waw_or_war_p)
2289 idx_len1 -= abs_idx_step;
2291 poly_offset_int limit = idx_len1 + idx_access1 - 1 + idx_access2 - 1;
2292 if (!waw_or_war_p)
2293 limit += idx_len2;
2295 tree utype = unsigned_type_for (TREE_TYPE (min1));
2296 if (!wi::fits_to_tree_p (limit, utype))
2297 return false;
2299 poly_offset_int low_offset1 = neg_step ? -idx_len1 : 0;
2300 poly_offset_int high_offset2 = neg_step || waw_or_war_p ? 0 : idx_len2;
2301 poly_offset_int bias = high_offset2 + idx_access2 - 1 - low_offset1;
2302 /* Equivalent to adding IDX_STEP to MIN1. */
2303 if (waw_or_war_p)
2304 bias -= wi::to_offset (idx_step);
2306 tree subject = fold_build2 (MINUS_EXPR, utype,
2307 fold_convert (utype, min2),
2308 fold_convert (utype, min1));
2309 subject = fold_build2 (PLUS_EXPR, utype, subject,
2310 wide_int_to_tree (utype, bias));
2311 tree part_cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject,
2312 wide_int_to_tree (utype, limit));
2313 if (*cond_expr)
2314 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2315 *cond_expr, part_cond_expr);
2316 else
2317 *cond_expr = part_cond_expr;
2318 if (dump_enabled_p ())
2320 if (waw_or_war_p)
2321 dump_printf (MSG_NOTE, "using an index-based WAR/WAW test\n");
2322 else
2323 dump_printf (MSG_NOTE, "using an index-based overlap test\n");
2325 return true;
2328 /* A subroutine of create_intersect_range_checks, with a subset of the
2329 same arguments. Try to optimize cases in which the second access
2330 is a write and in which some overlap is valid. */
2332 static bool
2333 create_waw_or_war_checks (tree *cond_expr,
2334 const dr_with_seg_len_pair_t &alias_pair)
2336 const dr_with_seg_len& dr_a = alias_pair.first;
2337 const dr_with_seg_len& dr_b = alias_pair.second;
2339 /* Check for cases in which:
2341 (a) DR_B is always a write;
2342 (b) the accesses are well-ordered in both the original and new code
2343 (see the comment above the DR_ALIAS_* flags for details); and
2344 (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR. */
2345 if (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW))
2346 return false;
2348 /* Check for equal (but possibly variable) steps. */
2349 tree step = DR_STEP (dr_a.dr);
2350 if (!operand_equal_p (step, DR_STEP (dr_b.dr)))
2351 return false;
2353 /* Make sure that we can operate on sizetype without loss of precision. */
2354 tree addr_type = TREE_TYPE (DR_BASE_ADDRESS (dr_a.dr));
2355 if (TYPE_PRECISION (addr_type) != TYPE_PRECISION (sizetype))
2356 return false;
2358 /* All addresses involved are known to have a common alignment ALIGN.
2359 We can therefore subtract ALIGN from an exclusive endpoint to get
2360 an inclusive endpoint. In the best (and common) case, ALIGN is the
2361 same as the access sizes of both DRs, and so subtracting ALIGN
2362 cancels out the addition of an access size. */
2363 unsigned int align = MIN (dr_a.align, dr_b.align);
2364 poly_uint64 last_chunk_a = dr_a.access_size - align;
2365 poly_uint64 last_chunk_b = dr_b.access_size - align;
2367 /* Get a boolean expression that is true when the step is negative. */
2368 tree indicator = dr_direction_indicator (dr_a.dr);
2369 tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
2370 fold_convert (ssizetype, indicator),
2371 ssize_int (0));
2373 /* Get lengths in sizetype. */
2374 tree seg_len_a
2375 = fold_convert (sizetype, rewrite_to_non_trapping_overflow (dr_a.seg_len));
2376 step = fold_convert (sizetype, rewrite_to_non_trapping_overflow (step));
2378 /* Each access has the following pattern:
2380 <- |seg_len| ->
2381 <--- A: -ve step --->
2382 +-----+-------+-----+-------+-----+
2383 | n-1 | ..... | 0 | ..... | n-1 |
2384 +-----+-------+-----+-------+-----+
2385 <--- B: +ve step --->
2386 <- |seg_len| ->
2388 base address
2390 where "n" is the number of scalar iterations covered by the segment.
2392 A is the range of bytes accessed when the step is negative,
2393 B is the range when the step is positive.
2395 We know that DR_B is a write. We also know (from checking that
2396 DR_A and DR_B are well-ordered) that for each i in [0, n-1],
2397 the write performed by access i of DR_B occurs after access numbers
2398 j<=i of DR_A in both the original and the new code. Any write or
2399 anti dependencies wrt those DR_A accesses are therefore maintained.
2401 We just need to make sure that each individual write in DR_B does not
2402 overlap any higher-indexed access in DR_A; such DR_A accesses happen
2403 after the DR_B access in the original code but happen before it in
2404 the new code.
2406 We know the steps for both accesses are equal, so by induction, we
2407 just need to test whether the first write of DR_B overlaps a later
2408 access of DR_A. In other words, we need to move addr_a along by
2409 one iteration:
2411 addr_a' = addr_a + step
2413 and check whether:
2415 [addr_b, addr_b + last_chunk_b]
2417 overlaps:
2419 [addr_a' + low_offset_a, addr_a' + high_offset_a + last_chunk_a]
2421 where [low_offset_a, high_offset_a] spans accesses [1, n-1]. I.e.:
2423 low_offset_a = +ve step ? 0 : seg_len_a - step
2424 high_offset_a = +ve step ? seg_len_a - step : 0
2426 This is equivalent to testing whether:
2428 addr_a' + low_offset_a <= addr_b + last_chunk_b
2429 && addr_b <= addr_a' + high_offset_a + last_chunk_a
2431 Converting this into a single test, there is an overlap if:
2433 0 <= addr_b + last_chunk_b - addr_a' - low_offset_a <= limit
2435 where limit = high_offset_a - low_offset_a + last_chunk_a + last_chunk_b
2437 If DR_A is performed, limit + |step| - last_chunk_b is known to be
2438 less than the size of the object underlying DR_A. We also know
2439 that last_chunk_b <= |step|; this is checked elsewhere if it isn't
2440 guaranteed at compile time. There can therefore be no overflow if
2441 "limit" is calculated in an unsigned type with pointer precision. */
2442 tree addr_a = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_a.dr),
2443 DR_OFFSET (dr_a.dr));
2444 addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr));
2446 tree addr_b = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_b.dr),
2447 DR_OFFSET (dr_b.dr));
2448 addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr));
2450 /* Advance ADDR_A by one iteration and adjust the length to compensate. */
2451 addr_a = fold_build_pointer_plus (addr_a, step);
2452 tree seg_len_a_minus_step = fold_build2 (MINUS_EXPR, sizetype,
2453 seg_len_a, step);
2454 if (!CONSTANT_CLASS_P (seg_len_a_minus_step))
2455 seg_len_a_minus_step = build1 (SAVE_EXPR, sizetype, seg_len_a_minus_step);
2457 tree low_offset_a = fold_build3 (COND_EXPR, sizetype, neg_step,
2458 seg_len_a_minus_step, size_zero_node);
2459 if (!CONSTANT_CLASS_P (low_offset_a))
2460 low_offset_a = build1 (SAVE_EXPR, sizetype, low_offset_a);
2462 /* We could use COND_EXPR <neg_step, size_zero_node, seg_len_a_minus_step>,
2463 but it's usually more efficient to reuse the LOW_OFFSET_A result. */
2464 tree high_offset_a = fold_build2 (MINUS_EXPR, sizetype, seg_len_a_minus_step,
2465 low_offset_a);
2467 /* The amount added to addr_b - addr_a'. */
2468 tree bias = fold_build2 (MINUS_EXPR, sizetype,
2469 size_int (last_chunk_b), low_offset_a);
2471 tree limit = fold_build2 (MINUS_EXPR, sizetype, high_offset_a, low_offset_a);
2472 limit = fold_build2 (PLUS_EXPR, sizetype, limit,
2473 size_int (last_chunk_a + last_chunk_b));
2475 tree subject = fold_build2 (POINTER_DIFF_EXPR, ssizetype, addr_b, addr_a);
2476 subject = fold_build2 (PLUS_EXPR, sizetype,
2477 fold_convert (sizetype, subject), bias);
2479 *cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject, limit);
2480 if (dump_enabled_p ())
2481 dump_printf (MSG_NOTE, "using an address-based WAR/WAW test\n");
2482 return true;
2485 /* If ALIGN is nonzero, set up *SEG_MIN_OUT and *SEG_MAX_OUT so that for
2486 every address ADDR accessed by D:
2488 *SEG_MIN_OUT <= ADDR (== ADDR & -ALIGN) <= *SEG_MAX_OUT
2490 In this case, every element accessed by D is aligned to at least
2491 ALIGN bytes.
2493 If ALIGN is zero then instead set *SEG_MAX_OUT so that:
2495 *SEG_MIN_OUT <= ADDR < *SEG_MAX_OUT. */
2497 static void
2498 get_segment_min_max (const dr_with_seg_len &d, tree *seg_min_out,
2499 tree *seg_max_out, HOST_WIDE_INT align)
2501 /* Each access has the following pattern:
2503 <- |seg_len| ->
2504 <--- A: -ve step --->
2505 +-----+-------+-----+-------+-----+
2506 | n-1 | ..... | 0 | ..... | n-1 |
2507 +-----+-------+-----+-------+-----+
2508 <--- B: +ve step --->
2509 <- |seg_len| ->
2511 base address
2513 where "n" is the number of scalar iterations covered by the segment.
2514 (This should be VF for a particular pair if we know that both steps
2515 are the same, otherwise it will be the full number of scalar loop
2516 iterations.)
2518 A is the range of bytes accessed when the step is negative,
2519 B is the range when the step is positive.
2521 If the access size is "access_size" bytes, the lowest addressed byte is:
2523 base + (step < 0 ? seg_len : 0) [LB]
2525 and the highest addressed byte is always below:
2527 base + (step < 0 ? 0 : seg_len) + access_size [UB]
2529 Thus:
2531 LB <= ADDR < UB
2533 If ALIGN is nonzero, all three values are aligned to at least ALIGN
2534 bytes, so:
2536 LB <= ADDR <= UB - ALIGN
2538 where "- ALIGN" folds naturally with the "+ access_size" and often
2539 cancels it out.
2541 We don't try to simplify LB and UB beyond this (e.g. by using
2542 MIN and MAX based on whether seg_len rather than the stride is
2543 negative) because it is possible for the absolute size of the
2544 segment to overflow the range of a ssize_t.
2546 Keeping the pointer_plus outside of the cond_expr should allow
2547 the cond_exprs to be shared with other alias checks. */
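/* E.g. (illustrative values) for a 16-byte segment with a positive step,
   access_size == 4 and align == 4, the bounds are LB == base and
   UB - ALIGN == base + 16 + 4 - 4 == base + 16, i.e. the "- align"
   cancels the "+ access_size" as described above. */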
2548 tree indicator = dr_direction_indicator (d.dr);
2549 tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
2550 fold_convert (ssizetype, indicator),
2551 ssize_int (0));
2552 tree addr_base = fold_build_pointer_plus (DR_BASE_ADDRESS (d.dr),
2553 DR_OFFSET (d.dr));
2554 addr_base = fold_build_pointer_plus (addr_base, DR_INIT (d.dr));
2555 tree seg_len
2556 = fold_convert (sizetype, rewrite_to_non_trapping_overflow (d.seg_len));
2558 tree min_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
2559 seg_len, size_zero_node);
2560 tree max_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
2561 size_zero_node, seg_len);
2562 max_reach = fold_build2 (PLUS_EXPR, sizetype, max_reach,
2563 size_int (d.access_size - align));
2565 *seg_min_out = fold_build_pointer_plus (addr_base, min_reach);
2566 *seg_max_out = fold_build_pointer_plus (addr_base, max_reach);
2569 /* Generate a runtime condition that is true if ALIAS_PAIR is free of aliases,
2570 storing the condition in *COND_EXPR. The fallback is to generate
2571 a test that the two accesses do not overlap:
2573 end_a <= start_b || end_b <= start_a. */
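/* E.g. (illustrative addresses) segments [0, 16) and [16, 32) satisfy
   end_a <= start_b, so the fallback proves they cannot alias. */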
2575 static void
2576 create_intersect_range_checks (class loop *loop, tree *cond_expr,
2577 const dr_with_seg_len_pair_t &alias_pair)
2579 const dr_with_seg_len& dr_a = alias_pair.first;
2580 const dr_with_seg_len& dr_b = alias_pair.second;
2581 *cond_expr = NULL_TREE;
2582 if (create_intersect_range_checks_index (loop, cond_expr, alias_pair))
2583 return;
2585 if (create_ifn_alias_checks (cond_expr, alias_pair))
2586 return;
2588 if (create_waw_or_war_checks (cond_expr, alias_pair))
2589 return;
2591 unsigned HOST_WIDE_INT min_align;
2592 tree_code cmp_code;
2593 /* We don't have to check DR_ALIAS_MIXED_STEPS here, since both versions
2594 are equivalent. This is just an optimization heuristic. */
2595 if (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST
2596 && TREE_CODE (DR_STEP (dr_b.dr)) == INTEGER_CST)
2598 /* In this case adding access_size to seg_len is likely to give
2599 a simple X * step, where X is either the number of scalar
2600 iterations or the vectorization factor. We're better off
2601 keeping that, rather than subtracting an alignment from it.
2603 In this case the maximum values are exclusive and so there is
2604 no alias if the maximum of one segment equals the minimum
2605 of another. */
2606 min_align = 0;
2607 cmp_code = LE_EXPR;
2609 else
2611 /* Calculate the minimum alignment shared by all four pointers,
2612 then arrange for this alignment to be subtracted from the
2613 exclusive maximum values to get inclusive maximum values.
2614 This "- min_align" is cumulative with a "+ access_size"
2615 in the calculation of the maximum values. In the best
2616 (and common) case, the two cancel each other out, leaving
2617 us with an inclusive bound based only on seg_len. In the
2618 worst case we're simply adding a smaller number than before.
2620 Because the maximum values are inclusive, there is an alias
2621 if the maximum value of one segment is equal to the minimum
2622 value of the other. */
2623 min_align = MIN (dr_a.align, dr_b.align);
2624 cmp_code = LT_EXPR;
2627 tree seg_a_min, seg_a_max, seg_b_min, seg_b_max;
2628 get_segment_min_max (dr_a, &seg_a_min, &seg_a_max, min_align);
2629 get_segment_min_max (dr_b, &seg_b_min, &seg_b_max, min_align);
2631 *cond_expr
2632 = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
2633 fold_build2 (cmp_code, boolean_type_node, seg_a_max, seg_b_min),
2634 fold_build2 (cmp_code, boolean_type_node, seg_b_max, seg_a_min));
2635 if (dump_enabled_p ())
2636 dump_printf (MSG_NOTE, "using an address-based overlap test\n");
2639 /* Create a conditional expression that represents the run-time checks for
2640 overlapping of address ranges represented by a list of data references
2641 pairs passed in ALIAS_PAIRS. Data references are in LOOP. The returned
2642 COND_EXPR is the conditional expression to be used in the if statement
2643 that controls which version of the loop gets executed at runtime. */
2645 void
2646 create_runtime_alias_checks (class loop *loop,
2647 const vec<dr_with_seg_len_pair_t> *alias_pairs,
2648 tree * cond_expr)
2650 tree part_cond_expr;
2652 fold_defer_overflow_warnings ();
2653 for (const dr_with_seg_len_pair_t &alias_pair : alias_pairs)
2655 gcc_assert (alias_pair.flags);
2656 if (dump_enabled_p ())
2657 dump_printf (MSG_NOTE,
2658 "create runtime check for data references %T and %T\n",
2659 DR_REF (alias_pair.first.dr),
2660 DR_REF (alias_pair.second.dr));
2662 /* Create a condition expression for each pair of data references. */
2663 create_intersect_range_checks (loop, &part_cond_expr, alias_pair);
2664 if (*cond_expr)
2665 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2666 *cond_expr, part_cond_expr);
2667 else
2668 *cond_expr = part_cond_expr;
2670 fold_undefer_and_ignore_overflow_warnings ();
2673 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
2674 expressions. */
2675 static bool
2676 dr_equal_offsets_p1 (tree offset1, tree offset2)
2678 bool res;
2680 STRIP_NOPS (offset1);
2681 STRIP_NOPS (offset2);
2683 if (offset1 == offset2)
2684 return true;
2686 if (TREE_CODE (offset1) != TREE_CODE (offset2)
2687 || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
2688 return false;
2690 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
2691 TREE_OPERAND (offset2, 0));
2693 if (!res || !BINARY_CLASS_P (offset1))
2694 return res;
2696 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
2697 TREE_OPERAND (offset2, 1));
2699 return res;
2702 /* Check if DRA and DRB have equal offsets. */
2703 bool
2704 dr_equal_offsets_p (struct data_reference *dra,
2705 struct data_reference *drb)
2707 tree offset1, offset2;
2709 offset1 = DR_OFFSET (dra);
2710 offset2 = DR_OFFSET (drb);
2712 return dr_equal_offsets_p1 (offset1, offset2);
2715 /* Returns true if FNA == FNB. */
2717 static bool
2718 affine_function_equal_p (affine_fn fna, affine_fn fnb)
2720 unsigned i, n = fna.length ();
2722 if (n != fnb.length ())
2723 return false;
2725 for (i = 0; i < n; i++)
2726 if (!operand_equal_p (fna[i], fnb[i], 0))
2727 return false;
2729 return true;
2732 /* If all the functions in CF are the same, returns one of them,
2733 otherwise returns NULL. */
2735 static affine_fn
2736 common_affine_function (conflict_function *cf)
2738 unsigned i;
2739 affine_fn comm;
2741 if (!CF_NONTRIVIAL_P (cf))
2742 return affine_fn ();
2744 comm = cf->fns[0];
2746 for (i = 1; i < cf->n; i++)
2747 if (!affine_function_equal_p (comm, cf->fns[i]))
2748 return affine_fn ();
2750 return comm;
2753 /* Returns the base of the affine function FN. */
2755 static tree
2756 affine_function_base (affine_fn fn)
2758 return fn[0];
2761 /* Returns true if FN is a constant. */
2763 static bool
2764 affine_function_constant_p (affine_fn fn)
2766 unsigned i;
2767 tree coef;
2769 for (i = 1; fn.iterate (i, &coef); i++)
2770 if (!integer_zerop (coef))
2771 return false;
2773 return true;
2776 /* Returns true if FN is the zero constant function. */
2778 static bool
2779 affine_function_zero_p (affine_fn fn)
2781 return (integer_zerop (affine_function_base (fn))
2782 && affine_function_constant_p (fn));
2785 /* Returns a signed integer type with the largest precision from TA
2786 and TB. */
2788 static tree
2789 signed_type_for_types (tree ta, tree tb)
2791 if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
2792 return signed_type_for (ta);
2793 else
2794 return signed_type_for (tb);
2797 /* Applies operation OP on affine functions FNA and FNB, and returns the
2798 result. */
2800 static affine_fn
2801 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
2803 unsigned i, n, m;
2804 affine_fn ret;
2805 tree coef;
2807 if (fnb.length () > fna.length ())
2809 n = fna.length ();
2810 m = fnb.length ();
2812 else
2814 n = fnb.length ();
2815 m = fna.length ();
2818 ret.create (m);
2819 for (i = 0; i < n; i++)
2821 tree type = signed_type_for_types (TREE_TYPE (fna[i]),
2822 TREE_TYPE (fnb[i]));
2823 ret.quick_push (fold_build2 (op, type, fna[i], fnb[i]));
2826 for (; fna.iterate (i, &coef); i++)
2827 ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2828 coef, integer_zero_node));
2829 for (; fnb.iterate (i, &coef); i++)
2830 ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2831 integer_zero_node, coef));
2833 return ret;
2836 /* Returns the sum of affine functions FNA and FNB. */
2838 static affine_fn
2839 affine_fn_plus (affine_fn fna, affine_fn fnb)
2841 return affine_fn_op (PLUS_EXPR, fna, fnb);
2844 /* Returns the difference of affine functions FNA and FNB. */
2846 static affine_fn
2847 affine_fn_minus (affine_fn fna, affine_fn fnb)
2849 return affine_fn_op (MINUS_EXPR, fna, fnb);
2852 /* Frees affine function FN. */
2854 static void
2855 affine_fn_free (affine_fn fn)
2857 fn.release ();
2860 /* For each subscript in the data dependence relation DDR, determine
2861 the distance. */
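/* E.g. if the elements of A and B conflict at iterations x == k + 3 and
   y == k respectively, the two affine functions differ by the constant
   3, so SUB_DISTANCE is 3; when the difference is not constant,
   SUB_DISTANCE becomes chrec_dont_know. */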
2863 static void
2864 compute_subscript_distance (struct data_dependence_relation *ddr)
2866 conflict_function *cf_a, *cf_b;
2867 affine_fn fn_a, fn_b, diff;
2869 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
2871 unsigned int i;
2873 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2875 struct subscript *subscript;
2877 subscript = DDR_SUBSCRIPT (ddr, i);
2878 cf_a = SUB_CONFLICTS_IN_A (subscript);
2879 cf_b = SUB_CONFLICTS_IN_B (subscript);
2881 fn_a = common_affine_function (cf_a);
2882 fn_b = common_affine_function (cf_b);
2883 if (!fn_a.exists () || !fn_b.exists ())
2885 SUB_DISTANCE (subscript) = chrec_dont_know;
2886 return;
2888 diff = affine_fn_minus (fn_a, fn_b);
2890 if (affine_function_constant_p (diff))
2891 SUB_DISTANCE (subscript) = affine_function_base (diff);
2892 else
2893 SUB_DISTANCE (subscript) = chrec_dont_know;
2895 affine_fn_free (diff);
2900 /* Returns the conflict function for "unknown". */
2902 static conflict_function *
2903 conflict_fn_not_known (void)
2905 conflict_function *fn = XCNEW (conflict_function);
2906 fn->n = NOT_KNOWN;
2908 return fn;
2911 /* Returns the conflict function for "independent". */
2913 static conflict_function *
2914 conflict_fn_no_dependence (void)
2916 conflict_function *fn = XCNEW (conflict_function);
2917 fn->n = NO_DEPENDENCE;
2919 return fn;
2922 /* Returns true if the address of OBJ is invariant in LOOP. */
2924 static bool
2925 object_address_invariant_in_loop_p (const class loop *loop, const_tree obj)
2927 while (handled_component_p (obj))
2929 if (TREE_CODE (obj) == ARRAY_REF)
2931 for (int i = 1; i < 4; ++i)
2932 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, i),
2933 loop->num))
2934 return false;
2936 else if (TREE_CODE (obj) == COMPONENT_REF)
2938 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
2939 loop->num))
2940 return false;
2942 obj = TREE_OPERAND (obj, 0);
2945 if (!INDIRECT_REF_P (obj)
2946 && TREE_CODE (obj) != MEM_REF)
2947 return true;
2949 return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
2950 loop->num);
2953 /* Returns false if we can prove that data references A and B do not alias,
2954 true otherwise. If LOOP_NEST is NULL, no cross-iteration aliases are
2955 considered. */
2957 bool
2958 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
2959 class loop *loop_nest)
2961 tree addr_a = DR_BASE_OBJECT (a);
2962 tree addr_b = DR_BASE_OBJECT (b);
2964 /* If we are not processing a loop nest but scalar code, we
2965 do not need to care about possible cross-iteration dependences
2966 and thus can process the full original reference. Do so,
2967 similar to how loop invariant motion applies extra offset-based
2968 disambiguation. */
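/* E.g. (hypothetical fields) reads of p->a and p->b with equal base
   address and offset but DR_INITs 0 and 4 and 4-byte types occupy the
   disjoint ranges [0, 4) and [4, 8), so they cannot overlap. */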
2969 if (!loop_nest)
2971 tree tree_size_a = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (a)));
2972 tree tree_size_b = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (b)));
2974 if (DR_BASE_ADDRESS (a)
2975 && DR_BASE_ADDRESS (b)
2976 && operand_equal_p (DR_BASE_ADDRESS (a), DR_BASE_ADDRESS (b))
2977 && operand_equal_p (DR_OFFSET (a), DR_OFFSET (b))
2978 && poly_int_tree_p (tree_size_a)
2979 && poly_int_tree_p (tree_size_b)
2980 && !ranges_maybe_overlap_p (wi::to_widest (DR_INIT (a)),
2981 wi::to_widest (tree_size_a),
2982 wi::to_widest (DR_INIT (b)),
2983 wi::to_widest (tree_size_b)))
2985 gcc_assert (integer_zerop (DR_STEP (a))
2986 && integer_zerop (DR_STEP (b)));
2987 return false;
2990 aff_tree off1, off2;
2991 poly_widest_int size1, size2;
2992 get_inner_reference_aff (DR_REF (a), &off1, &size1);
2993 get_inner_reference_aff (DR_REF (b), &off2, &size2);
2994 aff_combination_scale (&off1, -1);
2995 aff_combination_add (&off2, &off1);
2996 if (aff_comb_cannot_overlap_p (&off2, size1, size2))
2997 return false;
3000 if ((TREE_CODE (addr_a) == MEM_REF || TREE_CODE (addr_a) == TARGET_MEM_REF)
3001 && (TREE_CODE (addr_b) == MEM_REF || TREE_CODE (addr_b) == TARGET_MEM_REF)
3002 /* For cross-iteration dependences the cliques must be valid for the
3003 whole loop, not just individual iterations. */
3004 && (!loop_nest
3005 || MR_DEPENDENCE_CLIQUE (addr_a) == 1
3006 || MR_DEPENDENCE_CLIQUE (addr_a) == loop_nest->owned_clique)
3007 && MR_DEPENDENCE_CLIQUE (addr_a) == MR_DEPENDENCE_CLIQUE (addr_b)
3008 && MR_DEPENDENCE_BASE (addr_a) != MR_DEPENDENCE_BASE (addr_b))
3009 return false;
3011 /* If we had an evolution in a pointer-based MEM_REF BASE_OBJECT we
3012 do not know the size of the base-object. So we cannot do any
3013 offset/overlap based analysis but have to rely on points-to
3014 information only. */
3015 if (TREE_CODE (addr_a) == MEM_REF
3016 && (DR_UNCONSTRAINED_BASE (a)
3017 || TREE_CODE (TREE_OPERAND (addr_a, 0)) == SSA_NAME))
3019 /* For true dependences we can apply TBAA. */
3020 if (flag_strict_aliasing
3021 && DR_IS_WRITE (a) && DR_IS_READ (b)
3022 && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
3023 get_alias_set (DR_REF (b))))
3024 return false;
3025 if (TREE_CODE (addr_b) == MEM_REF)
3026 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
3027 TREE_OPERAND (addr_b, 0));
3028 else
3029 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
3030 build_fold_addr_expr (addr_b));
3032 else if (TREE_CODE (addr_b) == MEM_REF
3033 && (DR_UNCONSTRAINED_BASE (b)
3034 || TREE_CODE (TREE_OPERAND (addr_b, 0)) == SSA_NAME))
3036 /* For true dependences we can apply TBAA. */
3037 if (flag_strict_aliasing
3038 && DR_IS_WRITE (a) && DR_IS_READ (b)
3039 && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
3040 get_alias_set (DR_REF (b))))
3041 return false;
3042 if (TREE_CODE (addr_a) == MEM_REF)
3043 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
3044 TREE_OPERAND (addr_b, 0));
3045 else
3046 return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
3047 TREE_OPERAND (addr_b, 0));
3050 /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
3051 that is being subsetted in the loop nest. */
3052 if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
3053 return refs_output_dependent_p (addr_a, addr_b);
3054 else if (DR_IS_READ (a) && DR_IS_WRITE (b))
3055 return refs_anti_dependent_p (addr_a, addr_b);
3056 return refs_may_alias_p (addr_a, addr_b);
3059 /* REF_A and REF_B both satisfy access_fn_component_p. Return true
3060 if it is meaningful to compare their associated access functions
3061 when checking for dependencies. */
3063 static bool
3064 access_fn_components_comparable_p (tree ref_a, tree ref_b)
3066 /* Allow pairs of component refs from the following sets:
3068 { REALPART_EXPR, IMAGPART_EXPR }
3069 { COMPONENT_REF }
3070 { ARRAY_REF }. */
3071 tree_code code_a = TREE_CODE (ref_a);
3072 tree_code code_b = TREE_CODE (ref_b);
3073 if (code_a == IMAGPART_EXPR)
3074 code_a = REALPART_EXPR;
3075 if (code_b == IMAGPART_EXPR)
3076 code_b = REALPART_EXPR;
3077 if (code_a != code_b)
3078 return false;
3080 if (TREE_CODE (ref_a) == COMPONENT_REF)
3081 /* ??? We cannot simply use the type of operand #0 of the refs here as
3082 the Fortran compiler smuggles type punning into COMPONENT_REFs.
3083 Use the DECL_CONTEXT of the FIELD_DECLs instead. */
3084 return (DECL_CONTEXT (TREE_OPERAND (ref_a, 1))
3085 == DECL_CONTEXT (TREE_OPERAND (ref_b, 1)));
3087 return types_compatible_p (TREE_TYPE (TREE_OPERAND (ref_a, 0)),
3088 TREE_TYPE (TREE_OPERAND (ref_b, 0)));
3091 /* Initialize a data dependence relation RES in LOOP_NEST. USE_ALT_INDICES
3092 is true when the main indices of A and B were not comparable so we try again
3093 with alternate indices computed on an indirect reference. */
3095 struct data_dependence_relation *
3096 initialize_data_dependence_relation (struct data_dependence_relation *res,
3097 vec<loop_p> loop_nest,
3098 bool use_alt_indices)
3100 struct data_reference *a = DDR_A (res);
3101 struct data_reference *b = DDR_B (res);
3102 unsigned int i;
3104 struct indices *indices_a = &a->indices;
3105 struct indices *indices_b = &b->indices;
3106 if (use_alt_indices)
3108 if (TREE_CODE (DR_REF (a)) != MEM_REF)
3109 indices_a = &a->alt_indices;
3110 if (TREE_CODE (DR_REF (b)) != MEM_REF)
3111 indices_b = &b->alt_indices;
3113 unsigned int num_dimensions_a = indices_a->access_fns.length ();
3114 unsigned int num_dimensions_b = indices_b->access_fns.length ();
3115 if (num_dimensions_a == 0 || num_dimensions_b == 0)
3117 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3118 return res;
3121 /* For unconstrained bases, the root (highest-indexed) subscript
3122 describes a variation in the base of the original DR_REF rather
3123 than a component access. We have no type that accurately describes
3124 the new DR_BASE_OBJECT (whose TREE_TYPE describes the type *after*
3125 applying this subscript) so limit the search to the last real
3126 component access.
3128 E.g. for:
3130 void
3131 f (int a[][8], int b[][8])
3133 for (int i = 0; i < 8; ++i)
3134 a[i * 2][0] = b[i][0];
3137 the a and b accesses have a single ARRAY_REF component reference [0]
3138 but have two subscripts. */
3139 if (indices_a->unconstrained_base)
3140 num_dimensions_a -= 1;
3141 if (indices_b->unconstrained_base)
3142 num_dimensions_b -= 1;
3144 /* These structures describe sequences of component references in
3145 DR_REF (A) and DR_REF (B). Each component reference is tied to a
3146 specific access function. */
3147 struct {
3148 /* The sequence starts at DR_ACCESS_FN (A, START_A) of A and
3149 DR_ACCESS_FN (B, START_B) of B (inclusive) and extends to higher
3150 indices. In C notation, these are the indices of the rightmost
3151 component references; e.g. for a sequence .b.c.d, the start
3152 index is for .d. */
3153 unsigned int start_a;
3154 unsigned int start_b;
3156 /* The sequence contains LENGTH consecutive access functions from
3157 each DR. */
3158 unsigned int length;
3160 /* The enclosing objects for the A and B sequences respectively,
3161 i.e. the objects to which DR_ACCESS_FN (A, START_A + LENGTH - 1)
3162 and DR_ACCESS_FN (B, START_B + LENGTH - 1) are applied. */
3163 tree object_a;
3164 tree object_b;
3165 } full_seq = {}, struct_seq = {};
3167 /* Before each iteration of the loop:
3169 - REF_A is what you get after applying DR_ACCESS_FN (A, INDEX_A) and
3170 - REF_B is what you get after applying DR_ACCESS_FN (B, INDEX_B). */
3171 unsigned int index_a = 0;
3172 unsigned int index_b = 0;
3173 tree ref_a = DR_REF (a);
3174 tree ref_b = DR_REF (b);
3176 /* Now walk the component references from the final DR_REFs back up to
3177 the enclosing base objects. Each component reference corresponds
3178 to one access function in the DR, with access function 0 being for
3179 the final DR_REF and the highest-indexed access function being the
3180 one that is applied to the base of the DR.
3182 Look for a sequence of component references whose access functions
3183 are comparable (see access_fn_components_comparable_p). If more
3184 than one such sequence exists, pick the one nearest the base
3185 (which is the leftmost sequence in C notation). Store this sequence
3186 in FULL_SEQ.
3188 For example, if we have:
3190 struct foo { struct bar s; ... } (*a)[10], (*b)[10];
3192 A: a[0][i].s.c.d
3193 B: __real b[0][i].s.e[i].f
3195 (where d is the same type as the real component of f) then the access
3196 functions would be:
3198 0 1 2 3
3199 A: .d .c .s [i]
3201 0 1 2 3 4 5
3202 B: __real .f [i] .e .s [i]
3204 The A0/B2 column isn't comparable, since .d is a COMPONENT_REF
3205 and [i] is an ARRAY_REF. However, the A1/B3 column contains two
3206 COMPONENT_REF accesses for struct bar, so is comparable. Likewise
3207 the A2/B4 column contains two COMPONENT_REF accesses for struct foo,
3208 so is comparable. The A3/B5 column contains two ARRAY_REFs that
3209 index foo[10] arrays, so is again comparable. The sequence is
3210 therefore:
3212 A: [1, 3] (i.e. [i].s.c)
3213 B: [3, 5] (i.e. [i].s.e)
3215 Also look for sequences of component references whose access
3216 functions are comparable and whose enclosing objects have the same
3217 RECORD_TYPE. Store this sequence in STRUCT_SEQ. In the above
3218 example, STRUCT_SEQ would be:
3220 A: [1, 2] (i.e. s.c)
3221 B: [3, 4] (i.e. s.e) */
3222 while (index_a < num_dimensions_a && index_b < num_dimensions_b)
3224 /* The alternate indices form always has a single dimension
3225 with unconstrained base. */
3226 gcc_assert (!use_alt_indices);
3228 /* REF_A and REF_B must be one of the component access types
3229 allowed by dr_analyze_indices. */
3230 gcc_checking_assert (access_fn_component_p (ref_a));
3231 gcc_checking_assert (access_fn_component_p (ref_b));
3233 /* Get the immediately-enclosing objects for REF_A and REF_B,
3234 i.e. the references *before* applying DR_ACCESS_FN (A, INDEX_A)
3235 and DR_ACCESS_FN (B, INDEX_B). */
3236 tree object_a = TREE_OPERAND (ref_a, 0);
3237 tree object_b = TREE_OPERAND (ref_b, 0);
3239 tree type_a = TREE_TYPE (object_a);
3240 tree type_b = TREE_TYPE (object_b);
3241 if (access_fn_components_comparable_p (ref_a, ref_b))
3243 /* This pair of component accesses is comparable for dependence
3244 analysis, so we can include DR_ACCESS_FN (A, INDEX_A) and
3245 DR_ACCESS_FN (B, INDEX_B) in the sequence. */
3246 if (full_seq.start_a + full_seq.length != index_a
3247 || full_seq.start_b + full_seq.length != index_b)
3249 /* The accesses don't extend the current sequence,
3250 so start a new one here. */
3251 full_seq.start_a = index_a;
3252 full_seq.start_b = index_b;
3253 full_seq.length = 0;
3256 /* Add this pair of references to the sequence. */
3257 full_seq.length += 1;
3258 full_seq.object_a = object_a;
3259 full_seq.object_b = object_b;
3261 /* If the enclosing objects are structures (and thus have the
3262 same RECORD_TYPE), record the new sequence in STRUCT_SEQ. */
3263 if (TREE_CODE (type_a) == RECORD_TYPE)
3264 struct_seq = full_seq;
3266 /* Move to the next containing reference for both A and B. */
3267 ref_a = object_a;
3268 ref_b = object_b;
3269 index_a += 1;
3270 index_b += 1;
3271 continue;
3274 /* Try to approach equal type sizes. */
3275 if (!COMPLETE_TYPE_P (type_a)
3276 || !COMPLETE_TYPE_P (type_b)
3277 || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_a))
3278 || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_b)))
3279 break;
3281 unsigned HOST_WIDE_INT size_a = tree_to_uhwi (TYPE_SIZE_UNIT (type_a));
3282 unsigned HOST_WIDE_INT size_b = tree_to_uhwi (TYPE_SIZE_UNIT (type_b));
3283 if (size_a <= size_b)
3285 index_a += 1;
3286 ref_a = object_a;
3288 if (size_b <= size_a)
3290 index_b += 1;
3291 ref_b = object_b;
3295 /* See whether FULL_SEQ ends at the base and whether the two bases
3296 are equal. We do not care about TBAA or alignment info so we can
3297 use OEP_ADDRESS_OF to avoid false negatives. */
3298 tree base_a = indices_a->base_object;
3299 tree base_b = indices_b->base_object;
3300 bool same_base_p = (full_seq.start_a + full_seq.length == num_dimensions_a
3301 && full_seq.start_b + full_seq.length == num_dimensions_b
3302 && (indices_a->unconstrained_base
3303 == indices_b->unconstrained_base)
3304 && operand_equal_p (base_a, base_b, OEP_ADDRESS_OF)
3305 && (types_compatible_p (TREE_TYPE (base_a),
3306 TREE_TYPE (base_b))
3307 || (!base_supports_access_fn_components_p (base_a)
3308 && !base_supports_access_fn_components_p (base_b)
3309 && operand_equal_p
3310 (TYPE_SIZE (TREE_TYPE (base_a)),
3311 TYPE_SIZE (TREE_TYPE (base_b)), 0)))
3312 && (!loop_nest.exists ()
3313 || (object_address_invariant_in_loop_p
3314 (loop_nest[0], base_a))));
3316 /* If the bases are the same, we can include the base variation too.
3317 E.g. the b accesses in:
3319 for (int i = 0; i < n; ++i)
3320 b[i + 4][0] = b[i][0];
3322 have a definite dependence distance of 4, while for:
3324 for (int i = 0; i < n; ++i)
3325 a[i + 4][0] = b[i][0];
3327 the dependence distance depends on the gap between a and b.
3329 If the bases are different then we can only rely on the sequence
3330 rooted at a structure access, since arrays are allowed to overlap
3331 arbitrarily and change shape arbitrarily. E.g. we treat this as
3332 valid code:
3334 int a[256];
3336 ((int (*)[4][3]) &a[1])[i][0] += ((int (*)[4][3]) &a[2])[i][0];
3338 where two lvalues with the same int[4][3] type overlap, and where
3339 both lvalues are distinct from the object's declared type. */
3340 if (same_base_p)
3342 if (indices_a->unconstrained_base)
3343 full_seq.length += 1;
3345 else
3346 full_seq = struct_seq;
3348 /* Punt if we didn't find a suitable sequence. */
3349 if (full_seq.length == 0)
3351 if (use_alt_indices
3352 || (TREE_CODE (DR_REF (a)) == MEM_REF
3353 && TREE_CODE (DR_REF (b)) == MEM_REF)
3354 || may_be_nonaddressable_p (DR_REF (a))
3355 || may_be_nonaddressable_p (DR_REF (b)))
3357 /* Fully exhausted possibilities. */
3358 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3359 return res;
3362 /* Try evaluating both DRs as dereferences of pointers. */
3363 if (!a->alt_indices.base_object
3364 && TREE_CODE (DR_REF (a)) != MEM_REF)
3366 tree alt_ref = build2 (MEM_REF, TREE_TYPE (DR_REF (a)),
3367 build1 (ADDR_EXPR, ptr_type_node, DR_REF (a)),
3368 build_int_cst
3369 (reference_alias_ptr_type (DR_REF (a)), 0));
3370 dr_analyze_indices (&a->alt_indices, alt_ref,
3371 loop_preheader_edge (loop_nest[0]),
3372 loop_containing_stmt (DR_STMT (a)));
3374 if (!b->alt_indices.base_object
3375 && TREE_CODE (DR_REF (b)) != MEM_REF)
3377 tree alt_ref = build2 (MEM_REF, TREE_TYPE (DR_REF (b)),
3378 build1 (ADDR_EXPR, ptr_type_node, DR_REF (b)),
3379 build_int_cst
3380 (reference_alias_ptr_type (DR_REF (b)), 0));
3381 dr_analyze_indices (&b->alt_indices, alt_ref,
3382 loop_preheader_edge (loop_nest[0]),
3383 loop_containing_stmt (DR_STMT (b)));
3385 return initialize_data_dependence_relation (res, loop_nest, true);
3388 if (!same_base_p)
3390 /* Partial overlap is possible for different bases when strict aliasing
3391 is not in effect. It's also possible if either base involves a union
3392 access; e.g. for:
3394 struct s1 { int a[2]; };
3395 struct s2 { struct s1 b; int c; };
3396 struct s3 { int d; struct s1 e; };
3397 union u { struct s2 f; struct s3 g; } *p, *q;
3399 the s1 at "p->f.b" (base "p->f") partially overlaps the s1 at
3400 "p->g.e" (base "p->g") and might partially overlap the s1 at
3401 "q->g.e" (base "q->g"). */
3402 if (!flag_strict_aliasing
3403 || ref_contains_union_access_p (full_seq.object_a)
3404 || ref_contains_union_access_p (full_seq.object_b))
3406 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3407 return res;
3410 DDR_COULD_BE_INDEPENDENT_P (res) = true;
3411 if (!loop_nest.exists ()
3412 || (object_address_invariant_in_loop_p (loop_nest[0],
3413 full_seq.object_a)
3414 && object_address_invariant_in_loop_p (loop_nest[0],
3415 full_seq.object_b)))
3417 DDR_OBJECT_A (res) = full_seq.object_a;
3418 DDR_OBJECT_B (res) = full_seq.object_b;
3422 DDR_AFFINE_P (res) = true;
3423 DDR_ARE_DEPENDENT (res) = NULL_TREE;
3424 DDR_SUBSCRIPTS (res).create (full_seq.length);
3425 DDR_LOOP_NEST (res) = loop_nest;
3426 DDR_SELF_REFERENCE (res) = false;
3428 for (i = 0; i < full_seq.length; ++i)
3430 struct subscript *subscript;
3432 subscript = XNEW (struct subscript);
3433 SUB_ACCESS_FN (subscript, 0) = indices_a->access_fns[full_seq.start_a + i];
3434 SUB_ACCESS_FN (subscript, 1) = indices_b->access_fns[full_seq.start_b + i];
3435 SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
3436 SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
3437 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
3438 SUB_DISTANCE (subscript) = chrec_dont_know;
3439 DDR_SUBSCRIPTS (res).safe_push (subscript);
3442 return res;
3445 /* Initialize a data dependence relation between data accesses A and
3446 B. NB_LOOPS is the number of loops surrounding the references: the
3447 size of the classic distance/direction vectors. */
3449 struct data_dependence_relation *
3450 initialize_data_dependence_relation (struct data_reference *a,
3451 struct data_reference *b,
3452 vec<loop_p> loop_nest)
3454 data_dependence_relation *res = XCNEW (struct data_dependence_relation);
3455 DDR_A (res) = a;
3456 DDR_B (res) = b;
3457 DDR_LOOP_NEST (res).create (0);
3458 DDR_SUBSCRIPTS (res).create (0);
3459 DDR_DIR_VECTS (res).create (0);
3460 DDR_DIST_VECTS (res).create (0);
3462 if (a == NULL || b == NULL)
3464 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3465 return res;
3468 /* If the data references do not alias, then they are independent. */
3469 if (!dr_may_alias_p (a, b, loop_nest.exists () ? loop_nest[0] : NULL))
3471 DDR_ARE_DEPENDENT (res) = chrec_known;
3472 return res;
3475 return initialize_data_dependence_relation (res, loop_nest, false);
3479 /* Frees memory used by the conflict function F. */
3481 static void
3482 free_conflict_function (conflict_function *f)
3484 unsigned i;
3486 if (CF_NONTRIVIAL_P (f))
3488 for (i = 0; i < f->n; i++)
3489 affine_fn_free (f->fns[i]);
3491 free (f);
3494 /* Frees memory used by SUBSCRIPTS. */
3496 static void
3497 free_subscripts (vec<subscript_p> subscripts)
3499 for (subscript_p s : subscripts)
3501 free_conflict_function (s->conflicting_iterations_in_a);
3502 free_conflict_function (s->conflicting_iterations_in_b);
3503 free (s);
3505 subscripts.release ();
3508 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
3509 description. */
3511 static inline void
3512 finalize_ddr_dependent (struct data_dependence_relation *ddr,
3513 tree chrec)
3515 DDR_ARE_DEPENDENT (ddr) = chrec;
3516 free_subscripts (DDR_SUBSCRIPTS (ddr));
3517 DDR_SUBSCRIPTS (ddr).create (0);
3520 /* The dependence relation DDR cannot be represented by a distance
3521 vector. */
3523 static inline void
3524 non_affine_dependence_relation (struct data_dependence_relation *ddr)
3526 if (dump_file && (dump_flags & TDF_DETAILS))
3527 fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
3529 DDR_AFFINE_P (ddr) = false;
3534 /* This section contains the classic Banerjee tests. */
3536 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
3537 variables, i.e., if the ZIV (Zero Index Variable) test is true. */
3539 static inline bool
3540 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
3542 return (evolution_function_is_constant_p (chrec_a)
3543 && evolution_function_is_constant_p (chrec_b));
3546 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
3547 variable, i.e., if the SIV (Single Index Variable) test is true. */
3549 static bool
3550 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
3552 if ((evolution_function_is_constant_p (chrec_a)
3553 && evolution_function_is_univariate_p (chrec_b))
3554 || (evolution_function_is_constant_p (chrec_b)
3555 && evolution_function_is_univariate_p (chrec_a)))
3556 return true;
3558 if (evolution_function_is_univariate_p (chrec_a)
3559 && evolution_function_is_univariate_p (chrec_b))
3561 switch (TREE_CODE (chrec_a))
3563 case POLYNOMIAL_CHREC:
3564 switch (TREE_CODE (chrec_b))
3566 case POLYNOMIAL_CHREC:
3567 if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
3568 return false;
3569 /* FALLTHRU */
3571 default:
3572 return true;
3575 default:
3576 return true;
3580 return false;
3583 /* Creates a conflict function with N dimensions. The affine functions
3584 in each dimension follow. */
3586 static conflict_function *
3587 conflict_fn (unsigned n, ...)
3589 unsigned i;
3590 conflict_function *ret = XCNEW (conflict_function);
3591 va_list ap;
3593 gcc_assert (n > 0 && n <= MAX_DIM);
3594 va_start (ap, n);
3596 ret->n = n;
3597 for (i = 0; i < n; i++)
3598 ret->fns[i] = va_arg (ap, affine_fn);
3599 va_end (ap);
3601 return ret;
3604 /* Returns constant affine function with value CST. */
3606 static affine_fn
3607 affine_fn_cst (tree cst)
3609 affine_fn fn;
3610 fn.create (1);
3611 fn.quick_push (cst);
3612 return fn;
3615 /* Returns affine function with single variable, CST + COEF * x_DIM. */
3617 static affine_fn
3618 affine_fn_univar (tree cst, unsigned dim, tree coef)
3620 affine_fn fn;
3621 fn.create (dim + 1);
3622 unsigned i;
3624 gcc_assert (dim > 0);
3625 fn.quick_push (cst);
3626 for (i = 1; i < dim; i++)
3627 fn.quick_push (integer_zero_node);
3628 fn.quick_push (coef);
3629 return fn;
3632 /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
3633 *OVERLAPS_B are initialized to the functions that describe the
3634 relation between the elements accessed twice by CHREC_A and
3635 CHREC_B. For k >= 0, the following property is verified:
3637 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
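/* E.g. for the constant subscripts 3 and 5 the difference is the
   non-zero constant -2, so the accesses never overlap; for 3 and 3 the
   difference is zero and they overlap on every iteration. */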
3639 static void
3640 analyze_ziv_subscript (tree chrec_a,
3641 tree chrec_b,
3642 conflict_function **overlaps_a,
3643 conflict_function **overlaps_b,
3644 tree *last_conflicts)
3646 tree type, difference;
3647 dependence_stats.num_ziv++;
3649 if (dump_file && (dump_flags & TDF_DETAILS))
3650 fprintf (dump_file, "(analyze_ziv_subscript \n");
3652 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
3653 chrec_a = chrec_convert (type, chrec_a, NULL);
3654 chrec_b = chrec_convert (type, chrec_b, NULL);
3655 difference = chrec_fold_minus (type, chrec_a, chrec_b);
3657 switch (TREE_CODE (difference))
3659 case INTEGER_CST:
3660 if (integer_zerop (difference))
3662 /* The difference is equal to zero: the accessed index
3663 overlaps for each iteration in the loop. */
3664 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3665 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3666 *last_conflicts = chrec_dont_know;
3667 dependence_stats.num_ziv_dependent++;
3669 else
3671 /* The accesses do not overlap. */
3672 *overlaps_a = conflict_fn_no_dependence ();
3673 *overlaps_b = conflict_fn_no_dependence ();
3674 *last_conflicts = integer_zero_node;
3675 dependence_stats.num_ziv_independent++;
3677 break;
3679 default:
3680 /* We're not sure whether the indexes overlap. For the moment,
3681 conservatively answer "don't know". */
3682 if (dump_file && (dump_flags & TDF_DETAILS))
3683 fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
3685 *overlaps_a = conflict_fn_not_known ();
3686 *overlaps_b = conflict_fn_not_known ();
3687 *last_conflicts = chrec_dont_know;
3688 dependence_stats.num_ziv_unimplemented++;
3689 break;
3692 if (dump_file && (dump_flags & TDF_DETAILS))
3693 fprintf (dump_file, ")\n");
/* Similar to max_stmt_executions_int, but returns the bound as a tree,
   and only if it fits the unsigned int type.  If this is not the case,
   or the bound on the number of iterations of LOOP could not be
   derived, returns chrec_dont_know.  */
3701 static tree
3702 max_stmt_executions_tree (class loop *loop)
3704 widest_int nit;
3706 if (!max_stmt_executions (loop, &nit))
3707 return chrec_dont_know;
3709 if (!wi::fits_to_tree_p (nit, unsigned_type_node))
3710 return chrec_dont_know;
3712 return wide_int_to_tree (unsigned_type_node, nit);
3715 /* Determine whether the CHREC is always positive/negative. If the expression
3716 cannot be statically analyzed, return false, otherwise set the answer into
3717 VALUE. */
3719 static bool
3720 chrec_is_positive (tree chrec, bool *value)
3722 bool value0, value1, value2;
3723 tree end_value, nb_iter;
3725 switch (TREE_CODE (chrec))
3727 case POLYNOMIAL_CHREC:
3728 if (!chrec_is_positive (CHREC_LEFT (chrec), &value0)
3729 || !chrec_is_positive (CHREC_RIGHT (chrec), &value1))
3730 return false;
3732 /* FIXME -- overflows. */
3733 if (value0 == value1)
3735 *value = value0;
3736 return true;
      /* Otherwise the chrec is of the form "{-197, +, 2}_1", and the
	 proof consists in showing that the sign never changes during
	 the execution of the loop, from 0 to
	 loop->nb_iterations.  */
3743 if (!evolution_function_is_affine_p (chrec))
3744 return false;
3746 nb_iter = number_of_latch_executions (get_chrec_loop (chrec));
3747 if (chrec_contains_undetermined (nb_iter))
3748 return false;
3750 #if 0
3751 /* TODO -- If the test is after the exit, we may decrease the number of
3752 iterations by one. */
3753 if (after_exit)
3754 nb_iter = chrec_fold_minus (type, nb_iter, build_int_cst (type, 1));
3755 #endif
3757 end_value = chrec_apply (CHREC_VARIABLE (chrec), chrec, nb_iter);
3759 if (!chrec_is_positive (end_value, &value2))
3760 return false;
3762 *value = value0;
3763 return value0 == value1;
3765 case INTEGER_CST:
3766 switch (tree_int_cst_sgn (chrec))
3768 case -1:
3769 *value = false;
3770 break;
3771 case 1:
3772 *value = true;
3773 break;
3774 default:
3775 return false;
3777 return true;
3779 default:
3780 return false;
3785 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
3786 constant, and CHREC_B is an affine function. *OVERLAPS_A and
3787 *OVERLAPS_B are initialized to the functions that describe the
3788 relation between the elements accessed twice by CHREC_A and
3789 CHREC_B. For k >= 0, the following property is verified:
3791 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
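/* For example, with chrec_a = 22 and chrec_b = {10, +, 4}_1, the
   difference 10 - 22 = -12 is negative while the step 4 is positive,
   and 4 divides 12, so the conflict happens at iteration 12 / 4 = 3
   of loop_1, where {10, +, 4}_1 reaches 10 + 4 * 3 = 22, provided
   the loop runs at least that far.  */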
3793 static void
3794 analyze_siv_subscript_cst_affine (tree chrec_a,
3795 tree chrec_b,
3796 conflict_function **overlaps_a,
3797 conflict_function **overlaps_b,
3798 tree *last_conflicts)
3800 bool value0, value1, value2;
3801 tree type, difference, tmp;
3803 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
3804 chrec_a = chrec_convert (type, chrec_a, NULL);
3805 chrec_b = chrec_convert (type, chrec_b, NULL);
3806 difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
3808 /* Special case overlap in the first iteration. */
3809 if (integer_zerop (difference))
3811 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3812 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3813 *last_conflicts = integer_one_node;
3814 return;
3817 if (!chrec_is_positive (initial_condition (difference), &value0))
3819 if (dump_file && (dump_flags & TDF_DETAILS))
3820 fprintf (dump_file, "siv test failed: chrec is not positive.\n");
3822 dependence_stats.num_siv_unimplemented++;
3823 *overlaps_a = conflict_fn_not_known ();
3824 *overlaps_b = conflict_fn_not_known ();
3825 *last_conflicts = chrec_dont_know;
3826 return;
3828 else
3830 if (value0 == false)
3832 if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
3833 || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
3835 if (dump_file && (dump_flags & TDF_DETAILS))
3836 fprintf (dump_file, "siv test failed: chrec not positive.\n");
3838 *overlaps_a = conflict_fn_not_known ();
3839 *overlaps_b = conflict_fn_not_known ();
3840 *last_conflicts = chrec_dont_know;
3841 dependence_stats.num_siv_unimplemented++;
3842 return;
3844 else
3846 if (value1 == true)
3848 /* Example:
3849 chrec_a = 12
chrec_b = {10, +, 1}  */
3853 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
3855 HOST_WIDE_INT numiter;
3856 class loop *loop = get_chrec_loop (chrec_b);
3858 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3859 tmp = fold_build2 (EXACT_DIV_EXPR, type,
3860 fold_build1 (ABS_EXPR, type, difference),
3861 CHREC_RIGHT (chrec_b));
3862 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
3863 *last_conflicts = integer_one_node;
3866 /* Perform weak-zero siv test to see if overlap is
3867 outside the loop bounds. */
3868 numiter = max_stmt_executions_int (loop);
3870 if (numiter >= 0
3871 && compare_tree_int (tmp, numiter) > 0)
3873 free_conflict_function (*overlaps_a);
3874 free_conflict_function (*overlaps_b);
3875 *overlaps_a = conflict_fn_no_dependence ();
3876 *overlaps_b = conflict_fn_no_dependence ();
3877 *last_conflicts = integer_zero_node;
3878 dependence_stats.num_siv_independent++;
3879 return;
3881 dependence_stats.num_siv_dependent++;
3882 return;
3885 /* When the step does not divide the difference, there are
3886 no overlaps. */
3887 else
3889 *overlaps_a = conflict_fn_no_dependence ();
3890 *overlaps_b = conflict_fn_no_dependence ();
3891 *last_conflicts = integer_zero_node;
3892 dependence_stats.num_siv_independent++;
3893 return;
3897 else
3899 /* Example:
3900 chrec_a = 12
3901 chrec_b = {10, +, -1}
3903 In this case, chrec_a will not overlap with chrec_b. */
3904 *overlaps_a = conflict_fn_no_dependence ();
3905 *overlaps_b = conflict_fn_no_dependence ();
3906 *last_conflicts = integer_zero_node;
3907 dependence_stats.num_siv_independent++;
3908 return;
3912 else
3914 if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
3915 || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
3917 if (dump_file && (dump_flags & TDF_DETAILS))
3918 fprintf (dump_file, "siv test failed: chrec not positive.\n");
3920 *overlaps_a = conflict_fn_not_known ();
3921 *overlaps_b = conflict_fn_not_known ();
3922 *last_conflicts = chrec_dont_know;
3923 dependence_stats.num_siv_unimplemented++;
3924 return;
3926 else
3928 if (value2 == false)
3930 /* Example:
3931 chrec_a = 3
chrec_b = {10, +, -1}  */
3934 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
3936 HOST_WIDE_INT numiter;
3937 class loop *loop = get_chrec_loop (chrec_b);
3939 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3940 tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
3941 CHREC_RIGHT (chrec_b));
3942 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
3943 *last_conflicts = integer_one_node;
3945 /* Perform weak-zero siv test to see if overlap is
3946 outside the loop bounds. */
3947 numiter = max_stmt_executions_int (loop);
3949 if (numiter >= 0
3950 && compare_tree_int (tmp, numiter) > 0)
3952 free_conflict_function (*overlaps_a);
3953 free_conflict_function (*overlaps_b);
3954 *overlaps_a = conflict_fn_no_dependence ();
3955 *overlaps_b = conflict_fn_no_dependence ();
3956 *last_conflicts = integer_zero_node;
3957 dependence_stats.num_siv_independent++;
3958 return;
3960 dependence_stats.num_siv_dependent++;
3961 return;
3964 /* When the step does not divide the difference, there
3965 are no overlaps. */
3966 else
3968 *overlaps_a = conflict_fn_no_dependence ();
3969 *overlaps_b = conflict_fn_no_dependence ();
3970 *last_conflicts = integer_zero_node;
3971 dependence_stats.num_siv_independent++;
3972 return;
3975 else
3977 /* Example:
3978 chrec_a = 3
3979 chrec_b = {4, +, 1}
3981 In this case, chrec_a will not overlap with chrec_b. */
3982 *overlaps_a = conflict_fn_no_dependence ();
3983 *overlaps_b = conflict_fn_no_dependence ();
3984 *last_conflicts = integer_zero_node;
3985 dependence_stats.num_siv_independent++;
3986 return;
3993 /* Helper recursive function for initializing the matrix A. Returns
3994 the initial value of CHREC. */
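/* For example, for the chrec {{3, +, 5}_1, +, 7}_2 with INDEX = 0 and
   MULT = 1, this stores A[0][0] = 7 and A[1][0] = 5, and returns the
   initial value 3.  */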
3996 static tree
3997 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
3999 gcc_assert (chrec);
4001 switch (TREE_CODE (chrec))
4003 case POLYNOMIAL_CHREC:
4004 HOST_WIDE_INT chrec_right;
4005 if (!cst_and_fits_in_hwi (CHREC_RIGHT (chrec)))
4006 return chrec_dont_know;
4007 chrec_right = int_cst_value (CHREC_RIGHT (chrec));
4008 /* We want to be able to negate without overflow. */
4009 if (chrec_right == HOST_WIDE_INT_MIN)
4010 return chrec_dont_know;
4011 A[index][0] = mult * chrec_right;
4012 return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
4014 case PLUS_EXPR:
4015 case MULT_EXPR:
4016 case MINUS_EXPR:
4018 tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
4019 tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
4021 return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
4024 CASE_CONVERT:
4026 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
4027 return chrec_convert (chrec_type (chrec), op, NULL);
4030 case BIT_NOT_EXPR:
4032 /* Handle ~X as -1 - X. */
4033 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
4034 return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
4035 build_int_cst (TREE_TYPE (chrec), -1), op);
4038 case INTEGER_CST:
4039 return chrec;
4041 default:
4042 gcc_unreachable ();
4043 return NULL_TREE;
4047 #define FLOOR_DIV(x,y) ((x) / (y))
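/* Note: this is truncating division, which agrees with floor division
   only when both operands are non-negative, as they are at the uses
   below.  */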
4049 /* Solves the special case of the Diophantine equation:
4050 | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
4052 Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
4053 number of iterations that loops X and Y run. The overlaps will be
4054 constructed as evolutions in dimension DIM. */
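/* For example, with STEP_A = 6 and STEP_B = 9 we have gcd = 3, and the
   conflicting iterations are described by

   | OVERLAPS_A (t) = {0, +, 3}  (iterations 0, 3, 6, ...)
   | OVERLAPS_B (t) = {0, +, 2}  (iterations 0, 2, 4, ...)

   since 6 * 3 = 9 * 2 = 18 is the first nonzero common element.  */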
4056 static void
4057 compute_overlap_steps_for_affine_univar (HOST_WIDE_INT niter,
4058 HOST_WIDE_INT step_a,
4059 HOST_WIDE_INT step_b,
4060 affine_fn *overlaps_a,
4061 affine_fn *overlaps_b,
4062 tree *last_conflicts, int dim)
4064 if (((step_a > 0 && step_b > 0)
4065 || (step_a < 0 && step_b < 0)))
4067 HOST_WIDE_INT step_overlaps_a, step_overlaps_b;
4068 HOST_WIDE_INT gcd_steps_a_b, last_conflict, tau2;
4070 gcd_steps_a_b = gcd (step_a, step_b);
4071 step_overlaps_a = step_b / gcd_steps_a_b;
4072 step_overlaps_b = step_a / gcd_steps_a_b;
4074 if (niter > 0)
4076 tau2 = FLOOR_DIV (niter, step_overlaps_a);
4077 tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
4078 last_conflict = tau2;
4079 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
4081 else
4082 *last_conflicts = chrec_dont_know;
4084 *overlaps_a = affine_fn_univar (integer_zero_node, dim,
4085 build_int_cst (NULL_TREE,
4086 step_overlaps_a));
4087 *overlaps_b = affine_fn_univar (integer_zero_node, dim,
4088 build_int_cst (NULL_TREE,
4089 step_overlaps_b));
4092 else
4094 *overlaps_a = affine_fn_cst (integer_zero_node);
4095 *overlaps_b = affine_fn_cst (integer_zero_node);
4096 *last_conflicts = integer_zero_node;
4100 /* Solves the special case of a Diophantine equation where CHREC_A is
4101 an affine bivariate function, and CHREC_B is an affine univariate
4102 function. For example,
4104 | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
4106 has the following overlapping functions:
4108 | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
4109 | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
4110 | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
4112 FORNOW: This is a specialized implementation for a case occurring in
4113 a common benchmark. Implement the general algorithm. */
4115 static void
4116 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
4117 conflict_function **overlaps_a,
4118 conflict_function **overlaps_b,
4119 tree *last_conflicts)
4121 bool xz_p, yz_p, xyz_p;
4122 HOST_WIDE_INT step_x, step_y, step_z;
4123 HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
4124 affine_fn overlaps_a_xz, overlaps_b_xz;
4125 affine_fn overlaps_a_yz, overlaps_b_yz;
4126 affine_fn overlaps_a_xyz, overlaps_b_xyz;
4127 affine_fn ova1, ova2, ovb;
4128 tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
4130 step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
4131 step_y = int_cst_value (CHREC_RIGHT (chrec_a));
4132 step_z = int_cst_value (CHREC_RIGHT (chrec_b));
4134 niter_x = max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)));
4135 niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a));
4136 niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b));
4138 if (niter_x < 0 || niter_y < 0 || niter_z < 0)
4140 if (dump_file && (dump_flags & TDF_DETAILS))
4141 fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
4143 *overlaps_a = conflict_fn_not_known ();
4144 *overlaps_b = conflict_fn_not_known ();
4145 *last_conflicts = chrec_dont_know;
4146 return;
4149 niter = MIN (niter_x, niter_z);
4150 compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
4151 &overlaps_a_xz,
4152 &overlaps_b_xz,
4153 &last_conflicts_xz, 1);
4154 niter = MIN (niter_y, niter_z);
4155 compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
4156 &overlaps_a_yz,
4157 &overlaps_b_yz,
4158 &last_conflicts_yz, 2);
4159 niter = MIN (niter_x, niter_z);
4160 niter = MIN (niter_y, niter);
4161 compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
4162 &overlaps_a_xyz,
4163 &overlaps_b_xyz,
4164 &last_conflicts_xyz, 3);
4166 xz_p = !integer_zerop (last_conflicts_xz);
4167 yz_p = !integer_zerop (last_conflicts_yz);
4168 xyz_p = !integer_zerop (last_conflicts_xyz);
4170 if (xz_p || yz_p || xyz_p)
4172 ova1 = affine_fn_cst (integer_zero_node);
4173 ova2 = affine_fn_cst (integer_zero_node);
4174 ovb = affine_fn_cst (integer_zero_node);
4175 if (xz_p)
4177 affine_fn t0 = ova1;
4178 affine_fn t2 = ovb;
4180 ova1 = affine_fn_plus (ova1, overlaps_a_xz);
4181 ovb = affine_fn_plus (ovb, overlaps_b_xz);
4182 affine_fn_free (t0);
4183 affine_fn_free (t2);
4184 *last_conflicts = last_conflicts_xz;
4186 if (yz_p)
4188 affine_fn t0 = ova2;
4189 affine_fn t2 = ovb;
4191 ova2 = affine_fn_plus (ova2, overlaps_a_yz);
4192 ovb = affine_fn_plus (ovb, overlaps_b_yz);
4193 affine_fn_free (t0);
4194 affine_fn_free (t2);
4195 *last_conflicts = last_conflicts_yz;
4197 if (xyz_p)
4199 affine_fn t0 = ova1;
4200 affine_fn t2 = ova2;
4201 affine_fn t4 = ovb;
4203 ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
4204 ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
4205 ovb = affine_fn_plus (ovb, overlaps_b_xyz);
4206 affine_fn_free (t0);
4207 affine_fn_free (t2);
4208 affine_fn_free (t4);
4209 *last_conflicts = last_conflicts_xyz;
4211 *overlaps_a = conflict_fn (2, ova1, ova2);
4212 *overlaps_b = conflict_fn (1, ovb);
4214 else
4216 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4217 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4218 *last_conflicts = integer_zero_node;
4221 affine_fn_free (overlaps_a_xz);
4222 affine_fn_free (overlaps_b_xz);
4223 affine_fn_free (overlaps_a_yz);
4224 affine_fn_free (overlaps_b_yz);
4225 affine_fn_free (overlaps_a_xyz);
4226 affine_fn_free (overlaps_b_xyz);
4229 /* Copy the elements of vector VEC1 with length SIZE to VEC2. */
4231 static void
4232 lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
4233 int size)
4235 memcpy (vec2, vec1, size * sizeof (*vec1));
4238 /* Copy the elements of M x N matrix MAT1 to MAT2. */
4240 static void
4241 lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
4242 int m, int n)
4244 int i;
4246 for (i = 0; i < m; i++)
4247 lambda_vector_copy (mat1[i], mat2[i], n);
/* Store the SIZE x SIZE identity matrix in MAT.  */
4252 static void
4253 lambda_matrix_id (lambda_matrix mat, int size)
4255 int i, j;
4257 for (i = 0; i < size; i++)
4258 for (j = 0; j < size; j++)
4259 mat[i][j] = (i == j) ? 1 : 0;
4262 /* Return the index of the first nonzero element of vector VEC1 between
4263 START and N. We must have START <= N.
4264 Returns N if VEC1 is the zero vector. */
4266 static int
4267 lambda_vector_first_nz (lambda_vector vec1, int n, int start)
4269 int j = start;
4270 while (j < n && vec1[j] == 0)
4271 j++;
4272 return j;
4275 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
4276 R2 = R2 + CONST1 * R1. */
4278 static bool
4279 lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2,
4280 lambda_int const1)
4282 int i;
4284 if (const1 == 0)
4285 return true;
4287 for (i = 0; i < n; i++)
4289 bool ovf;
4290 lambda_int tem = mul_hwi (mat[r1][i], const1, &ovf);
4291 if (ovf)
4292 return false;
4293 lambda_int tem2 = add_hwi (mat[r2][i], tem, &ovf);
4294 if (ovf || tem2 == HOST_WIDE_INT_MIN)
4295 return false;
4296 mat[r2][i] = tem2;
4299 return true;
4302 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
4303 and store the result in VEC2. */
4305 static void
4306 lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
4307 int size, lambda_int const1)
4309 int i;
4311 if (const1 == 0)
4312 lambda_vector_clear (vec2, size);
4313 else
4314 for (i = 0; i < size; i++)
4315 vec2[i] = const1 * vec1[i];
4318 /* Negate vector VEC1 with length SIZE and store it in VEC2. */
4320 static void
4321 lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
4322 int size)
4324 lambda_vector_mult_const (vec1, vec2, size, -1);
4327 /* Negate row R1 of matrix MAT which has N columns. */
4329 static void
4330 lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
4332 lambda_vector_negate (mat[r1], mat[r1], n);
4335 /* Return true if two vectors are equal. */
4337 static bool
4338 lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
4340 int i;
4341 for (i = 0; i < size; i++)
4342 if (vec1[i] != vec2[i])
4343 return false;
4344 return true;
4347 /* Given an M x N integer matrix A, this function determines an M x
4348 M unimodular matrix U, and an M x N echelon matrix S such that
4349 "U.A = S". This decomposition is also known as "right Hermite".
4351 Ref: Algorithm 2.1 page 33 in "Loop Transformations for
4352 Restructuring Compilers" Utpal Banerjee. */
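/* For example, for the 2 x 1 matrix A = [[6], [4]] the elimination
   below produces S = [[2], [0]], with S[0][0] = 2 = gcd (6, 4), while
   U accumulates the row operations so that U.A = S holds.  */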
4354 static bool
4355 lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
4356 lambda_matrix S, lambda_matrix U)
4358 int i, j, i0 = 0;
4360 lambda_matrix_copy (A, S, m, n);
4361 lambda_matrix_id (U, m);
4363 for (j = 0; j < n; j++)
4365 if (lambda_vector_first_nz (S[j], m, i0) < m)
4367 ++i0;
4368 for (i = m - 1; i >= i0; i--)
4370 while (S[i][j] != 0)
4372 lambda_int factor, a, b;
4374 a = S[i-1][j];
4375 b = S[i][j];
4376 gcc_assert (a != HOST_WIDE_INT_MIN);
4377 factor = a / b;
4379 if (!lambda_matrix_row_add (S, n, i, i-1, -factor))
4380 return false;
4381 std::swap (S[i], S[i-1]);
4383 if (!lambda_matrix_row_add (U, m, i, i-1, -factor))
4384 return false;
4385 std::swap (U[i], U[i-1]);
4391 return true;
4394 /* Determines the overlapping elements due to accesses CHREC_A and
4395 CHREC_B, that are affine functions. This function cannot handle
symbolic evolution functions, i.e. when initial conditions are
4397 parameters, because it uses lambda matrices of integers. */
4399 static void
4400 analyze_subscript_affine_affine (tree chrec_a,
4401 tree chrec_b,
4402 conflict_function **overlaps_a,
4403 conflict_function **overlaps_b,
4404 tree *last_conflicts)
4406 unsigned nb_vars_a, nb_vars_b, dim;
4407 lambda_int gamma, gcd_alpha_beta;
4408 lambda_matrix A, U, S;
4409 struct obstack scratch_obstack;
4411 if (eq_evolutions_p (chrec_a, chrec_b))
4413 /* The accessed index overlaps for each iteration in the
4414 loop. */
4415 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4416 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4417 *last_conflicts = chrec_dont_know;
4418 return;
4420 if (dump_file && (dump_flags & TDF_DETAILS))
4421 fprintf (dump_file, "(analyze_subscript_affine_affine \n");
  /* For determining the initial intersection, we have to solve a
     Diophantine equation.  This is the most time-consuming part.

     To answer the question "Is there a dependence?" we have to prove
     that there exists a solution to the Diophantine equation, and that
     the solution is in the iteration domain, i.e. the solution is
     positive or zero, and happens before the upper bound
     loop.nb_iterations.  Otherwise there is no dependence.  This
     function outputs a description of the iterations that hold the
     intersections.  */
4434 nb_vars_a = nb_vars_in_chrec (chrec_a);
4435 nb_vars_b = nb_vars_in_chrec (chrec_b);
4437 gcc_obstack_init (&scratch_obstack);
4439 dim = nb_vars_a + nb_vars_b;
4440 U = lambda_matrix_new (dim, dim, &scratch_obstack);
4441 A = lambda_matrix_new (dim, 1, &scratch_obstack);
4442 S = lambda_matrix_new (dim, 1, &scratch_obstack);
4444 tree init_a = initialize_matrix_A (A, chrec_a, 0, 1);
4445 tree init_b = initialize_matrix_A (A, chrec_b, nb_vars_a, -1);
4446 if (init_a == chrec_dont_know
4447 || init_b == chrec_dont_know)
4449 if (dump_file && (dump_flags & TDF_DETAILS))
4450 fprintf (dump_file, "affine-affine test failed: "
4451 "representation issue.\n");
4452 *overlaps_a = conflict_fn_not_known ();
4453 *overlaps_b = conflict_fn_not_known ();
4454 *last_conflicts = chrec_dont_know;
4455 goto end_analyze_subs_aa;
4457 gamma = int_cst_value (init_b) - int_cst_value (init_a);
4459 /* Don't do all the hard work of solving the Diophantine equation
4460 when we already know the solution: for example,
4461 | {3, +, 1}_1
4462 | {3, +, 4}_2
4463 | gamma = 3 - 3 = 0.
4464 Then the first overlap occurs during the first iterations:
| {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)  */
4467 if (gamma == 0)
4469 if (nb_vars_a == 1 && nb_vars_b == 1)
4471 HOST_WIDE_INT step_a, step_b;
4472 HOST_WIDE_INT niter, niter_a, niter_b;
4473 affine_fn ova, ovb;
4475 niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a));
4476 niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b));
4477 niter = MIN (niter_a, niter_b);
4478 step_a = int_cst_value (CHREC_RIGHT (chrec_a));
4479 step_b = int_cst_value (CHREC_RIGHT (chrec_b));
4481 compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
4482 &ova, &ovb,
4483 last_conflicts, 1);
4484 *overlaps_a = conflict_fn (1, ova);
4485 *overlaps_b = conflict_fn (1, ovb);
4488 else if (nb_vars_a == 2 && nb_vars_b == 1)
4489 compute_overlap_steps_for_affine_1_2
4490 (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
4492 else if (nb_vars_a == 1 && nb_vars_b == 2)
4493 compute_overlap_steps_for_affine_1_2
4494 (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
4496 else
4498 if (dump_file && (dump_flags & TDF_DETAILS))
4499 fprintf (dump_file, "affine-affine test failed: too many variables.\n");
4500 *overlaps_a = conflict_fn_not_known ();
4501 *overlaps_b = conflict_fn_not_known ();
4502 *last_conflicts = chrec_dont_know;
4504 goto end_analyze_subs_aa;
4507 /* U.A = S */
4508 if (!lambda_matrix_right_hermite (A, dim, 1, S, U))
4510 *overlaps_a = conflict_fn_not_known ();
4511 *overlaps_b = conflict_fn_not_known ();
4512 *last_conflicts = chrec_dont_know;
4513 goto end_analyze_subs_aa;
4516 if (S[0][0] < 0)
4518 S[0][0] *= -1;
4519 lambda_matrix_row_negate (U, dim, 0);
4521 gcd_alpha_beta = S[0][0];
  /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
     but that is quite a strange case.  Instead of ICEing, answer
     "don't know".  */
4526 if (gcd_alpha_beta == 0)
4528 *overlaps_a = conflict_fn_not_known ();
4529 *overlaps_b = conflict_fn_not_known ();
4530 *last_conflicts = chrec_dont_know;
4531 goto end_analyze_subs_aa;
4534 /* The classic "gcd-test". */
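/* For example, for {3, +, 8}_1 vs. {1, +, 12}_1 we have
   gamma = 1 - 3 = -2 and gcd (8, 12) = 4; since 4 does not divide 2,
   the equation 3 + 8*x = 1 + 12*y has no integer solution, and the
   accesses are independent.  */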
4535 if (!int_divides_p (gcd_alpha_beta, gamma))
4537 /* The "gcd-test" has determined that there is no integer
4538 solution, i.e. there is no dependence. */
4539 *overlaps_a = conflict_fn_no_dependence ();
4540 *overlaps_b = conflict_fn_no_dependence ();
4541 *last_conflicts = integer_zero_node;
4544 /* Both access functions are univariate. This includes SIV and MIV cases. */
4545 else if (nb_vars_a == 1 && nb_vars_b == 1)
4547 /* Both functions should have the same evolution sign. */
4548 if (((A[0][0] > 0 && -A[1][0] > 0)
4549 || (A[0][0] < 0 && -A[1][0] < 0)))
4551 /* The solutions are given by:
4553 | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0]
4554 | [u21 u22] [y0]
4556 For a given integer t. Using the following variables,
4558 | i0 = u11 * gamma / gcd_alpha_beta
4559 | j0 = u12 * gamma / gcd_alpha_beta
4560 | i1 = u21
4561 | j1 = u22
4563 the solutions are:
4565 | x0 = i0 + i1 * t,
4566 | y0 = j0 + j1 * t. */
4567 HOST_WIDE_INT i0, j0, i1, j1;
4569 i0 = U[0][0] * gamma / gcd_alpha_beta;
4570 j0 = U[0][1] * gamma / gcd_alpha_beta;
4571 i1 = U[1][0];
4572 j1 = U[1][1];
4574 if ((i1 == 0 && i0 < 0)
4575 || (j1 == 0 && j0 < 0))
4577 /* There is no solution.
4578 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
4579 falls in here, but for the moment we don't look at the
4580 upper bound of the iteration domain. */
4581 *overlaps_a = conflict_fn_no_dependence ();
4582 *overlaps_b = conflict_fn_no_dependence ();
4583 *last_conflicts = integer_zero_node;
4584 goto end_analyze_subs_aa;
4587 if (i1 > 0 && j1 > 0)
4589 HOST_WIDE_INT niter_a
4590 = max_stmt_executions_int (get_chrec_loop (chrec_a));
4591 HOST_WIDE_INT niter_b
4592 = max_stmt_executions_int (get_chrec_loop (chrec_b));
4593 HOST_WIDE_INT niter = MIN (niter_a, niter_b);
4595 /* (X0, Y0) is a solution of the Diophantine equation:
4596 "chrec_a (X0) = chrec_b (Y0)". */
4597 HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
4598 CEIL (-j0, j1));
4599 HOST_WIDE_INT x0 = i1 * tau1 + i0;
4600 HOST_WIDE_INT y0 = j1 * tau1 + j0;
4602 /* (X1, Y1) is the smallest positive solution of the eq
4603 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
4604 first conflict occurs. */
4605 HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
4606 HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
4607 HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
4609 if (niter > 0)
4611 /* If the overlap occurs outside of the bounds of the
4612 loop, there is no dependence. */
4613 if (x1 >= niter_a || y1 >= niter_b)
4615 *overlaps_a = conflict_fn_no_dependence ();
4616 *overlaps_b = conflict_fn_no_dependence ();
4617 *last_conflicts = integer_zero_node;
4618 goto end_analyze_subs_aa;
4621 /* max stmt executions can get quite large, avoid
4622 overflows by using wide ints here. */
4623 widest_int tau2
4624 = wi::smin (wi::sdiv_floor (wi::sub (niter_a, i0), i1),
4625 wi::sdiv_floor (wi::sub (niter_b, j0), j1));
4626 widest_int last_conflict = wi::sub (tau2, (x1 - i0)/i1);
4627 if (wi::min_precision (last_conflict, SIGNED)
4628 <= TYPE_PRECISION (integer_type_node))
4629 *last_conflicts
4630 = build_int_cst (integer_type_node,
4631 last_conflict.to_shwi ());
4632 else
4633 *last_conflicts = chrec_dont_know;
4635 else
4636 *last_conflicts = chrec_dont_know;
	  *overlaps_a
	    = conflict_fn (1,
			   affine_fn_univar (build_int_cst (NULL_TREE, x1),
					     1,
					     build_int_cst (NULL_TREE, i1)));
	  *overlaps_b
	    = conflict_fn (1,
			   affine_fn_univar (build_int_cst (NULL_TREE, y1),
					     1,
					     build_int_cst (NULL_TREE, j1)));
4649 else
4651 /* FIXME: For the moment, the upper bound of the
4652 iteration domain for i and j is not checked. */
4653 if (dump_file && (dump_flags & TDF_DETAILS))
4654 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
4655 *overlaps_a = conflict_fn_not_known ();
4656 *overlaps_b = conflict_fn_not_known ();
4657 *last_conflicts = chrec_dont_know;
4660 else
4662 if (dump_file && (dump_flags & TDF_DETAILS))
4663 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
4664 *overlaps_a = conflict_fn_not_known ();
4665 *overlaps_b = conflict_fn_not_known ();
4666 *last_conflicts = chrec_dont_know;
4669 else
4671 if (dump_file && (dump_flags & TDF_DETAILS))
4672 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
4673 *overlaps_a = conflict_fn_not_known ();
4674 *overlaps_b = conflict_fn_not_known ();
4675 *last_conflicts = chrec_dont_know;
4678 end_analyze_subs_aa:
4679 obstack_free (&scratch_obstack, NULL);
4680 if (dump_file && (dump_flags & TDF_DETAILS))
4682 fprintf (dump_file, " (overlaps_a = ");
4683 dump_conflict_function (dump_file, *overlaps_a);
4684 fprintf (dump_file, ")\n (overlaps_b = ");
4685 dump_conflict_function (dump_file, *overlaps_b);
4686 fprintf (dump_file, "))\n");
4690 /* Returns true when analyze_subscript_affine_affine can be used for
4691 determining the dependence relation between chrec_a and chrec_b,
4692 that contain symbols. This function modifies chrec_a and chrec_b
4693 such that the analysis result is the same, and such that they don't
4694 contain symbols, and then can safely be passed to the analyzer.
4696 Example: The analysis of the following tuples of evolutions produce
4697 the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
4698 vs. {0, +, 1}_1
4700 {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
{-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)  */
4704 static bool
4705 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
4707 tree diff, type, left_a, left_b, right_b;
4709 if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
4710 || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
4711 /* FIXME: For the moment not handled. Might be refined later. */
4712 return false;
4714 type = chrec_type (*chrec_a);
4715 left_a = CHREC_LEFT (*chrec_a);
4716 left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
4717 diff = chrec_fold_minus (type, left_a, left_b);
4719 if (!evolution_function_is_constant_p (diff))
4720 return false;
4722 if (dump_file && (dump_flags & TDF_DETAILS))
4723 fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
4725 *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
4726 diff, CHREC_RIGHT (*chrec_a));
4727 right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
4728 *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
4729 build_int_cst (type, 0),
4730 right_b);
4731 return true;
4734 /* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and
4735 *OVERLAPS_B are initialized to the functions that describe the
4736 relation between the elements accessed twice by CHREC_A and
4737 CHREC_B. For k >= 0, the following property is verified:
4739 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
4741 static void
4742 analyze_siv_subscript (tree chrec_a,
4743 tree chrec_b,
4744 conflict_function **overlaps_a,
4745 conflict_function **overlaps_b,
4746 tree *last_conflicts,
4747 int loop_nest_num)
4749 dependence_stats.num_siv++;
4751 if (dump_file && (dump_flags & TDF_DETAILS))
4752 fprintf (dump_file, "(analyze_siv_subscript \n");
4754 if (evolution_function_is_constant_p (chrec_a)
4755 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
4756 analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
4757 overlaps_a, overlaps_b, last_conflicts);
4759 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
4760 && evolution_function_is_constant_p (chrec_b))
4761 analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
4762 overlaps_b, overlaps_a, last_conflicts);
4764 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
4765 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
4767 if (!chrec_contains_symbols (chrec_a)
4768 && !chrec_contains_symbols (chrec_b))
4770 analyze_subscript_affine_affine (chrec_a, chrec_b,
4771 overlaps_a, overlaps_b,
4772 last_conflicts);
4774 if (CF_NOT_KNOWN_P (*overlaps_a)
4775 || CF_NOT_KNOWN_P (*overlaps_b))
4776 dependence_stats.num_siv_unimplemented++;
4777 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
4778 || CF_NO_DEPENDENCE_P (*overlaps_b))
4779 dependence_stats.num_siv_independent++;
4780 else
4781 dependence_stats.num_siv_dependent++;
4783 else if (can_use_analyze_subscript_affine_affine (&chrec_a,
4784 &chrec_b))
4786 analyze_subscript_affine_affine (chrec_a, chrec_b,
4787 overlaps_a, overlaps_b,
4788 last_conflicts);
4790 if (CF_NOT_KNOWN_P (*overlaps_a)
4791 || CF_NOT_KNOWN_P (*overlaps_b))
4792 dependence_stats.num_siv_unimplemented++;
4793 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
4794 || CF_NO_DEPENDENCE_P (*overlaps_b))
4795 dependence_stats.num_siv_independent++;
4796 else
4797 dependence_stats.num_siv_dependent++;
4799 else
4800 goto siv_subscript_dontknow;
4803 else
4805 siv_subscript_dontknow:;
4806 if (dump_file && (dump_flags & TDF_DETAILS))
4807 fprintf (dump_file, " siv test failed: unimplemented");
4808 *overlaps_a = conflict_fn_not_known ();
4809 *overlaps_b = conflict_fn_not_known ();
4810 *last_conflicts = chrec_dont_know;
4811 dependence_stats.num_siv_unimplemented++;
4814 if (dump_file && (dump_flags & TDF_DETAILS))
4815 fprintf (dump_file, ")\n");
/* Returns false if we can prove that the greatest common divisor of the steps
   of CHREC does not divide CST, true otherwise.  */
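/* For example, for CHREC = {{5, +, 6}_1, +, 9}_2 and CST = 7, the gcd
   of the steps is gcd (6, 9) = 3; since 7 % 3 != 0, no combination of
   steps can add up to 7, and the function returns false.  */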
4821 static bool
4822 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
4824 HOST_WIDE_INT cd = 0, val;
4825 tree step;
4827 if (!tree_fits_shwi_p (cst))
4828 return true;
4829 val = tree_to_shwi (cst);
4831 while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
4833 step = CHREC_RIGHT (chrec);
4834 if (!tree_fits_shwi_p (step))
4835 return true;
4836 cd = gcd (cd, tree_to_shwi (step));
4837 chrec = CHREC_LEFT (chrec);
4840 return val % cd == 0;
4843 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
4844 LOOP_NEST. *OVERLAPS_A and *OVERLAPS_B are initialized to the
4845 functions that describe the relation between the elements accessed
4846 twice by CHREC_A and CHREC_B. For k >= 0, the following property
4847 is verified:
4849 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
4851 static void
4852 analyze_miv_subscript (tree chrec_a,
4853 tree chrec_b,
4854 conflict_function **overlaps_a,
4855 conflict_function **overlaps_b,
4856 tree *last_conflicts,
4857 class loop *loop_nest)
4859 tree type, difference;
4861 dependence_stats.num_miv++;
4862 if (dump_file && (dump_flags & TDF_DETAILS))
4863 fprintf (dump_file, "(analyze_miv_subscript \n");
4865 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
4866 chrec_a = chrec_convert (type, chrec_a, NULL);
4867 chrec_b = chrec_convert (type, chrec_b, NULL);
4868 difference = chrec_fold_minus (type, chrec_a, chrec_b);
4870 if (eq_evolutions_p (chrec_a, chrec_b))
4872 /* Access functions are the same: all the elements are accessed
4873 in the same order. */
4874 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4875 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4876 *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
4877 dependence_stats.num_miv_dependent++;
4880 else if (evolution_function_is_constant_p (difference)
4881 && evolution_function_is_affine_multivariate_p (chrec_a,
4882 loop_nest->num)
4883 && !gcd_of_steps_may_divide_p (chrec_a, difference))
4885 /* testsuite/.../ssa-chrec-33.c
4886 {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2
4888 The difference is 1, and all the evolution steps are multiples
4889 of 2, consequently there are no overlapping elements. */
4890 *overlaps_a = conflict_fn_no_dependence ();
4891 *overlaps_b = conflict_fn_no_dependence ();
4892 *last_conflicts = integer_zero_node;
4893 dependence_stats.num_miv_independent++;
4896 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest->num)
4897 && !chrec_contains_symbols (chrec_a, loop_nest)
4898 && evolution_function_is_affine_in_loop (chrec_b, loop_nest->num)
4899 && !chrec_contains_symbols (chrec_b, loop_nest))
4901 /* testsuite/.../ssa-chrec-35.c
4902 {0, +, 1}_2 vs. {0, +, 1}_3
4903 the overlapping elements are respectively located at iterations:
4904 {0, +, 1}_x and {0, +, 1}_x,
4905 in other words, we have the equality:
4906 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
4908 Other examples:
4909 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
4910 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
4912 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
{{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)  */
4915 analyze_subscript_affine_affine (chrec_a, chrec_b,
4916 overlaps_a, overlaps_b, last_conflicts);
4918 if (CF_NOT_KNOWN_P (*overlaps_a)
4919 || CF_NOT_KNOWN_P (*overlaps_b))
4920 dependence_stats.num_miv_unimplemented++;
4921 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
4922 || CF_NO_DEPENDENCE_P (*overlaps_b))
4923 dependence_stats.num_miv_independent++;
4924 else
4925 dependence_stats.num_miv_dependent++;
4928 else
4930 /* When the analysis is too difficult, answer "don't know". */
4931 if (dump_file && (dump_flags & TDF_DETAILS))
4932 fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
4934 *overlaps_a = conflict_fn_not_known ();
4935 *overlaps_b = conflict_fn_not_known ();
4936 *last_conflicts = chrec_dont_know;
4937 dependence_stats.num_miv_unimplemented++;
4940 if (dump_file && (dump_flags & TDF_DETAILS))
4941 fprintf (dump_file, ")\n");
/* Determines the iterations for which CHREC_A is equal to CHREC_B with
   respect to LOOP_NEST.  OVERLAP_ITERATIONS_A and
4946 OVERLAP_ITERATIONS_B are initialized with two functions that
4947 describe the iterations that contain conflicting elements.
4949 Remark: For an integer k >= 0, the following equality is true:
CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).  */
4954 static void
4955 analyze_overlapping_iterations (tree chrec_a,
4956 tree chrec_b,
4957 conflict_function **overlap_iterations_a,
4958 conflict_function **overlap_iterations_b,
4959 tree *last_conflicts, class loop *loop_nest)
4961 unsigned int lnn = loop_nest->num;
4963 dependence_stats.num_subscript_tests++;
4965 if (dump_file && (dump_flags & TDF_DETAILS))
4967 fprintf (dump_file, "(analyze_overlapping_iterations \n");
4968 fprintf (dump_file, " (chrec_a = ");
4969 print_generic_expr (dump_file, chrec_a);
4970 fprintf (dump_file, ")\n (chrec_b = ");
4971 print_generic_expr (dump_file, chrec_b);
4972 fprintf (dump_file, ")\n");
4975 if (chrec_a == NULL_TREE
4976 || chrec_b == NULL_TREE
4977 || chrec_contains_undetermined (chrec_a)
4978 || chrec_contains_undetermined (chrec_b))
4980 dependence_stats.num_subscript_undetermined++;
4982 *overlap_iterations_a = conflict_fn_not_known ();
4983 *overlap_iterations_b = conflict_fn_not_known ();
4986 /* If they are the same chrec, and are affine, they overlap
4987 on every iteration. */
4988 else if (eq_evolutions_p (chrec_a, chrec_b)
4989 && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
4990 || operand_equal_p (chrec_a, chrec_b, 0)))
4992 dependence_stats.num_same_subscript_function++;
4993 *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4994 *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4995 *last_conflicts = chrec_dont_know;
4998 /* If they aren't the same, and aren't affine, we can't do anything
4999 yet. */
5000 else if ((chrec_contains_symbols (chrec_a)
5001 || chrec_contains_symbols (chrec_b))
5002 && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
5003 || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
5005 dependence_stats.num_subscript_undetermined++;
5006 *overlap_iterations_a = conflict_fn_not_known ();
5007 *overlap_iterations_b = conflict_fn_not_known ();
5010 else if (ziv_subscript_p (chrec_a, chrec_b))
5011 analyze_ziv_subscript (chrec_a, chrec_b,
5012 overlap_iterations_a, overlap_iterations_b,
5013 last_conflicts);
5015 else if (siv_subscript_p (chrec_a, chrec_b))
5016 analyze_siv_subscript (chrec_a, chrec_b,
5017 overlap_iterations_a, overlap_iterations_b,
5018 last_conflicts, lnn);
5020 else
5021 analyze_miv_subscript (chrec_a, chrec_b,
5022 overlap_iterations_a, overlap_iterations_b,
5023 last_conflicts, loop_nest);
5025 if (dump_file && (dump_flags & TDF_DETAILS))
5027 fprintf (dump_file, " (overlap_iterations_a = ");
5028 dump_conflict_function (dump_file, *overlap_iterations_a);
5029 fprintf (dump_file, ")\n (overlap_iterations_b = ");
5030 dump_conflict_function (dump_file, *overlap_iterations_b);
5031 fprintf (dump_file, "))\n");
5035 /* Helper function for uniquely inserting distance vectors. */
5037 static void
5038 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
5040 for (lambda_vector v : DDR_DIST_VECTS (ddr))
5041 if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
5042 return;
5044 DDR_DIST_VECTS (ddr).safe_push (dist_v);
5047 /* Helper function for uniquely inserting direction vectors. */
5049 static void
5050 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
5052 for (lambda_vector v : DDR_DIR_VECTS (ddr))
5053 if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
5054 return;
5056 DDR_DIR_VECTS (ddr).safe_push (dir_v);
5059 /* Add a distance of 1 on all the loops outer than INDEX. If we
5060 haven't yet determined a distance for this outer loop, push a new
5061 distance vector composed of the previous distance, and a distance
5062 of 1 for this outer loop. Example:
5064 | loop_1
5065 | loop_2
5066 | A[10]
5067 | endloop_2
5068 | endloop_1
5070 Saved vectors are of the form (dist_in_1, dist_in_2). First, we
5071 save (0, 1), then we have to save (1, 0). */
5073 static void
5074 add_outer_distances (struct data_dependence_relation *ddr,
5075 lambda_vector dist_v, int index)
5077 /* For each outer loop where init_v is not set, the accesses are
5078 in dependence of distance 1 in the loop. */
5079 while (--index >= 0)
5081 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5082 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
5083 save_v[index] = 1;
5084 save_dist_v (ddr, save_v);
5088 /* Return false when fail to represent the data dependence as a
5089 distance vector. A_INDEX is the index of the first reference
5090 (0 for DDR_A, 1 for DDR_B) and B_INDEX is the index of the
5091 second reference. INIT_B is set to true when a component has been
5092 added to the distance vector DIST_V. INDEX_CARRY is then set to
5093 the index in DIST_V that carries the dependence. */
5095 static bool
5096 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
5097 unsigned int a_index, unsigned int b_index,
5098 lambda_vector dist_v, bool *init_b,
5099 int *index_carry)
5101 unsigned i;
5102 lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5103 class loop *loop = DDR_LOOP_NEST (ddr)[0];
5105 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
5107 tree access_fn_a, access_fn_b;
5108 struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
5110 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
5112 non_affine_dependence_relation (ddr);
5113 return false;
5116 access_fn_a = SUB_ACCESS_FN (subscript, a_index);
5117 access_fn_b = SUB_ACCESS_FN (subscript, b_index);
5119 if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
5120 && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
5122 HOST_WIDE_INT dist;
5123 int index;
5124 int var_a = CHREC_VARIABLE (access_fn_a);
5125 int var_b = CHREC_VARIABLE (access_fn_b);
5127 if (var_a != var_b
5128 || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
5130 non_affine_dependence_relation (ddr);
5131 return false;
	  /* When data references are collected in a loop while data
	     dependences are analyzed in a loop nest nested in that loop,
	     there are more access functions than loops.  Skip access
	     functions of loops not in the loop nest.

	     See PR89725 for more information.  */
5140 if (flow_loop_nested_p (get_loop (cfun, var_a), loop))
5141 continue;
5143 dist = int_cst_value (SUB_DISTANCE (subscript));
5144 index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
5145 *index_carry = MIN (index, *index_carry);
5147 /* This is the subscript coupling test. If we have already
5148 recorded a distance for this loop (a distance coming from
5149 another subscript), it should be the same. For example,
5150 in the following code, there is no dependence:
5152 | loop i = 0, N, 1
5153 | T[i+1][i] = ...
5154 | ... = T[i][i]
| endloop  */
5157 if (init_v[index] != 0 && dist_v[index] != dist)
5159 finalize_ddr_dependent (ddr, chrec_known);
5160 return false;
5163 dist_v[index] = dist;
5164 init_v[index] = 1;
5165 *init_b = true;
5167 else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
5169 /* This can be for example an affine vs. constant dependence
5170 (T[i] vs. T[3]) that is not an affine dependence and is
5171 not representable as a distance vector. */
5172 non_affine_dependence_relation (ddr);
5173 return false;
5175 else
5176 *init_b = true;
5179 return true;
/* Return true when the DDR contains only access functions that are
   invariant with respect to loop number LNUM.  */
5185 static bool
5186 invariant_access_functions (const struct data_dependence_relation *ddr,
5187 int lnum)
5189 for (subscript *sub : DDR_SUBSCRIPTS (ddr))
5190 if (!evolution_function_is_invariant_p (SUB_ACCESS_FN (sub, 0), lnum)
5191 || !evolution_function_is_invariant_p (SUB_ACCESS_FN (sub, 1), lnum))
5192 return false;
5194 return true;
5197 /* Helper function for the case where DDR_A and DDR_B are the same
5198 multivariate access function with a constant step. For an example
5199 see pr34635-1.c. */
5201 static void
5202 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
5204 int x_1, x_2;
5205 tree c_1 = CHREC_LEFT (c_2);
5206 tree c_0 = CHREC_LEFT (c_1);
5207 lambda_vector dist_v;
5208 HOST_WIDE_INT v1, v2, cd;
5210 /* Polynomials with more than 2 variables are not handled yet. When
5211 the evolution steps are parameters, it is not possible to
5212 represent the dependence using classical distance vectors. */
5213 if (TREE_CODE (c_0) != INTEGER_CST
5214 || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
5215 || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
5217 DDR_AFFINE_P (ddr) = false;
5218 return;
5221 x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
5222 x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
5224 /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */
5225 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5226 v1 = int_cst_value (CHREC_RIGHT (c_1));
5227 v2 = int_cst_value (CHREC_RIGHT (c_2));
5228 cd = gcd (v1, v2);
5229 v1 /= cd;
5230 v2 /= cd;
5232 if (v2 < 0)
5234 v2 = -v2;
5235 v1 = -v1;
5238 dist_v[x_1] = v2;
5239 dist_v[x_2] = -v1;
5240 save_dist_v (ddr, dist_v);
5242 add_outer_distances (ddr, dist_v, x_1);
5245 /* Helper function for the case where DDR_A and DDR_B are the same
5246 access functions. */
5248 static void
5249 add_other_self_distances (struct data_dependence_relation *ddr)
5251 lambda_vector dist_v;
5252 unsigned i;
5253 int index_carry = DDR_NB_LOOPS (ddr);
5254 subscript *sub;
5255 class loop *loop = DDR_LOOP_NEST (ddr)[0];
5257 FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
5259 tree access_fun = SUB_ACCESS_FN (sub, 0);
5261 if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
5263 if (!evolution_function_is_univariate_p (access_fun, loop->num))
5265 if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
5267 DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
5268 return;
5271 access_fun = SUB_ACCESS_FN (DDR_SUBSCRIPT (ddr, 0), 0);
5273 if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
5274 add_multivariate_self_dist (ddr, access_fun);
5275 else
5276 /* The evolution step is not constant: it varies in
5277 the outer loop, so this cannot be represented by a
5278 distance vector. For example in pr34635.c the
5279 evolution is {0, +, {0, +, 4}_1}_2. */
5280 DDR_AFFINE_P (ddr) = false;
5282 return;
      /* When data references are collected in a loop while data
	 dependences are analyzed in a loop nest nested in that loop,
	 there are more access functions than loops.  Skip access
	 functions of loops not in the loop nest.

	 See PR89725 for more information.  */
5291 if (flow_loop_nested_p (get_loop (cfun, CHREC_VARIABLE (access_fun)),
5292 loop))
5293 continue;
5295 index_carry = MIN (index_carry,
5296 index_in_loop_nest (CHREC_VARIABLE (access_fun),
5297 DDR_LOOP_NEST (ddr)));
5301 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5302 add_outer_distances (ddr, dist_v, index_carry);
5305 static void
5306 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
5308 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5310 dist_v[0] = 1;
5311 save_dist_v (ddr, dist_v);
5314 /* Adds a unit distance vector to DDR when there is a 0 overlap. This
5315 is the case for example when access functions are the same and
5316 equal to a constant, as in:
5318 | loop_1
5319 | A[3] = ...
5320 | ... = A[3]
5321 | endloop_1
5323 in which case the distance vectors are (0) and (1). */
5325 static void
5326 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
5328 unsigned i, j;
5330 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
5332 subscript_p sub = DDR_SUBSCRIPT (ddr, i);
5333 conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
5334 conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
5336 for (j = 0; j < ca->n; j++)
5337 if (affine_function_zero_p (ca->fns[j]))
5339 insert_innermost_unit_dist_vector (ddr);
5340 return;
5343 for (j = 0; j < cb->n; j++)
5344 if (affine_function_zero_p (cb->fns[j]))
5346 insert_innermost_unit_dist_vector (ddr);
5347 return;
5352 /* Return true when the DDR contains two data references that have the
5353 same access functions. */
5355 static inline bool
5356 same_access_functions (const struct data_dependence_relation *ddr)
5358 for (subscript *sub : DDR_SUBSCRIPTS (ddr))
5359 if (!eq_evolutions_p (SUB_ACCESS_FN (sub, 0),
5360 SUB_ACCESS_FN (sub, 1)))
5361 return false;
5363 return true;
5366 /* Compute the classic per loop distance vector. DDR is the data
5367 dependence relation to build a vector from. Return false when fail
5368 to represent the data dependence as a distance vector. */
5370 static bool
5371 build_classic_dist_vector (struct data_dependence_relation *ddr,
5372 class loop *loop_nest)
5374 bool init_b = false;
5375 int index_carry = DDR_NB_LOOPS (ddr);
5376 lambda_vector dist_v;
5378 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
5379 return false;
5381 if (same_access_functions (ddr))
5383 /* Save the 0 vector. */
5384 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5385 save_dist_v (ddr, dist_v);
5387 if (invariant_access_functions (ddr, loop_nest->num))
5388 add_distance_for_zero_overlaps (ddr);
5390 if (DDR_NB_LOOPS (ddr) > 1)
5391 add_other_self_distances (ddr);
5393 return true;
5396 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5397 if (!build_classic_dist_vector_1 (ddr, 0, 1, dist_v, &init_b, &index_carry))
5398 return false;
5400 /* Save the distance vector if we initialized one. */
5401 if (init_b)
5403 /* Verify a basic constraint: classic distance vectors should
5404 always be lexicographically positive.
5406 Data references are collected in the order of execution of
5407 the program, thus for the following loop
5409 | for (i = 1; i < 100; i++)
5410 | for (j = 1; j < 100; j++)
5412 | t = T[j+1][i-1]; // A
5413 | T[j][i] = t + 2; // B
5416 references are collected following the direction of the wind:
5417 A then B. The data dependence tests are performed also
5418 following this order, such that we're looking at the distance
5419 separating the elements accessed by A from the elements later
5420 accessed by B. But in this example, the distance returned by
	 test_dep (A, B) is lexicographically negative (-1, 1), which
	 means that the access A occurs later than B with respect to
	 the outer loop, i.e. we're actually looking upwind.  In this
5424 case we solve test_dep (B, A) looking downwind to the
5425 lexicographically positive solution, that returns the
5426 distance vector (1, -1). */
5427 if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
5429 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5430 if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
5431 return false;
5432 compute_subscript_distance (ddr);
5433 if (!build_classic_dist_vector_1 (ddr, 1, 0, save_v, &init_b,
5434 &index_carry))
5435 return false;
5436 save_dist_v (ddr, save_v);
5437 DDR_REVERSED_P (ddr) = true;
5439 /* In this case there is a dependence forward for all the
5440 outer loops:
5442 | for (k = 1; k < 100; k++)
5443 | for (i = 1; i < 100; i++)
5444 | for (j = 1; j < 100; j++)
5446 | t = T[j+1][i-1]; // A
5447 | T[j][i] = t + 2; // B
5450 the vectors are:
5451 (0, 1, -1)
5452 (1, 1, -1)
(1, -1, 1)  */
5455 if (DDR_NB_LOOPS (ddr) > 1)
5457 add_outer_distances (ddr, save_v, index_carry);
5458 add_outer_distances (ddr, dist_v, index_carry);
5461 else
5463 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5464 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
5466 if (DDR_NB_LOOPS (ddr) > 1)
5468 lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5470 if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
5471 return false;
5472 compute_subscript_distance (ddr);
5473 if (!build_classic_dist_vector_1 (ddr, 1, 0, opposite_v, &init_b,
5474 &index_carry))
5475 return false;
5477 save_dist_v (ddr, save_v);
5478 add_outer_distances (ddr, dist_v, index_carry);
5479 add_outer_distances (ddr, opposite_v, index_carry);
5481 else
5482 save_dist_v (ddr, save_v);
5485 else
5487 /* There is a distance of 1 on all the outer loops: Example:
5488 there is a dependence of distance 1 on loop_1 for the array A.
5490 | loop_1
5491 | A[5] = ...
5492 | endloop
5494 add_outer_distances (ddr, dist_v,
5495 lambda_vector_first_nz (dist_v,
5496 DDR_NB_LOOPS (ddr), 0));
5499 if (dump_file && (dump_flags & TDF_DETAILS))
5501 unsigned i;
5503 fprintf (dump_file, "(build_classic_dist_vector\n");
5504 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
5506 fprintf (dump_file, " dist_vector = (");
5507 print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
5508 DDR_NB_LOOPS (ddr));
5509 fprintf (dump_file, " )\n");
5511 fprintf (dump_file, ")\n");
5514 return true;
5517 /* Return the direction for a given distance.
5518 FIXME: Computing dir this way is suboptimal, since dir can catch
5519 cases that dist is unable to represent. */
5521 static inline enum data_dependence_direction
5522 dir_from_dist (int dist)
5524 if (dist > 0)
5525 return dir_positive;
5526 else if (dist < 0)
5527 return dir_negative;
5528 else
5529 return dir_equal;
5532 /* Compute the classic per loop direction vector. DDR is the data
5533 dependence relation to build a vector from. */
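/* For example, the distance vector (2, -1) gives the direction
   vector (dir_positive, dir_negative).  */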
5535 static void
5536 build_classic_dir_vector (struct data_dependence_relation *ddr)
5538 unsigned i, j;
5539 lambda_vector dist_v;
5541 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
5543 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5545 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
5546 dir_v[j] = dir_from_dist (dist_v[j]);
5548 save_dir_v (ddr, dir_v);
/* Helper function.  Returns true when there is a dependence between the
   data references.  A_INDEX is the index of the first reference (0 for
   DDR_A, 1 for DDR_B) and B_INDEX is the index of the second reference.  */

static bool
subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
			       unsigned int a_index, unsigned int b_index,
			       class loop *loop_nest)
{
  unsigned int i;
  tree last_conflicts;
  struct subscript *subscript;
  tree res = NULL_TREE;

  for (i = 0; DDR_SUBSCRIPTS (ddr).iterate (i, &subscript); i++)
    {
      conflict_function *overlaps_a, *overlaps_b;

      analyze_overlapping_iterations (SUB_ACCESS_FN (subscript, a_index),
				      SUB_ACCESS_FN (subscript, b_index),
				      &overlaps_a, &overlaps_b,
				      &last_conflicts, loop_nest);

      if (SUB_CONFLICTS_IN_A (subscript))
	free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
      if (SUB_CONFLICTS_IN_B (subscript))
	free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
      SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
      SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
      SUB_LAST_CONFLICT (subscript) = last_conflicts;

      /* If there is any undetermined conflict function we have to
	 give a conservative answer in case we cannot prove that
	 no dependence exists when analyzing another subscript.  */
      if (CF_NOT_KNOWN_P (overlaps_a)
	  || CF_NOT_KNOWN_P (overlaps_b))
	{
	  res = chrec_dont_know;
	  continue;
	}

      /* When there is a subscript with no dependence we can stop.  */
      else if (CF_NO_DEPENDENCE_P (overlaps_a)
	       || CF_NO_DEPENDENCE_P (overlaps_b))
	{
	  res = chrec_known;
	  break;
	}
    }

  if (res == NULL_TREE)
    return true;

  if (res == chrec_known)
    dependence_stats.num_dependence_independent++;
  else
    dependence_stats.num_dependence_undetermined++;
  finalize_ddr_dependent (ddr, res);
  return false;
}
/* Computes the conflicting iterations in LOOP_NEST, and initializes DDR.  */

static void
subscript_dependence_tester (struct data_dependence_relation *ddr,
			     class loop *loop_nest)
{
  if (subscript_dependence_tester_1 (ddr, 0, 1, loop_nest))
    dependence_stats.num_dependence_dependent++;

  compute_subscript_distance (ddr);
  if (build_classic_dist_vector (ddr, loop_nest))
    build_classic_dir_vector (ddr);
}
/* Returns true when all the access functions of A are affine or
   constant with respect to LOOP_NEST.  */

static bool
access_functions_are_affine_or_constant_p (const struct data_reference *a,
					   const class loop *loop_nest)
{
  vec<tree> fns = DR_ACCESS_FNS (a);
  for (tree t : fns)
    if (!evolution_function_is_invariant_p (t, loop_nest->num)
	&& !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
      return false;

  return true;
}
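/* As an illustration: an access function that is a constant such as 3,
   or an affine chrec such as {0, +, 1}_1, passes this test, while a
   non-affine access function such as the one computed for A[i*i]
   typically does not.  */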
/* This computes the affine dependence relation between A and B with
   respect to LOOP_NEST.  CHREC_KNOWN is used for representing the
   independence between two accesses, while CHREC_DONT_KNOW is used
   for representing the unknown relation.

   Note that it is possible to stop the computation of the dependence
   relation the first time we detect a CHREC_KNOWN element for a given
   subscript.  */

void
compute_affine_dependence (struct data_dependence_relation *ddr,
			   class loop *loop_nest)
{
  struct data_reference *dra = DDR_A (ddr);
  struct data_reference *drb = DDR_B (ddr);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "(compute_affine_dependence\n");
      fprintf (dump_file, "  ref_a: ");
      print_generic_expr (dump_file, DR_REF (dra));
      fprintf (dump_file, ", stmt_a: ");
      print_gimple_stmt (dump_file, DR_STMT (dra), 0, TDF_SLIM);
      fprintf (dump_file, "  ref_b: ");
      print_generic_expr (dump_file, DR_REF (drb));
      fprintf (dump_file, ", stmt_b: ");
      print_gimple_stmt (dump_file, DR_STMT (drb), 0, TDF_SLIM);
    }

  /* Analyze only when the dependence relation is not yet known.  */
  if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
    {
      dependence_stats.num_dependence_tests++;

      if (access_functions_are_affine_or_constant_p (dra, loop_nest)
	  && access_functions_are_affine_or_constant_p (drb, loop_nest))
	subscript_dependence_tester (ddr, loop_nest);

      /* As a last case, if the dependence cannot be determined, or if
	 the dependence is considered too difficult to determine, answer
	 "don't know".  */
      else
	{
	  dependence_stats.num_dependence_undetermined++;

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Data ref a:\n");
	      dump_data_reference (dump_file, dra);
	      fprintf (dump_file, "Data ref b:\n");
	      dump_data_reference (dump_file, drb);
	      fprintf (dump_file,
		       "affine dependence test not usable: access function not affine or constant.\n");
	    }
	  finalize_ddr_dependent (ddr, chrec_dont_know);
	}
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
	fprintf (dump_file, ") -> no dependence\n");
      else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
	fprintf (dump_file, ") -> dependence analysis failed\n");
      else
	fprintf (dump_file, ")\n");
    }
}
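/* On return from compute_affine_dependence, DDR_ARE_DEPENDENT (ddr) is
   chrec_known when independence was proved, chrec_dont_know when the
   analysis failed, and NULL_TREE when a dependence was found and
   characterized by the subscript, distance and direction information
   recorded in DDR.  */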
/* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
   the data references in DATAREFS, in the LOOP_NEST.  When
   COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
   relations.  Return true when successful, i.e. when the number of
   data references is small enough to be handled.  */

bool
compute_all_dependences (const vec<data_reference_p> &datarefs,
			 vec<ddr_p> *dependence_relations,
			 const vec<loop_p> &loop_nest,
			 bool compute_self_and_rr)
{
  struct data_dependence_relation *ddr;
  struct data_reference *a, *b;
  unsigned int i, j;

  if ((int) datarefs.length ()
      > param_loop_max_datarefs_for_datadeps)
    {
      struct data_dependence_relation *ddr;

      /* Insert a single relation into dependence_relations:
	 chrec_dont_know.  */
      ddr = initialize_data_dependence_relation (NULL, NULL, loop_nest);
      dependence_relations->safe_push (ddr);
      return false;
    }

  FOR_EACH_VEC_ELT (datarefs, i, a)
    for (j = i + 1; datarefs.iterate (j, &b); j++)
      if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
	{
	  ddr = initialize_data_dependence_relation (a, b, loop_nest);
	  dependence_relations->safe_push (ddr);
	  if (loop_nest.exists ())
	    compute_affine_dependence (ddr, loop_nest[0]);
	}

  if (compute_self_and_rr)
    FOR_EACH_VEC_ELT (datarefs, i, a)
      {
	ddr = initialize_data_dependence_relation (a, a, loop_nest);
	dependence_relations->safe_push (ddr);
	if (loop_nest.exists ())
	  compute_affine_dependence (ddr, loop_nest[0]);
      }

  return true;
}
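/* compute_data_dependences_for_loop below is the canonical driver for
   this function: it builds the loop nest with find_loop_nest, collects
   the data references with find_data_references_in_loop, and then calls
   compute_all_dependences on the result.  */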
/* Describes a location of a memory reference.  */

struct data_ref_loc
{
  /* The memory reference.  */
  tree ref;

  /* True if the memory reference is read.  */
  bool is_read;

  /* True if the data reference is conditional within the containing
     statement, i.e. if it might not occur even when the statement
     is executed and runs to completion.  This is the case for the
     accesses performed by IFN_MASK_LOAD and IFN_MASK_STORE, which
     depend on their mask argument.  */
  bool is_conditional_in_stmt;
};

/* Stores the locations of memory references in STMT to REFERENCES.  Returns
   true if STMT clobbers memory, false otherwise.  */

static bool
get_references_in_stmt (gimple *stmt, vec<data_ref_loc, va_heap> *references)
{
  bool clobbers_memory = false;
  data_ref_loc ref;
  tree op0, op1;
  enum gimple_code stmt_code = gimple_code (stmt);

  /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
     As we cannot model data references for accesses that are not
     spelled out, give up if they may occur.  */
  if (stmt_code == GIMPLE_CALL
      && !(gimple_call_flags (stmt) & ECF_CONST))
    {
      /* Allow IFN_GOMP_SIMD_LANE in their own loops.  */
      if (gimple_call_internal_p (stmt))
	switch (gimple_call_internal_fn (stmt))
	  {
	  case IFN_GOMP_SIMD_LANE:
	    {
	      class loop *loop = gimple_bb (stmt)->loop_father;
	      tree uid = gimple_call_arg (stmt, 0);
	      gcc_assert (TREE_CODE (uid) == SSA_NAME);
	      if (loop == NULL
		  || loop->simduid != SSA_NAME_VAR (uid))
		clobbers_memory = true;
	      break;
	    }
	  case IFN_MASK_LOAD:
	  case IFN_MASK_STORE:
	    break;
	  default:
	    clobbers_memory = true;
	    break;
	  }
      else
	clobbers_memory = true;
    }
  else if (stmt_code == GIMPLE_ASM
	   && (gimple_asm_volatile_p (as_a <gasm *> (stmt))
	       || gimple_vuse (stmt)))
    clobbers_memory = true;

  if (!gimple_vuse (stmt))
    return clobbers_memory;

  if (stmt_code == GIMPLE_ASSIGN)
    {
      tree base;
      op0 = gimple_assign_lhs (stmt);
      op1 = gimple_assign_rhs1 (stmt);

      if (DECL_P (op1)
	  || (REFERENCE_CLASS_P (op1)
	      && (base = get_base_address (op1))
	      && TREE_CODE (base) != SSA_NAME
	      && !is_gimple_min_invariant (base)))
	{
	  ref.ref = op1;
	  ref.is_read = true;
	  ref.is_conditional_in_stmt = false;
	  references->safe_push (ref);
	}
    }
  else if (stmt_code == GIMPLE_CALL)
    {
      unsigned i, n;
      tree ptr, type;
      unsigned int align;

      ref.is_read = false;
      if (gimple_call_internal_p (stmt))
	switch (gimple_call_internal_fn (stmt))
	  {
	  case IFN_MASK_LOAD:
	    if (gimple_call_lhs (stmt) == NULL_TREE)
	      break;
	    ref.is_read = true;
	    /* FALLTHRU */
	  case IFN_MASK_STORE:
	    ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)), 0);
	    align = tree_to_shwi (gimple_call_arg (stmt, 1));
	    if (ref.is_read)
	      type = TREE_TYPE (gimple_call_lhs (stmt));
	    else
	      type = TREE_TYPE (gimple_call_arg (stmt, 3));
	    if (TYPE_ALIGN (type) != align)
	      type = build_aligned_type (type, align);
	    ref.is_conditional_in_stmt = true;
	    ref.ref = fold_build2 (MEM_REF, type, gimple_call_arg (stmt, 0),
				   ptr);
	    references->safe_push (ref);
	    return false;
	  default:
	    break;
	  }

      op0 = gimple_call_lhs (stmt);
      n = gimple_call_num_args (stmt);
      for (i = 0; i < n; i++)
	{
	  op1 = gimple_call_arg (stmt, i);

	  if (DECL_P (op1)
	      || (REFERENCE_CLASS_P (op1) && get_base_address (op1)))
	    {
	      ref.ref = op1;
	      ref.is_read = true;
	      ref.is_conditional_in_stmt = false;
	      references->safe_push (ref);
	    }
	}
    }
  else
    return clobbers_memory;

  if (op0
      && (DECL_P (op0)
	  || (REFERENCE_CLASS_P (op0) && get_base_address (op0))))
    {
      ref.ref = op0;
      ref.is_read = false;
      ref.is_conditional_in_stmt = false;
      references->safe_push (ref);
    }
  return clobbers_memory;
}
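/* As an example of the above: for a load "_1 = b[i_2]" the reference
   b[i_2] is recorded as a read, and for a store "a[i_2] = _1" the
   reference a[i_2] is recorded as a write; plain SSA name operands
   never produce a data_ref_loc.  */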
/* Returns true if the loop-nest has any data reference.  */

bool
loop_nest_has_data_refs (loop_p loop)
{
  basic_block *bbs = get_loop_body (loop);
  auto_vec<data_ref_loc, 3> references;

  for (unsigned i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];
      gimple_stmt_iterator bsi;

      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	{
	  gimple *stmt = gsi_stmt (bsi);
	  get_references_in_stmt (stmt, &references);
	  if (references.length ())
	    {
	      free (bbs);
	      return true;
	    }
	}
    }
  free (bbs);
  return false;
}
/* Stores the data references in STMT to DATAREFS.  If there is an
   unanalyzable reference, returns an opt_result failure, otherwise
   returns success.  NEST is the outermost loop of the loop nest in
   which the references should be analyzed.  */

opt_result
find_data_references_in_stmt (class loop *nest, gimple *stmt,
			      vec<data_reference_p> *datarefs)
{
  auto_vec<data_ref_loc, 2> references;
  data_reference_p dr;

  if (get_references_in_stmt (stmt, &references))
    return opt_result::failure_at (stmt, "statement clobbers memory: %G",
				   stmt);

  for (const data_ref_loc &ref : references)
    {
      dr = create_data_ref (nest ? loop_preheader_edge (nest) : NULL,
			    loop_containing_stmt (stmt), ref.ref,
			    stmt, ref.is_read, ref.is_conditional_in_stmt);
      gcc_assert (dr != NULL);
      datarefs->safe_push (dr);
    }

  return opt_result::success ();
}
/* Stores the data references in STMT to DATAREFS.  If there is an
   unanalyzable reference, returns false, otherwise returns true.
   NEST is the outermost loop of the loop nest in which the references
   should be instantiated, LOOP is the loop in which the references
   should be analyzed.  */

bool
graphite_find_data_references_in_stmt (edge nest, loop_p loop, gimple *stmt,
				       vec<data_reference_p> *datarefs)
{
  auto_vec<data_ref_loc, 2> references;
  bool ret = true;
  data_reference_p dr;

  if (get_references_in_stmt (stmt, &references))
    return false;

  for (const data_ref_loc &ref : references)
    {
      dr = create_data_ref (nest, loop, ref.ref, stmt, ref.is_read,
			    ref.is_conditional_in_stmt);
      gcc_assert (dr != NULL);
      datarefs->safe_push (dr);
    }

  return ret;
}
/* Search the data references in basic block BB of LOOP, and record the
   information into DATAREFS.  Returns chrec_dont_know when failing to
   analyze a difficult case, returns NULL_TREE otherwise.  */

tree
find_data_references_in_bb (class loop *loop, basic_block bb,
			    vec<data_reference_p> *datarefs)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    {
      gimple *stmt = gsi_stmt (bsi);

      if (!find_data_references_in_stmt (loop, stmt, datarefs))
	{
	  struct data_reference *res;
	  res = XCNEW (struct data_reference);
	  datarefs->safe_push (res);

	  return chrec_dont_know;
	}
    }

  return NULL_TREE;
}
/* Search the data references in LOOP, and record the information into
   DATAREFS.  Returns chrec_dont_know when failing to analyze a
   difficult case, returns NULL_TREE otherwise.

   TODO: This function should be made smarter so that it can handle
   address arithmetic expressions as if they were array accesses, etc.  */

tree
find_data_references_in_loop (class loop *loop,
			      vec<data_reference_p> *datarefs)
{
  basic_block bb, *bbs;
  unsigned int i;

  bbs = get_loop_body_in_dom_order (loop);

  for (i = 0; i < loop->num_nodes; i++)
    {
      bb = bbs[i];

      if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
	{
	  free (bbs);
	  return chrec_dont_know;
	}
    }
  free (bbs);

  return NULL_TREE;
}
/* Return the alignment in bytes that DRB is guaranteed to have at all
   times.  */

unsigned int
dr_alignment (innermost_loop_behavior *drb)
{
  /* Get the alignment of BASE_ADDRESS + INIT.  */
  unsigned int alignment = drb->base_alignment;
  unsigned int misalignment = (drb->base_misalignment
			       + TREE_INT_CST_LOW (drb->init));
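  /* The lowest set bit of the misalignment, computed below as
     misalignment & -misalignment, is the largest power of two known to
     divide BASE_ADDRESS + INIT; e.g. a misalignment of 12 from a 16-byte
     aligned base still guarantees 4-byte alignment.  */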
  if (misalignment != 0)
    alignment = MIN (alignment, misalignment & -misalignment);

  /* Cap it to the alignment of OFFSET.  */
  if (!integer_zerop (drb->offset))
    alignment = MIN (alignment, drb->offset_alignment);

  /* Cap it to the alignment of STEP.  */
  if (!integer_zerop (drb->step))
    alignment = MIN (alignment, drb->step_alignment);

  return alignment;
}
/* If BASE is a pointer-typed SSA name, try to find the object that it
   is based on.  Return this object X on success and store the alignment
   in bytes of BASE - &X in *ALIGNMENT_OUT.  */

static tree
get_base_for_alignment_1 (tree base, unsigned int *alignment_out)
{
  if (TREE_CODE (base) != SSA_NAME || !POINTER_TYPE_P (TREE_TYPE (base)))
    return NULL_TREE;

  gimple *def = SSA_NAME_DEF_STMT (base);
  base = analyze_scalar_evolution (loop_containing_stmt (def), base);

  /* Peel chrecs and record the minimum alignment preserved by
     all steps.  */
  unsigned int alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
  while (TREE_CODE (base) == POLYNOMIAL_CHREC)
    {
      unsigned int step_alignment = highest_pow2_factor (CHREC_RIGHT (base));
      alignment = MIN (alignment, step_alignment);
      base = CHREC_LEFT (base);
    }

  /* Punt if the expression is too complicated to handle.  */
  if (tree_contains_chrecs (base, NULL) || !POINTER_TYPE_P (TREE_TYPE (base)))
    return NULL_TREE;

  /* The only useful cases are those for which a dereference folds to something
     other than an INDIRECT_REF.  */
  tree ref_type = TREE_TYPE (TREE_TYPE (base));
  tree ref = fold_indirect_ref_1 (UNKNOWN_LOCATION, ref_type, base);
  if (!ref)
    return NULL_TREE;

  /* Analyze the base to which the steps we peeled were applied.  */
  poly_int64 bitsize, bitpos, bytepos;
  machine_mode mode;
  int unsignedp, reversep, volatilep;
  tree offset;
  base = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
			      &unsignedp, &reversep, &volatilep);
  if (!base || !multiple_p (bitpos, BITS_PER_UNIT, &bytepos))
    return NULL_TREE;

  /* Restrict the alignment to that guaranteed by the offsets.  */
  unsigned int bytepos_alignment = known_alignment (bytepos);
  if (bytepos_alignment != 0)
    alignment = MIN (alignment, bytepos_alignment);
  if (offset)
    {
      unsigned int offset_alignment = highest_pow2_factor (offset);
      alignment = MIN (alignment, offset_alignment);
    }

  *alignment_out = alignment;
  return base;
}
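/* For instance, if BASE is found to evolve as {&x, +, 4}_1 for some
   object x, the peeled step preserves 4-byte alignment, so x is returned
   with *ALIGNMENT_OUT == 4, i.e. BASE is known to equal &x plus a
   multiple of 4.  */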
/* Return the object whose alignment would need to be changed in order
   to increase the alignment of ADDR.  Store the maximum achievable
   alignment in *MAX_ALIGNMENT.  */

tree
get_base_for_alignment (tree addr, unsigned int *max_alignment)
{
  tree base = get_base_for_alignment_1 (addr, max_alignment);
  if (base)
    return base;

  if (TREE_CODE (addr) == ADDR_EXPR)
    addr = TREE_OPERAND (addr, 0);
  *max_alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
  return addr;
}
/* Recursive helper function.  */

static bool
find_loop_nest_1 (class loop *loop, vec<loop_p> *loop_nest)
{
  /* Inner loops of the nest should not contain siblings.  Example:
     when there are two consecutive loops,

     | loop_0
     |   loop_1
     |     A[{0, +, 1}_1]
     |   endloop_1
     |   loop_2
     |     A[{0, +, 1}_2]
     |   endloop_2
     | endloop_0

     the dependence relation cannot be captured by the distance
     abstraction.  */
  if (loop->next)
    return false;

  loop_nest->safe_push (loop);
  if (loop->inner)
    return find_loop_nest_1 (loop->inner, loop_nest);
  return true;
}
/* Return false when the LOOP is not well nested.  Otherwise return
   true and insert in LOOP_NEST the loops of the nest.  LOOP_NEST will
   contain the loops from the outermost to the innermost, as they will
   appear in the classic distance vector.  */

bool
find_loop_nest (class loop *loop, vec<loop_p> *loop_nest)
{
  loop_nest->safe_push (loop);
  if (loop->inner)
    return find_loop_nest_1 (loop->inner, loop_nest);
  return true;
}
/* Returns true when the data dependences have been computed, false otherwise.
   Given a loop nest LOOP, the following vectors are returned:
   DATAREFS is initialized to all the array elements contained in this loop,
   DEPENDENCE_RELATIONS contains the relations between the data references.
   Compute read-read and self relations if
   COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE.  */

bool
compute_data_dependences_for_loop (class loop *loop,
				   bool compute_self_and_read_read_dependences,
				   vec<loop_p> *loop_nest,
				   vec<data_reference_p> *datarefs,
				   vec<ddr_p> *dependence_relations)
{
  bool res = true;

  memset (&dependence_stats, 0, sizeof (dependence_stats));

  /* If the loop nest is not well formed, or one of the data references
     is not computable, give up without spending time to compute other
     dependences.  */
  if (!loop
      || !find_loop_nest (loop, loop_nest)
      || find_data_references_in_loop (loop, datarefs) == chrec_dont_know
      || !compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
				   compute_self_and_read_read_dependences))
    res = false;

  if (dump_file && (dump_flags & TDF_STATS))
    {
      fprintf (dump_file, "Dependence tester statistics:\n");

      fprintf (dump_file, "Number of dependence tests: %d\n",
	       dependence_stats.num_dependence_tests);
      fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
	       dependence_stats.num_dependence_dependent);
      fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
	       dependence_stats.num_dependence_independent);
      fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
	       dependence_stats.num_dependence_undetermined);

      fprintf (dump_file, "Number of subscript tests: %d\n",
	       dependence_stats.num_subscript_tests);
      fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
	       dependence_stats.num_subscript_undetermined);
      fprintf (dump_file, "Number of same subscript function: %d\n",
	       dependence_stats.num_same_subscript_function);

      fprintf (dump_file, "Number of ziv tests: %d\n",
	       dependence_stats.num_ziv);
      fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
	       dependence_stats.num_ziv_dependent);
      fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
	       dependence_stats.num_ziv_independent);
      fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
	       dependence_stats.num_ziv_unimplemented);

      fprintf (dump_file, "Number of siv tests: %d\n",
	       dependence_stats.num_siv);
      fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
	       dependence_stats.num_siv_dependent);
      fprintf (dump_file, "Number of siv tests returning independent: %d\n",
	       dependence_stats.num_siv_independent);
      fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
	       dependence_stats.num_siv_unimplemented);

      fprintf (dump_file, "Number of miv tests: %d\n",
	       dependence_stats.num_miv);
      fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
	       dependence_stats.num_miv_dependent);
      fprintf (dump_file, "Number of miv tests returning independent: %d\n",
	       dependence_stats.num_miv_independent);
      fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
	       dependence_stats.num_miv_unimplemented);
    }

  return res;
}
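/* A minimal sketch of how a client might drive the analysis above,
   assuming LOOP is a well-nested loop taken from the loop tree of the
   current function:

     auto_vec<loop_p> loop_nest;
     auto_vec<data_reference_p> datarefs;
     auto_vec<ddr_p> ddrs;
     if (compute_data_dependences_for_loop (loop, false, &loop_nest,
					    &datarefs, &ddrs))
       {
	 ... inspect DDR_ARE_DEPENDENT and DDR_DIST_VECTS of each ddr ...
       }
     free_dependence_relations (ddrs);
     free_data_refs (datarefs);  */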
/* Free the memory used by a data dependence relation DDR.  */

void
free_dependence_relation (struct data_dependence_relation *ddr)
{
  if (ddr == NULL)
    return;

  if (DDR_SUBSCRIPTS (ddr).exists ())
    free_subscripts (DDR_SUBSCRIPTS (ddr));
  DDR_DIST_VECTS (ddr).release ();
  DDR_DIR_VECTS (ddr).release ();

  free (ddr);
}

/* Free the memory used by the data dependence relations from
   DEPENDENCE_RELATIONS.  */

void
free_dependence_relations (vec<ddr_p>& dependence_relations)
{
  for (data_dependence_relation *ddr : dependence_relations)
    if (ddr)
      free_dependence_relation (ddr);

  dependence_relations.release ();
}

/* Free the memory used by the data references from DATAREFS.  */

void
free_data_refs (vec<data_reference_p>& datarefs)
{
  for (data_reference *dr : datarefs)
    free_data_ref (dr);
  datarefs.release ();
}
/* Common routine implementing both dr_direction_indicator and
   dr_zero_step_indicator.  Return USEFUL_MIN if the indicator is known
   to be >= USEFUL_MIN and -1 if the indicator is known to be negative.
   Return the step as the indicator otherwise.  */

static tree
dr_step_indicator (struct data_reference *dr, int useful_min)
{
  tree step = DR_STEP (dr);
  if (!step)
    return NULL_TREE;
  STRIP_NOPS (step);
  /* Look for cases where the step is scaled by a positive constant
     integer, which will often be the access size.  If the multiplication
     doesn't change the sign (due to overflow effects) then we can
     test the unscaled value instead.  */
  if (TREE_CODE (step) == MULT_EXPR
      && TREE_CODE (TREE_OPERAND (step, 1)) == INTEGER_CST
      && tree_int_cst_sgn (TREE_OPERAND (step, 1)) > 0)
    {
      tree factor = TREE_OPERAND (step, 1);
      step = TREE_OPERAND (step, 0);

      /* Strip widening and truncating conversions as well as nops.  */
      if (CONVERT_EXPR_P (step)
	  && INTEGRAL_TYPE_P (TREE_TYPE (TREE_OPERAND (step, 0))))
	step = TREE_OPERAND (step, 0);
      tree type = TREE_TYPE (step);

      /* Get the range of step values that would not cause overflow.  */
      widest_int minv = (wi::to_widest (TYPE_MIN_VALUE (ssizetype))
			 / wi::to_widest (factor));
      widest_int maxv = (wi::to_widest (TYPE_MAX_VALUE (ssizetype))
			 / wi::to_widest (factor));

      /* Get the range of values that the unconverted step actually has.  */
      wide_int step_min, step_max;
      value_range vr;
      if (TREE_CODE (step) != SSA_NAME
	  || !get_range_query (cfun)->range_of_expr (vr, step)
	  || vr.kind () != VR_RANGE)
	{
	  step_min = wi::to_wide (TYPE_MIN_VALUE (type));
	  step_max = wi::to_wide (TYPE_MAX_VALUE (type));
	}
      else
	{
	  step_min = vr.lower_bound ();
	  step_max = vr.upper_bound ();
	}

      /* Check whether the unconverted step has an acceptable range.  */
      signop sgn = TYPE_SIGN (type);
      if (wi::les_p (minv, widest_int::from (step_min, sgn))
	  && wi::ges_p (maxv, widest_int::from (step_max, sgn)))
	{
	  if (wi::ge_p (step_min, useful_min, sgn))
	    return ssize_int (useful_min);
	  else if (wi::lt_p (step_max, 0, sgn))
	    return ssize_int (-1);
	  else
	    return fold_convert (ssizetype, step);
	}
    }
  return DR_STEP (dr);
}
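/* For example, if DR's step is n * 4 and the range of n is known to be
   [1, 100], dr_direction_indicator below returns ssize_int (0) (a
   nonnegative step) and dr_zero_step_indicator returns ssize_int (1)
   (a nonzero step), sparing callers from reasoning about the scaled
   value.  */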
/* Return a value that is negative iff DR has a negative step.  */

tree
dr_direction_indicator (struct data_reference *dr)
{
  return dr_step_indicator (dr, 0);
}

/* Return a value that is zero iff DR has a zero step.  */

tree
dr_zero_step_indicator (struct data_reference *dr)
{
  return dr_step_indicator (dr, 1);
}

/* Return true if DR is known to have a nonnegative (but possibly zero)
   step.  */

bool
dr_known_forward_stride_p (struct data_reference *dr)
{
  tree indicator = dr_direction_indicator (dr);
  tree neg_step_val = fold_binary (LT_EXPR, boolean_type_node,
				   fold_convert (ssizetype, indicator),
				   ssize_int (0));
  return neg_step_val && integer_zerop (neg_step_val);
}