gcc/tree-data-ref.cc

   1 /* Data references and dependences detectors.
   2    Copyright (C) 2003-2023 Free Software Foundation, Inc.
   3    Contributed by Sebastian Pop <pop@cri.ensmp.fr>
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 /* This pass walks a given loop structure searching for array
  22    references.  The information about the array accesses is recorded
  23    in DATA_REFERENCE structures.
  24
  25    The basic test for determining the dependences is:
  26    given two access functions chrec1 and chrec2 to a same array, and
  27    x and y two vectors from the iteration domain, the same element of
  28    the array is accessed twice at iterations x and y if and only if:
  29    |             chrec1 (x) == chrec2 (y).
  30
  31    The goals of this analysis are:
  32
  33    - to determine the independence: the relation between two
  34      independent accesses is qualified with the chrec_known (this
  35      information allows a loop parallelization),
  36
  37    - when two data references access the same data, to qualify the
  38      dependence relation with classic dependence representations:
  39
  40        - distance vectors
  41        - direction vectors
  42        - loop carried level dependence
  43        - polyhedron dependence
  44      or with the chains of recurrences based representation,
  45
  46    - to define a knowledge base for storing the data dependence
  47      information,
  48
  49    - to define an interface to access this data.
  50
  51
  52    Definitions:
  53
  54    - subscript: given two array accesses a subscript is the tuple
  55    composed of the access functions for a given dimension.  Example:
  56    Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
  57    (f1, g1), (f2, g2), (f3, g3).
  58
  59    - Diophantine equation: an equation whose coefficients and
  60    solutions are integer constants, for example the equation
  61    |   3*x + 2*y = 1
  62    has an integer solution x = 1 and y = -1.
  63
  64    References:
  65
  66    - "Advanced Compilation for High Performance Computing" by Randy
  67    Allen and Ken Kennedy.
  68    http://citeseer.ist.psu.edu/goff91practical.html
  69
  70    - "Loop Transformations for Restructuring Compilers - The Foundations"
  71    by Utpal Banerjee.
  72
  73
  74 */
  75
  76 #include "config.h"
  77 #include "system.h"
  78 #include "coretypes.h"
  79 #include "backend.h"
  80 #include "rtl.h"
  81 #include "tree.h"
  82 #include "gimple.h"
  83 #include "gimple-pretty-print.h"
  84 #include "alias.h"
  85 #include "fold-const.h"
  86 #include "expr.h"
  87 #include "gimple-iterator.h"
  88 #include "tree-ssa-loop-niter.h"
  89 #include "tree-ssa-loop.h"
  90 #include "tree-ssa.h"
  91 #include "cfgloop.h"
  92 #include "tree-data-ref.h"
  93 #include "tree-scalar-evolution.h"
  94 #include "dumpfile.h"
  95 #include "tree-affine.h"
  96 #include "builtins.h"
  97 #include "tree-eh.h"
  98 #include "ssa.h"
  99 #include "internal-fn.h"
 100 #include "vr-values.h"
 101 #include "range-op.h"
 102 #include "tree-ssa-loop-ivopts.h"
 103
 104 static struct datadep_stats
 105 {
 106   int num_dependence_tests;
 107   int num_dependence_dependent;
 108   int num_dependence_independent;
 109   int num_dependence_undetermined;
 110
 111   int num_subscript_tests;
 112   int num_subscript_undetermined;
 113   int num_same_subscript_function;
 114
 115   int num_ziv;
 116   int num_ziv_independent;
 117   int num_ziv_dependent;
 118   int num_ziv_unimplemented;
 119
 120   int num_siv;
 121   int num_siv_independent;
 122   int num_siv_dependent;
 123   int num_siv_unimplemented;
 124
 125   int num_miv;
 126   int num_miv_independent;
 127   int num_miv_dependent;
 128   int num_miv_unimplemented;
 129 } dependence_stats;
 130
 131 static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
 132                                            unsigned int, unsigned int,
 133                                            class loop *);
 134 /* Returns true iff A divides B.  */
 135
 136 static inline bool
 137 tree_fold_divides_p (const_tree a, const_tree b)
 138 {
 139   gcc_assert (TREE_CODE (a) == INTEGER_CST);
 140   gcc_assert (TREE_CODE (b) == INTEGER_CST);
 141   return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
 142 }
 143
 144 /* Returns true iff A divides B.  */
 145
 146 static inline bool
 147 int_divides_p (lambda_int a, lambda_int b)
 148 {
 149   return ((b % a) == 0);
 150 }
 151
 152 /* Return true if reference REF contains a union access.  */
 153
 154 static bool
 155 ref_contains_union_access_p (tree ref)
 156 {
 157   while (handled_component_p (ref))
 158     {
 159       ref = TREE_OPERAND (ref, 0);
 160       if (TREE_CODE (TREE_TYPE (ref)) == UNION_TYPE
 161           || TREE_CODE (TREE_TYPE (ref)) == QUAL_UNION_TYPE)
 162         return true;
 163     }
 164   return false;
 165 }
 166
 167 \f
 168
 169 /* Dump into FILE all the data references from DATAREFS.  */
 170
 171 static void
 172 dump_data_references (FILE *file, vec<data_reference_p> datarefs)
 173 {
 174   for (data_reference *dr : datarefs)
 175     dump_data_reference (file, dr);
 176 }
 177
 178 /* Unified dump into FILE all the data references from DATAREFS.  */
 179
 180 DEBUG_FUNCTION void
 181 debug (vec<data_reference_p> &ref)
 182 {
 183   dump_data_references (stderr, ref);
 184 }
 185
 186 DEBUG_FUNCTION void
 187 debug (vec<data_reference_p> *ptr)
 188 {
 189   if (ptr)
 190     debug (*ptr);
 191   else
 192     fprintf (stderr, "<nil>\n");
 193 }
 194
 195
 196 /* Dump into STDERR all the data references from DATAREFS.  */
 197
 198 DEBUG_FUNCTION void
 199 debug_data_references (vec<data_reference_p> datarefs)
 200 {
 201   dump_data_references (stderr, datarefs);
 202 }
 203
 204 /* Print to STDERR the data_reference DR.  */
 205
 206 DEBUG_FUNCTION void
 207 debug_data_reference (struct data_reference *dr)
 208 {
 209   dump_data_reference (stderr, dr);
 210 }
 211
 212 /* Dump function for a DATA_REFERENCE structure.  */
 213
 214 void
 215 dump_data_reference (FILE *outf,
 216                      struct data_reference *dr)
 217 {
 218   unsigned int i;
 219
 220   fprintf (outf, "#(Data Ref: \n");
 221   fprintf (outf, "#  bb: %d \n", gimple_bb (DR_STMT (dr))->index);
 222   fprintf (outf, "#  stmt: ");
 223   print_gimple_stmt (outf, DR_STMT (dr), 0);
 224   fprintf (outf, "#  ref: ");
 225   print_generic_stmt (outf, DR_REF (dr));
 226   fprintf (outf, "#  base_object: ");
 227   print_generic_stmt (outf, DR_BASE_OBJECT (dr));
 228
 229   for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
 230     {
 231       fprintf (outf, "#  Access function %d: ", i);
 232       print_generic_stmt (outf, DR_ACCESS_FN (dr, i));
 233     }
 234   fprintf (outf, "#)\n");
 235 }
 236
 237 /* Unified dump function for a DATA_REFERENCE structure.  */
 238
 239 DEBUG_FUNCTION void
 240 debug (data_reference &ref)
 241 {
 242   dump_data_reference (stderr, &ref);
 243 }
 244
 245 DEBUG_FUNCTION void
 246 debug (data_reference *ptr)
 247 {
 248   if (ptr)
 249     debug (*ptr);
 250   else
 251     fprintf (stderr, "<nil>\n");
 252 }
 253
 254
 255 /* Dumps the affine function described by FN to the file OUTF.  */
 256
 257 DEBUG_FUNCTION void
 258 dump_affine_function (FILE *outf, affine_fn fn)
 259 {
 260   unsigned i;
 261   tree coef;
 262
 263   print_generic_expr (outf, fn[0], TDF_SLIM);
 264   for (i = 1; fn.iterate (i, &coef); i++)
 265     {
 266       fprintf (outf, " + ");
 267       print_generic_expr (outf, coef, TDF_SLIM);
 268       fprintf (outf, " * x_%u", i);
 269     }
 270 }
 271
 272 /* Dumps the conflict function CF to the file OUTF.  */
 273
 274 DEBUG_FUNCTION void
 275 dump_conflict_function (FILE *outf, conflict_function *cf)
 276 {
 277   unsigned i;
 278
 279   if (cf->n == NO_DEPENDENCE)
 280     fprintf (outf, "no dependence");
 281   else if (cf->n == NOT_KNOWN)
 282     fprintf (outf, "not known");
 283   else
 284     {
 285       for (i = 0; i < cf->n; i++)
 286         {
 287           if (i != 0)
 288             fprintf (outf, " ");
 289           fprintf (outf, "[");
 290           dump_affine_function (outf, cf->fns[i]);
 291           fprintf (outf, "]");
 292         }
 293     }
 294 }
 295
 296 /* Dump function for a SUBSCRIPT structure.  */
 297
 298 DEBUG_FUNCTION void
 299 dump_subscript (FILE *outf, struct subscript *subscript)
 300 {
 301   conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
 302
 303   fprintf (outf, "\n (subscript \n");
 304   fprintf (outf, "  iterations_that_access_an_element_twice_in_A: ");
 305   dump_conflict_function (outf, cf);
 306   if (CF_NONTRIVIAL_P (cf))
 307     {
 308       tree last_iteration = SUB_LAST_CONFLICT (subscript);
 309       fprintf (outf, "\n  last_conflict: ");
 310       print_generic_expr (outf, last_iteration);
 311     }
 312
 313   cf = SUB_CONFLICTS_IN_B (subscript);
 314   fprintf (outf, "\n  iterations_that_access_an_element_twice_in_B: ");
 315   dump_conflict_function (outf, cf);
 316   if (CF_NONTRIVIAL_P (cf))
 317     {
 318       tree last_iteration = SUB_LAST_CONFLICT (subscript);
 319       fprintf (outf, "\n  last_conflict: ");
 320       print_generic_expr (outf, last_iteration);
 321     }
 322
 323   fprintf (outf, "\n  (Subscript distance: ");
 324   print_generic_expr (outf, SUB_DISTANCE (subscript));
 325   fprintf (outf, " ))\n");
 326 }
 327
 328 /* Print the classic direction vector DIRV to OUTF.  */
 329
 330 DEBUG_FUNCTION void
 331 print_direction_vector (FILE *outf,
 332                         lambda_vector dirv,
 333                         int length)
 334 {
 335   int eq;
 336
 337   for (eq = 0; eq < length; eq++)
 338     {
 339       enum data_dependence_direction dir = ((enum data_dependence_direction)
 340                                             dirv[eq]);
 341
 342       switch (dir)
 343         {
 344         case dir_positive:
 345           fprintf (outf, "    +");
 346           break;
 347         case dir_negative:
 348           fprintf (outf, "    -");
 349           break;
 350         case dir_equal:
 351           fprintf (outf, "    =");
 352           break;
 353         case dir_positive_or_equal:
 354           fprintf (outf, "   +=");
 355           break;
 356         case dir_positive_or_negative:
 357           fprintf (outf, "   +-");
 358           break;
 359         case dir_negative_or_equal:
 360           fprintf (outf, "   -=");
 361           break;
 362         case dir_star:
 363           fprintf (outf, "    *");
 364           break;
 365         default:
 366           fprintf (outf, "indep");
 367           break;
 368         }
 369     }
 370   fprintf (outf, "\n");
 371 }
 372
 373 /* Print a vector of direction vectors.  */
 374
 375 DEBUG_FUNCTION void
 376 print_dir_vectors (FILE *outf, vec<lambda_vector> dir_vects,
 377                    int length)
 378 {
 379   for (lambda_vector v : dir_vects)
 380     print_direction_vector (outf, v, length);
 381 }
 382
 383 /* Print out a vector VEC of length N to OUTFILE.  */
 384
 385 DEBUG_FUNCTION void
 386 print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
 387 {
 388   int i;
 389
 390   for (i = 0; i < n; i++)
 391     fprintf (outfile, HOST_WIDE_INT_PRINT_DEC " ", vector[i]);
 392   fprintf (outfile, "\n");
 393 }
 394
 395 /* Print a vector of distance vectors.  */
 396
 397 DEBUG_FUNCTION void
 398 print_dist_vectors (FILE *outf, vec<lambda_vector> dist_vects,
 399                     int length)
 400 {
 401   for (lambda_vector v : dist_vects)
 402     print_lambda_vector (outf, v, length);
 403 }
 404
 405 /* Dump function for a DATA_DEPENDENCE_RELATION structure.  */
 406
 407 DEBUG_FUNCTION void
 408 dump_data_dependence_relation (FILE *outf, const data_dependence_relation *ddr)
 409 {
 410   struct data_reference *dra, *drb;
 411
 412   fprintf (outf, "(Data Dep: \n");
 413
 414   if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
 415     {
 416       if (ddr)
 417         {
 418           dra = DDR_A (ddr);
 419           drb = DDR_B (ddr);
 420           if (dra)
 421             dump_data_reference (outf, dra);
 422           else
 423             fprintf (outf, "    (nil)\n");
 424           if (drb)
 425             dump_data_reference (outf, drb);
 426           else
 427             fprintf (outf, "    (nil)\n");
 428         }
 429       fprintf (outf, "    (don't know)\n)\n");
 430       return;
 431     }
 432
 433   dra = DDR_A (ddr);
 434   drb = DDR_B (ddr);
 435   dump_data_reference (outf, dra);
 436   dump_data_reference (outf, drb);
 437
 438   if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
 439     fprintf (outf, "    (no dependence)\n");
 440
 441   else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
 442     {
 443       unsigned int i;
 444       class loop *loopi;
 445
 446       subscript *sub;
 447       FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
 448         {
 449           fprintf (outf, "  access_fn_A: ");
 450           print_generic_stmt (outf, SUB_ACCESS_FN (sub, 0));
 451           fprintf (outf, "  access_fn_B: ");
 452           print_generic_stmt (outf, SUB_ACCESS_FN (sub, 1));
 453           dump_subscript (outf, sub);
 454         }
 455
 456       fprintf (outf, "  loop nest: (");
 457       FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr), i, loopi)
 458         fprintf (outf, "%d ", loopi->num);
 459       fprintf (outf, ")\n");
 460
 461       for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
 462         {
 463           fprintf (outf, "  distance_vector: ");
 464           print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
 465                                DDR_NB_LOOPS (ddr));
 466         }
 467
 468       for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
 469         {
 470           fprintf (outf, "  direction_vector: ");
 471           print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
 472                                   DDR_NB_LOOPS (ddr));
 473         }
 474     }
 475
 476   fprintf (outf, ")\n");
 477 }
 478
 479 /* Debug version.  */
 480
 481 DEBUG_FUNCTION void
 482 debug_data_dependence_relation (const struct data_dependence_relation *ddr)
 483 {
 484   dump_data_dependence_relation (stderr, ddr);
 485 }
 486
 487 /* Dump into FILE all the dependence relations from DDRS.  */
 488
 489 DEBUG_FUNCTION void
 490 dump_data_dependence_relations (FILE *file, const vec<ddr_p> &ddrs)
 491 {
 492   for (auto ddr : ddrs)
 493     dump_data_dependence_relation (file, ddr);
 494 }
 495
 496 DEBUG_FUNCTION void
 497 debug (vec<ddr_p> &ref)
 498 {
 499   dump_data_dependence_relations (stderr, ref);
 500 }
 501
 502 DEBUG_FUNCTION void
 503 debug (vec<ddr_p> *ptr)
 504 {
 505   if (ptr)
 506     debug (*ptr);
 507   else
 508     fprintf (stderr, "<nil>\n");
 509 }
 510
 511
 512 /* Dump to STDERR all the dependence relations from DDRS.  */
 513
 514 DEBUG_FUNCTION void
 515 debug_data_dependence_relations (vec<ddr_p> ddrs)
 516 {
 517   dump_data_dependence_relations (stderr, ddrs);
 518 }
 519
 520 /* Dumps the distance and direction vectors in FILE.  DDRS contains
 521    the dependence relations, and VECT_SIZE is the size of the
 522    dependence vectors, or in other words the number of loops in the
 523    considered nest.  */
 524
 525 DEBUG_FUNCTION void
 526 dump_dist_dir_vectors (FILE *file, vec<ddr_p> ddrs)
 527 {
 528   for (data_dependence_relation *ddr : ddrs)
 529     if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
 530       {
 531         for (lambda_vector v : DDR_DIST_VECTS (ddr))
 532           {
 533             fprintf (file, "DISTANCE_V (");
 534             print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
 535             fprintf (file, ")\n");
 536           }
 537
 538         for (lambda_vector v : DDR_DIR_VECTS (ddr))
 539           {
 540             fprintf (file, "DIRECTION_V (");
 541             print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
 542             fprintf (file, ")\n");
 543           }
 544       }
 545
 546   fprintf (file, "\n\n");
 547 }
 548
 549 /* Dumps the data dependence relations DDRS in FILE.  */
 550
 551 DEBUG_FUNCTION void
 552 dump_ddrs (FILE *file, vec<ddr_p> ddrs)
 553 {
 554   for (data_dependence_relation *ddr : ddrs)
 555     dump_data_dependence_relation (file, ddr);
 556
 557   fprintf (file, "\n\n");
 558 }
 559
 560 DEBUG_FUNCTION void
 561 debug_ddrs (vec<ddr_p> ddrs)
 562 {
 563   dump_ddrs (stderr, ddrs);
 564 }
 565
 566 /* If RESULT_RANGE is nonnull, set *RESULT_RANGE to the range of
 567    OP0 CODE OP1, where:
 568
 569    - OP0 CODE OP1 has integral type TYPE
 570    - the range of OP0 is given by OP0_RANGE and
 571    - the range of OP1 is given by OP1_RANGE.
 572
 573    Independently of RESULT_RANGE, try to compute:
 574
 575      DELTA = ((sizetype) OP0 CODE (sizetype) OP1)
 576              - (sizetype) (OP0 CODE OP1)
 577
 578    as a constant and subtract DELTA from the ssizetype constant in *OFF.
 579    Return true on success, or false if DELTA is not known at compile time.
 580
 581    Truncation and sign changes are known to distribute over CODE, i.e.
 582
 583      (itype) (A CODE B) == (itype) A CODE (itype) B
 584
 585    for any integral type ITYPE whose precision is no greater than the
 586    precision of A and B.  */
 587
 588 static bool
 589 compute_distributive_range (tree type, value_range &op0_range,
 590                             tree_code code, value_range &op1_range,
 591                             tree *off, value_range *result_range)
 592 {
 593   gcc_assert (INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type));
 594   if (result_range)
 595     {
 596       range_op_handler op (code);
 597       if (!op.fold_range (*result_range, type, op0_range, op1_range))
 598         result_range->set_varying (type);
 599     }
 600
 601   /* The distributive property guarantees that if TYPE is no narrower
 602      than SIZETYPE,
 603
 604        (sizetype) (OP0 CODE OP1) == (sizetype) OP0 CODE (sizetype) OP1
 605
 606      and so we can treat DELTA as zero.  */
 607   if (TYPE_PRECISION (type) >= TYPE_PRECISION (sizetype))
 608     return true;
 609
 610   /* If overflow is undefined, we can assume that:
 611
 612        X == (ssizetype) OP0 CODE (ssizetype) OP1
 613
 614      is within the range of TYPE, i.e.:
 615
 616        X == (ssizetype) (TYPE) X
 617
 618      Distributing the (TYPE) truncation over X gives:
 619
 620        X == (ssizetype) (OP0 CODE OP1)
 621
 622      Casting both sides to sizetype and distributing the sizetype cast
 623      over X gives:
 624
 625        (sizetype) OP0 CODE (sizetype) OP1 == (sizetype) (OP0 CODE OP1)
 626
 627      and so we can treat DELTA as zero.  */
 628   if (TYPE_OVERFLOW_UNDEFINED (type))
 629     return true;
 630
 631   /* Compute the range of:
 632
 633        (ssizetype) OP0 CODE (ssizetype) OP1
 634
 635      The distributive property guarantees that this has the same bitpattern as:
 636
 637        (sizetype) OP0 CODE (sizetype) OP1
 638
 639      but its range is more conducive to analysis.  */
 640   range_cast (op0_range, ssizetype);
 641   range_cast (op1_range, ssizetype);
 642   value_range wide_range;
 643   range_op_handler op (code);
 644   bool saved_flag_wrapv = flag_wrapv;
 645   flag_wrapv = 1;
 646   if (!op.fold_range (wide_range, ssizetype, op0_range, op1_range))
 647     wide_range.set_varying (ssizetype);;
 648   flag_wrapv = saved_flag_wrapv;
 649   if (wide_range.num_pairs () != 1
 650       || wide_range.varying_p () || wide_range.undefined_p ())
 651     return false;
 652
 653   wide_int lb = wide_range.lower_bound ();
 654   wide_int ub = wide_range.upper_bound ();
 655
 656   /* Calculate the number of times that each end of the range overflows or
 657      underflows TYPE.  We can only calculate DELTA if the numbers match.  */
 658   unsigned int precision = TYPE_PRECISION (type);
 659   if (!TYPE_UNSIGNED (type))
 660     {
 661       wide_int type_min = wi::mask (precision - 1, true, lb.get_precision ());
 662       lb -= type_min;
 663       ub -= type_min;
 664     }
 665   wide_int upper_bits = wi::mask (precision, true, lb.get_precision ());
 666   lb &= upper_bits;
 667   ub &= upper_bits;
 668   if (lb != ub)
 669     return false;
 670
 671   /* OP0 CODE OP1 overflows exactly arshift (LB, PRECISION) times, with
 672      negative values indicating underflow.  The low PRECISION bits of LB
 673      are clear, so DELTA is therefore LB (== UB).  */
 674   *off = wide_int_to_tree (ssizetype, wi::to_wide (*off) - lb);
 675   return true;
 676 }
 677
 678 /* Return true if (sizetype) OP == (sizetype) (TO_TYPE) OP,
 679    given that OP has type FROM_TYPE and range RANGE.  Both TO_TYPE and
 680    FROM_TYPE are integral types.  */
 681
 682 static bool
 683 nop_conversion_for_offset_p (tree to_type, tree from_type, value_range &range)
 684 {
 685   gcc_assert (INTEGRAL_TYPE_P (to_type)
 686               && INTEGRAL_TYPE_P (from_type)
 687               && !TYPE_OVERFLOW_TRAPS (to_type)
 688               && !TYPE_OVERFLOW_TRAPS (from_type));
 689
 690   /* Converting to something no narrower than sizetype and then to sizetype
 691      is equivalent to converting directly to sizetype.  */
 692   if (TYPE_PRECISION (to_type) >= TYPE_PRECISION (sizetype))
 693     return true;
 694
 695   /* Check whether TO_TYPE can represent all values that FROM_TYPE can.  */
 696   if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type)
 697       && (TYPE_UNSIGNED (from_type) || !TYPE_UNSIGNED (to_type)))
 698     return true;
 699
 700   /* For narrowing conversions, we could in principle test whether
 701      the bits in FROM_TYPE but not in TO_TYPE have a fixed value
 702      and apply a constant adjustment.
 703
 704      For other conversions (which involve a sign change) we could
 705      check that the signs are always equal, and apply a constant
 706      adjustment if the signs are negative.
 707
 708      However, both cases should be rare.  */
 709   return range_fits_type_p (&range, TYPE_PRECISION (to_type),
 710                             TYPE_SIGN (to_type));
 711 }
 712
 713 static void
 714 split_constant_offset (tree type, tree *var, tree *off,
 715                        value_range *result_range,
 716                        hash_map<tree, std::pair<tree, tree> > &cache,
 717                        unsigned *limit);
 718
 719 /* Helper function for split_constant_offset.  If TYPE is a pointer type,
 720    try to express OP0 CODE OP1 as:
 721
 722      POINTER_PLUS <*VAR, (sizetype) *OFF>
 723
 724    where:
 725
 726    - *VAR has type TYPE
 727    - *OFF is a constant of type ssizetype.
 728
 729    If TYPE is an integral type, try to express (sizetype) (OP0 CODE OP1) as:
 730
 731      *VAR + (sizetype) *OFF
 732
 733    where:
 734
 735    - *VAR has type sizetype
 736    - *OFF is a constant of type ssizetype.
 737
 738    In both cases, OP0 CODE OP1 has type TYPE.
 739
 740    Return true on success.  A false return value indicates that we can't
 741    do better than set *OFF to zero.
 742
 743    When returning true, set RESULT_RANGE to the range of OP0 CODE OP1,
 744    if RESULT_RANGE is nonnull and if we can do better than assume VR_VARYING.
 745
 746    CACHE caches {*VAR, *OFF} pairs for SSA names that we've previously
 747    visited.  LIMIT counts down the number of SSA names that we are
 748    allowed to process before giving up.  */
 749
 750 static bool
 751 split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
 752                          tree *var, tree *off, value_range *result_range,
 753                          hash_map<tree, std::pair<tree, tree> > &cache,
 754                          unsigned *limit)
 755 {
 756   tree var0, var1;
 757   tree off0, off1;
 758   value_range op0_range, op1_range;
 759
 760   *var = NULL_TREE;
 761   *off = NULL_TREE;
 762
 763   if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type))
 764     return false;
 765
 766   switch (code)
 767     {
 768     case INTEGER_CST:
 769       *var = size_int (0);
 770       *off = fold_convert (ssizetype, op0);
 771       if (result_range)
 772         {
 773           wide_int w = wi::to_wide (op0);
 774           result_range->set (TREE_TYPE (op0), w, w);
 775         }
 776       return true;
 777
 778     case POINTER_PLUS_EXPR:
 779       split_constant_offset (op0, &var0, &off0, nullptr, cache, limit);
 780       split_constant_offset (op1, &var1, &off1, nullptr, cache, limit);
 781       *var = fold_build2 (POINTER_PLUS_EXPR, type, var0, var1);
 782       *off = size_binop (PLUS_EXPR, off0, off1);
 783       return true;
 784
 785     case PLUS_EXPR:
 786     case MINUS_EXPR:
 787       split_constant_offset (op0, &var0, &off0, &op0_range, cache, limit);
 788       split_constant_offset (op1, &var1, &off1, &op1_range, cache, limit);
 789       *off = size_binop (code, off0, off1);
 790       if (!compute_distributive_range (type, op0_range, code, op1_range,
 791                                        off, result_range))
 792         return false;
 793       *var = fold_build2 (code, sizetype, var0, var1);
 794       return true;
 795
 796     case MULT_EXPR:
 797       if (TREE_CODE (op1) != INTEGER_CST)
 798         return false;
 799
 800       split_constant_offset (op0, &var0, &off0, &op0_range, cache, limit);
 801       op1_range.set (TREE_TYPE (op1), wi::to_wide (op1), wi::to_wide (op1));
 802       *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
 803       if (!compute_distributive_range (type, op0_range, code, op1_range,
 804                                        off, result_range))
 805         return false;
 806       *var = fold_build2 (MULT_EXPR, sizetype, var0,
 807                           fold_convert (sizetype, op1));
 808       return true;
 809
 810     case ADDR_EXPR:
 811       {
 812         tree base, poffset;
 813         poly_int64 pbitsize, pbitpos, pbytepos;
 814         machine_mode pmode;
 815         int punsignedp, preversep, pvolatilep;
 816
 817         op0 = TREE_OPERAND (op0, 0);
 818         base
 819           = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset, &pmode,
 820                                  &punsignedp, &preversep, &pvolatilep);
 821
 822         if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
 823           return false;
 824         base = build_fold_addr_expr (base);
 825         off0 = ssize_int (pbytepos);
 826
 827         if (poffset)
 828           {
 829             split_constant_offset (poffset, &poffset, &off1, nullptr,
 830                                    cache, limit);
 831             off0 = size_binop (PLUS_EXPR, off0, off1);
 832             base = fold_build_pointer_plus (base, poffset);
 833           }
 834
 835         var0 = fold_convert (type, base);
 836
 837         /* If variable length types are involved, punt, otherwise casts
 838            might be converted into ARRAY_REFs in gimplify_conversion.
 839            To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
 840            possibly no longer appears in current GIMPLE, might resurface.
 841            This perhaps could run
 842            if (CONVERT_EXPR_P (var0))
 843              {
 844                gimplify_conversion (&var0);
 845                // Attempt to fill in any within var0 found ARRAY_REF's
 846                // element size from corresponding op embedded ARRAY_REF,
 847                // if unsuccessful, just punt.
 848              }  */
 849         while (POINTER_TYPE_P (type))
 850           type = TREE_TYPE (type);
 851         if (int_size_in_bytes (type) < 0)
 852           return false;
 853
 854         *var = var0;
 855         *off = off0;
 856         return true;
 857       }
 858
 859     case SSA_NAME:
 860       {
 861         if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op0))
 862           return false;
 863
 864         gimple *def_stmt = SSA_NAME_DEF_STMT (op0);
 865         enum tree_code subcode;
 866
 867         if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
 868           return false;
 869
 870         subcode = gimple_assign_rhs_code (def_stmt);
 871
 872         /* We are using a cache to avoid un-CSEing large amounts of code.  */
 873         bool use_cache = false;
 874         if (!has_single_use (op0)
 875             && (subcode == POINTER_PLUS_EXPR
 876                 || subcode == PLUS_EXPR
 877                 || subcode == MINUS_EXPR
 878                 || subcode == MULT_EXPR
 879                 || subcode == ADDR_EXPR
 880                 || CONVERT_EXPR_CODE_P (subcode)))
 881           {
 882             use_cache = true;
 883             bool existed;
 884             std::pair<tree, tree> &e = cache.get_or_insert (op0, &existed);
 885             if (existed)
 886               {
 887                 if (integer_zerop (e.second))
 888                   return false;
 889                 *var = e.first;
 890                 *off = e.second;
 891                 /* The caller sets the range in this case.  */
 892                 return true;
 893               }
 894             e = std::make_pair (op0, ssize_int (0));
 895           }
 896
 897         if (*limit == 0)
 898           return false;
 899         --*limit;
 900
 901         var0 = gimple_assign_rhs1 (def_stmt);
 902         var1 = gimple_assign_rhs2 (def_stmt);
 903
 904         bool res = split_constant_offset_1 (type, var0, subcode, var1,
 905                                             var, off, nullptr, cache, limit);
 906         if (res && use_cache)
 907           *cache.get (op0) = std::make_pair (*var, *off);
 908         /* The caller sets the range in this case.  */
 909         return res;
 910       }
 911     CASE_CONVERT:
 912       {
 913         /* We can only handle the following conversions:
 914
 915            - Conversions from one pointer type to another pointer type.
 916
 917            - Conversions from one non-trapping integral type to another
 918              non-trapping integral type.  In this case, the recursive
 919              call makes sure that:
 920
 921                (sizetype) OP0
 922
 923              can be expressed as a sizetype operation involving VAR and OFF,
 924              and all we need to do is check whether:
 925
 926                (sizetype) OP0 == (sizetype) (TYPE) OP0
 927
 928            - Conversions from a non-trapping sizetype-size integral type to
 929              a like-sized pointer type.  In this case, the recursive call
 930              makes sure that:
 931
 932                (sizetype) OP0 == *VAR + (sizetype) *OFF
 933
 934              and we can convert that to:
 935
 936                POINTER_PLUS <(TYPE) *VAR, (sizetype) *OFF>
 937
 938            - Conversions from a sizetype-sized pointer type to a like-sized
 939              non-trapping integral type.  In this case, the recursive call
 940              makes sure that:
 941
 942                OP0 == POINTER_PLUS <*VAR, (sizetype) *OFF>
 943
 944              where the POINTER_PLUS and *VAR have the same precision as
 945              TYPE (and the same precision as sizetype).  Then:
 946
 947                (sizetype) (TYPE) OP0 == (sizetype) *VAR + (sizetype) *OFF.  */
 948         tree itype = TREE_TYPE (op0);
 949         if ((POINTER_TYPE_P (itype)
 950              || (INTEGRAL_TYPE_P (itype) && !TYPE_OVERFLOW_TRAPS (itype)))
 951             && (POINTER_TYPE_P (type)
 952                 || (INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type)))
 953             && (POINTER_TYPE_P (type) == POINTER_TYPE_P (itype)
 954                 || (TYPE_PRECISION (type) == TYPE_PRECISION (sizetype)
 955                     && TYPE_PRECISION (itype) == TYPE_PRECISION (sizetype))))
 956           {
 957             if (POINTER_TYPE_P (type))
 958               {
 959                 split_constant_offset (op0, var, off, nullptr, cache, limit);
 960                 *var = fold_convert (type, *var);
 961               }
 962             else if (POINTER_TYPE_P (itype))
 963               {
 964                 split_constant_offset (op0, var, off, nullptr, cache, limit);
 965                 *var = fold_convert (sizetype, *var);
 966               }
 967             else
 968               {
 969                 split_constant_offset (op0, var, off, &op0_range,
 970                                        cache, limit);
 971                 if (!nop_conversion_for_offset_p (type, itype, op0_range))
 972                   return false;
 973                 if (result_range)
 974                   {
 975                     *result_range = op0_range;
 976                     range_cast (*result_range, type);
 977                   }
 978               }
 979             return true;
 980           }
 981         return false;
 982       }
 983
 984     default:
 985       return false;
 986     }
 987 }
 988
 989 /* If EXP has pointer type, try to express it as:
 990
 991      POINTER_PLUS <*VAR, (sizetype) *OFF>
 992
 993    where:
 994
 995    - *VAR has the same type as EXP
 996    - *OFF is a constant of type ssizetype.
 997
 998    If EXP has an integral type, try to express (sizetype) EXP as:
 999
1000      *VAR + (sizetype) *OFF
1001
1002    where:
1003
1004    - *VAR has type sizetype
1005    - *OFF is a constant of type ssizetype.
1006
1007    If EXP_RANGE is nonnull, set it to the range of EXP.
1008
1009    CACHE caches {*VAR, *OFF} pairs for SSA names that we've previously
1010    visited.  LIMIT counts down the number of SSA names that we are
1011    allowed to process before giving up.  */
1012
1013 static void
1014 split_constant_offset (tree exp, tree *var, tree *off, value_range *exp_range,
1015                        hash_map<tree, std::pair<tree, tree> > &cache,
1016                        unsigned *limit)
1017 {
1018   tree type = TREE_TYPE (exp), op0, op1;
1019   enum tree_code code;
1020
1021   code = TREE_CODE (exp);
1022   if (exp_range)
1023     {
1024       *exp_range = type;
1025       if (code == SSA_NAME)
1026         {
1027           value_range vr;
1028           get_range_query (cfun)->range_of_expr (vr, exp);
1029           if (vr.undefined_p ())
1030             vr.set_varying (TREE_TYPE (exp));
1031           tree vr_min, vr_max;
1032           value_range_kind vr_kind = get_legacy_range (vr, vr_min, vr_max);
1033           wide_int var_min = wi::to_wide (vr_min);
1034           wide_int var_max = wi::to_wide (vr_max);
1035           wide_int var_nonzero = get_nonzero_bits (exp);
1036           vr_kind = intersect_range_with_nonzero_bits (vr_kind,
1037                                                        &var_min, &var_max,
1038                                                        var_nonzero,
1039                                                        TYPE_SIGN (type));
1040           /* This check for VR_VARYING is here because the old code
1041              using get_range_info would return VR_RANGE for the entire
1042              domain, instead of VR_VARYING.  The new code normalizes
1043              full-domain ranges to VR_VARYING.  */
1044           if (vr_kind == VR_RANGE || vr_kind == VR_VARYING)
1045             *exp_range = value_range (type, var_min, var_max);
1046         }
1047     }
1048
1049   if (!tree_is_chrec (exp)
1050       && get_gimple_rhs_class (TREE_CODE (exp)) != GIMPLE_TERNARY_RHS)
1051     {
1052       extract_ops_from_tree (exp, &code, &op0, &op1);
1053       if (split_constant_offset_1 (type, op0, code, op1, var, off,
1054                                    exp_range, cache, limit))
1055         return;
1056     }
1057
1058   *var = exp;
1059   if (INTEGRAL_TYPE_P (type))
1060     *var = fold_convert (sizetype, *var);
1061   *off = ssize_int (0);
1062
1063   value_range r;
1064   if (exp_range && code != SSA_NAME
1065       && get_range_query (cfun)->range_of_expr (r, exp)
1066       && !r.undefined_p ())
1067     *exp_range = r;
1068 }
1069
1070 /* Expresses EXP as VAR + OFF, where OFF is a constant.  VAR has the same
1071    type as EXP while OFF has type ssizetype.  */
1072
1073 void
1074 split_constant_offset (tree exp, tree *var, tree *off)
1075 {
1076   unsigned limit = param_ssa_name_def_chain_limit;
1077   static hash_map<tree, std::pair<tree, tree> > *cache;
1078   if (!cache)
1079     cache = new hash_map<tree, std::pair<tree, tree> > (37);
1080   split_constant_offset (exp, var, off, nullptr, *cache, &limit);
1081   *var = fold_convert (TREE_TYPE (exp), *var);
1082   cache->empty ();
1083 }
1084
1085 /* Returns the address ADDR of an object in a canonical shape (without nop
1086    casts, and with type of pointer to the object).  */
1087
1088 static tree
1089 canonicalize_base_object_address (tree addr)
1090 {
1091   tree orig = addr;
1092
1093   STRIP_NOPS (addr);
1094
1095   /* The base address may be obtained by casting from integer, in that case
1096      keep the cast.  */
1097   if (!POINTER_TYPE_P (TREE_TYPE (addr)))
1098     return orig;
1099
1100   if (TREE_CODE (addr) != ADDR_EXPR)
1101     return addr;
1102
1103   return build_fold_addr_expr (TREE_OPERAND (addr, 0));
1104 }
1105
1106 /* Analyze the behavior of memory reference REF within STMT.
1107    There are two modes:
1108
1109    - BB analysis.  In this case we simply split the address into base,
1110      init and offset components, without reference to any containing loop.
1111      The resulting base and offset are general expressions and they can
1112      vary arbitrarily from one iteration of the containing loop to the next.
1113      The step is always zero.
1114
1115    - loop analysis.  In this case we analyze the reference both wrt LOOP
1116      and on the basis that the reference occurs (is "used") in LOOP;
1117      see the comment above analyze_scalar_evolution_in_loop for more
1118      information about this distinction.  The base, init, offset and
1119      step fields are all invariant in LOOP.
1120
1121    Perform BB analysis if LOOP is null, or if LOOP is the function's
1122    dummy outermost loop.  In other cases perform loop analysis.
1123
1124    Return true if the analysis succeeded and store the results in DRB if so.
1125    BB analysis can only fail for bitfield or reversed-storage accesses.  */
1126
1127 opt_result
1128 dr_analyze_innermost (innermost_loop_behavior *drb, tree ref,
1129                       class loop *loop, const gimple *stmt)
1130 {
1131   poly_int64 pbitsize, pbitpos;
1132   tree base, poffset;
1133   machine_mode pmode;
1134   int punsignedp, preversep, pvolatilep;
1135   affine_iv base_iv, offset_iv;
1136   tree init, dinit, step;
1137   bool in_loop = (loop && loop->num);
1138
1139   if (dump_file && (dump_flags & TDF_DETAILS))
1140     fprintf (dump_file, "analyze_innermost: ");
1141
1142   base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset, &pmode,
1143                               &punsignedp, &preversep, &pvolatilep);
1144   gcc_assert (base != NULL_TREE);
1145
1146   poly_int64 pbytepos;
1147   if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
1148     return opt_result::failure_at (stmt,
1149                                    "failed: bit offset alignment.\n");
1150
1151   if (preversep)
1152     return opt_result::failure_at (stmt,
1153                                    "failed: reverse storage order.\n");
1154
1155   /* Calculate the alignment and misalignment for the inner reference.  */
1156   unsigned int HOST_WIDE_INT bit_base_misalignment;
1157   unsigned int bit_base_alignment;
1158   get_object_alignment_1 (base, &bit_base_alignment, &bit_base_misalignment);
1159
1160   /* There are no bitfield references remaining in BASE, so the values
1161      we got back must be whole bytes.  */
1162   gcc_assert (bit_base_alignment % BITS_PER_UNIT == 0
1163               && bit_base_misalignment % BITS_PER_UNIT == 0);
1164   unsigned int base_alignment = bit_base_alignment / BITS_PER_UNIT;
1165   poly_int64 base_misalignment = bit_base_misalignment / BITS_PER_UNIT;
1166
1167   if (TREE_CODE (base) == MEM_REF)
1168     {
1169       if (!integer_zerop (TREE_OPERAND (base, 1)))
1170         {
1171           /* Subtract MOFF from the base and add it to POFFSET instead.
1172              Adjust the misalignment to reflect the amount we subtracted.  */
1173           poly_offset_int moff = mem_ref_offset (base);
1174           base_misalignment -= moff.force_shwi ();
1175           tree mofft = wide_int_to_tree (sizetype, moff);
1176           if (!poffset)
1177             poffset = mofft;
1178           else
1179             poffset = size_binop (PLUS_EXPR, poffset, mofft);
1180         }
1181       base = TREE_OPERAND (base, 0);
1182     }
1183   else
1184     base = build_fold_addr_expr (base);
1185
1186   if (in_loop)
1187     {
1188       if (!simple_iv (loop, loop, base, &base_iv, true))
1189         return opt_result::failure_at
1190           (stmt, "failed: evolution of base is not affine.\n");
1191     }
1192   else
1193     {
1194       base_iv.base = base;
1195       base_iv.step = ssize_int (0);
1196       base_iv.no_overflow = true;
1197     }
1198
1199   if (!poffset)
1200     {
1201       offset_iv.base = ssize_int (0);
1202       offset_iv.step = ssize_int (0);
1203     }
1204   else
1205     {
1206       if (!in_loop)
1207         {
1208           offset_iv.base = poffset;
1209           offset_iv.step = ssize_int (0);
1210         }
1211       else if (!simple_iv (loop, loop, poffset, &offset_iv, true))
1212         return opt_result::failure_at
1213           (stmt, "failed: evolution of offset is not affine.\n");
1214     }
1215
1216   init = ssize_int (pbytepos);
1217
1218   /* Subtract any constant component from the base and add it to INIT instead.
1219      Adjust the misalignment to reflect the amount we subtracted.  */
1220   split_constant_offset (base_iv.base, &base_iv.base, &dinit);
1221   init = size_binop (PLUS_EXPR, init, dinit);
1222   base_misalignment -= TREE_INT_CST_LOW (dinit);
1223
1224   split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
1225   init = size_binop (PLUS_EXPR, init, dinit);
1226
1227   step = size_binop (PLUS_EXPR,
1228                      fold_convert (ssizetype, base_iv.step),
1229                      fold_convert (ssizetype, offset_iv.step));
1230
1231   base = canonicalize_base_object_address (base_iv.base);
1232
1233   /* See if get_pointer_alignment can guarantee a higher alignment than
1234      the one we calculated above.  */
1235   unsigned int HOST_WIDE_INT alt_misalignment;
1236   unsigned int alt_alignment;
1237   get_pointer_alignment_1 (base, &alt_alignment, &alt_misalignment);
1238
1239   /* As above, these values must be whole bytes.  */
1240   gcc_assert (alt_alignment % BITS_PER_UNIT == 0
1241               && alt_misalignment % BITS_PER_UNIT == 0);
1242   alt_alignment /= BITS_PER_UNIT;
1243   alt_misalignment /= BITS_PER_UNIT;
1244
1245   if (base_alignment < alt_alignment)
1246     {
1247       base_alignment = alt_alignment;
1248       base_misalignment = alt_misalignment;
1249     }
1250
1251   drb->base_address = base;
1252   drb->offset = fold_convert (ssizetype, offset_iv.base);
1253   drb->init = init;
1254   drb->step = step;
1255   if (known_misalignment (base_misalignment, base_alignment,
1256                           &drb->base_misalignment))
1257     drb->base_alignment = base_alignment;
1258   else
1259     {
1260       drb->base_alignment = known_alignment (base_misalignment);
1261       drb->base_misalignment = 0;
1262     }
1263   drb->offset_alignment = highest_pow2_factor (offset_iv.base);
1264   drb->step_alignment = highest_pow2_factor (step);
1265
1266   if (dump_file && (dump_flags & TDF_DETAILS))
1267     fprintf (dump_file, "success.\n");
1268
1269   return opt_result::success ();
1270 }
1271
1272 /* Return true if OP is a valid component reference for a DR access
1273    function.  This accepts a subset of what handled_component_p accepts.  */
1274
1275 static bool
1276 access_fn_component_p (tree op)
1277 {
1278   switch (TREE_CODE (op))
1279     {
1280     case REALPART_EXPR:
1281     case IMAGPART_EXPR:
1282     case ARRAY_REF:
1283       return true;
1284
1285     case COMPONENT_REF:
1286       return TREE_CODE (TREE_TYPE (TREE_OPERAND (op, 0))) == RECORD_TYPE;
1287
1288     default:
1289       return false;
1290     }
1291 }
1292
1293 /* Returns whether BASE can have a access_fn_component_p with BASE
1294    as base.  */
1295
1296 static bool
1297 base_supports_access_fn_components_p (tree base)
1298 {
1299   switch (TREE_CODE (TREE_TYPE (base)))
1300     {
1301     case COMPLEX_TYPE:
1302     case ARRAY_TYPE:
1303     case RECORD_TYPE:
1304       return true;
1305     default:
1306       return false;
1307     }
1308 }
1309
1310 /* Determines the base object and the list of indices of memory reference
1311    DR, analyzed in LOOP and instantiated before NEST.  */
1312
1313 static void
1314 dr_analyze_indices (struct indices *dri, tree ref, edge nest, loop_p loop)
1315 {
1316   /* If analyzing a basic-block there are no indices to analyze
1317      and thus no access functions.  */
1318   if (!nest)
1319     {
1320       dri->base_object = ref;
1321       dri->access_fns.create (0);
1322       return;
1323     }
1324
1325   vec<tree> access_fns = vNULL;
1326
1327   /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
1328      into a two element array with a constant index.  The base is
1329      then just the immediate underlying object.  */
1330   if (TREE_CODE (ref) == REALPART_EXPR)
1331     {
1332       ref = TREE_OPERAND (ref, 0);
1333       access_fns.safe_push (integer_zero_node);
1334     }
1335   else if (TREE_CODE (ref) == IMAGPART_EXPR)
1336     {
1337       ref = TREE_OPERAND (ref, 0);
1338       access_fns.safe_push (integer_one_node);
1339     }
1340
1341   /* Analyze access functions of dimensions we know to be independent.
1342      The list of component references handled here should be kept in
1343      sync with access_fn_component_p.  */
1344   while (handled_component_p (ref))
1345     {
1346       if (TREE_CODE (ref) == ARRAY_REF)
1347         {
1348           tree op = TREE_OPERAND (ref, 1);
1349           tree access_fn = analyze_scalar_evolution (loop, op);
1350           access_fn = instantiate_scev (nest, loop, access_fn);
1351           access_fns.safe_push (access_fn);
1352         }
1353       else if (TREE_CODE (ref) == COMPONENT_REF
1354                && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) == RECORD_TYPE)
1355         {
1356           /* For COMPONENT_REFs of records (but not unions!) use the
1357              FIELD_DECL offset as constant access function so we can
1358              disambiguate a[i].f1 and a[i].f2.  */
1359           tree off = component_ref_field_offset (ref);
1360           off = size_binop (PLUS_EXPR,
1361                             size_binop (MULT_EXPR,
1362                                         fold_convert (bitsizetype, off),
1363                                         bitsize_int (BITS_PER_UNIT)),
1364                             DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref, 1)));
1365           access_fns.safe_push (off);
1366         }
1367       else
1368         /* If we have an unhandled component we could not translate
1369            to an access function stop analyzing.  We have determined
1370            our base object in this case.  */
1371         break;
1372
1373       ref = TREE_OPERAND (ref, 0);
1374     }
1375
1376   /* If the address operand of a MEM_REF base has an evolution in the
1377      analyzed nest, add it as an additional independent access-function.  */
1378   if (TREE_CODE (ref) == MEM_REF)
1379     {
1380       tree op = TREE_OPERAND (ref, 0);
1381       tree access_fn = analyze_scalar_evolution (loop, op);
1382       access_fn = instantiate_scev (nest, loop, access_fn);
1383       STRIP_NOPS (access_fn);
1384       if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
1385         {
1386           tree memoff = TREE_OPERAND (ref, 1);
1387           tree base = initial_condition (access_fn);
1388           tree orig_type = TREE_TYPE (base);
1389           STRIP_USELESS_TYPE_CONVERSION (base);
1390           tree off;
1391           split_constant_offset (base, &base, &off);
1392           STRIP_USELESS_TYPE_CONVERSION (base);
1393           /* Fold the MEM_REF offset into the evolutions initial
1394              value to make more bases comparable.  */
1395           if (!integer_zerop (memoff))
1396             {
1397               off = size_binop (PLUS_EXPR, off,
1398                                 fold_convert (ssizetype, memoff));
1399               memoff = build_int_cst (TREE_TYPE (memoff), 0);
1400             }
1401           /* Adjust the offset so it is a multiple of the access type
1402              size and thus we separate bases that can possibly be used
1403              to produce partial overlaps (which the access_fn machinery
1404              cannot handle).  */
1405           wide_int rem;
1406           if (TYPE_SIZE_UNIT (TREE_TYPE (ref))
1407               && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (ref))) == INTEGER_CST
1408               && !integer_zerop (TYPE_SIZE_UNIT (TREE_TYPE (ref))))
1409             rem = wi::mod_trunc
1410               (wi::to_wide (off),
1411                wi::to_wide (TYPE_SIZE_UNIT (TREE_TYPE (ref))),
1412                SIGNED);
1413           else
1414             /* If we can't compute the remainder simply force the initial
1415                condition to zero.  */
1416             rem = wi::to_wide (off);
1417           off = wide_int_to_tree (ssizetype, wi::to_wide (off) - rem);
1418           memoff = wide_int_to_tree (TREE_TYPE (memoff), rem);
1419           /* And finally replace the initial condition.  */
1420           access_fn = chrec_replace_initial_condition
1421               (access_fn, fold_convert (orig_type, off));
1422           /* ???  This is still not a suitable base object for
1423              dr_may_alias_p - the base object needs to be an
1424              access that covers the object as whole.  With
1425              an evolution in the pointer this cannot be
1426              guaranteed.
1427              As a band-aid, mark the access so we can special-case
1428              it in dr_may_alias_p.  */
1429           tree old = ref;
1430           ref = fold_build2_loc (EXPR_LOCATION (ref),
1431                                  MEM_REF, TREE_TYPE (ref),
1432                                  base, memoff);
1433           MR_DEPENDENCE_CLIQUE (ref) = MR_DEPENDENCE_CLIQUE (old);
1434           MR_DEPENDENCE_BASE (ref) = MR_DEPENDENCE_BASE (old);
1435           dri->unconstrained_base = true;
1436           access_fns.safe_push (access_fn);
1437         }
1438     }
1439   else if (DECL_P (ref))
1440     {
1441       /* Canonicalize DR_BASE_OBJECT to MEM_REF form.  */
1442       ref = build2 (MEM_REF, TREE_TYPE (ref),
1443                     build_fold_addr_expr (ref),
1444                     build_int_cst (reference_alias_ptr_type (ref), 0));
1445     }
1446
1447   dri->base_object = ref;
1448   dri->access_fns = access_fns;
1449 }
1450
1451 /* Extracts the alias analysis information from the memory reference DR.  */
1452
1453 static void
1454 dr_analyze_alias (struct data_reference *dr)
1455 {
1456   tree ref = DR_REF (dr);
1457   tree base = get_base_address (ref), addr;
1458
1459   if (INDIRECT_REF_P (base)
1460       || TREE_CODE (base) == MEM_REF)
1461     {
1462       addr = TREE_OPERAND (base, 0);
1463       if (TREE_CODE (addr) == SSA_NAME)
1464         DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
1465     }
1466 }
1467
1468 /* Frees data reference DR.  */
1469
1470 void
1471 free_data_ref (data_reference_p dr)
1472 {
1473   DR_ACCESS_FNS (dr).release ();
1474   if (dr->alt_indices.base_object)
1475     dr->alt_indices.access_fns.release ();
1476   free (dr);
1477 }
1478
1479 /* Analyze memory reference MEMREF, which is accessed in STMT.
1480    The reference is a read if IS_READ is true, otherwise it is a write.
1481    IS_CONDITIONAL_IN_STMT indicates that the reference is conditional
1482    within STMT, i.e. that it might not occur even if STMT is executed
1483    and runs to completion.
1484
1485    Return the data_reference description of MEMREF.  NEST is the outermost
1486    loop in which the reference should be instantiated, LOOP is the loop
1487    in which the data reference should be analyzed.  */
1488
1489 struct data_reference *
1490 create_data_ref (edge nest, loop_p loop, tree memref, gimple *stmt,
1491                  bool is_read, bool is_conditional_in_stmt)
1492 {
1493   struct data_reference *dr;
1494
1495   if (dump_file && (dump_flags & TDF_DETAILS))
1496     {
1497       fprintf (dump_file, "Creating dr for ");
1498       print_generic_expr (dump_file, memref, TDF_SLIM);
1499       fprintf (dump_file, "\n");
1500     }
1501
1502   dr = XCNEW (struct data_reference);
1503   DR_STMT (dr) = stmt;
1504   DR_REF (dr) = memref;
1505   DR_IS_READ (dr) = is_read;
1506   DR_IS_CONDITIONAL_IN_STMT (dr) = is_conditional_in_stmt;
1507
1508   dr_analyze_innermost (&DR_INNERMOST (dr), memref,
1509                         nest != NULL ? loop : NULL, stmt);
1510   dr_analyze_indices (&dr->indices, DR_REF (dr), nest, loop);
1511   dr_analyze_alias (dr);
1512
1513   if (dump_file && (dump_flags & TDF_DETAILS))
1514     {
1515       unsigned i;
1516       fprintf (dump_file, "\tbase_address: ");
1517       print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
1518       fprintf (dump_file, "\n\toffset from base address: ");
1519       print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
1520       fprintf (dump_file, "\n\tconstant offset from base address: ");
1521       print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
1522       fprintf (dump_file, "\n\tstep: ");
1523       print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
1524       fprintf (dump_file, "\n\tbase alignment: %d", DR_BASE_ALIGNMENT (dr));
1525       fprintf (dump_file, "\n\tbase misalignment: %d",
1526                DR_BASE_MISALIGNMENT (dr));
1527       fprintf (dump_file, "\n\toffset alignment: %d",
1528                DR_OFFSET_ALIGNMENT (dr));
1529       fprintf (dump_file, "\n\tstep alignment: %d", DR_STEP_ALIGNMENT (dr));
1530       fprintf (dump_file, "\n\tbase_object: ");
1531       print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
1532       fprintf (dump_file, "\n");
1533       for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
1534         {
1535           fprintf (dump_file, "\tAccess function %d: ", i);
1536           print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
1537         }
1538     }
1539
1540   return dr;
1541 }
1542
1543 /*  A helper function computes order between two tree expressions T1 and T2.
1544     This is used in comparator functions sorting objects based on the order
1545     of tree expressions.  The function returns -1, 0, or 1.  */
1546
1547 int
1548 data_ref_compare_tree (tree t1, tree t2)
1549 {
1550   int i, cmp;
1551   enum tree_code code;
1552   char tclass;
1553
1554   if (t1 == t2)
1555     return 0;
1556   if (t1 == NULL)
1557     return -1;
1558   if (t2 == NULL)
1559     return 1;
1560
1561   STRIP_USELESS_TYPE_CONVERSION (t1);
1562   STRIP_USELESS_TYPE_CONVERSION (t2);
1563   if (t1 == t2)
1564     return 0;
1565
1566   if (TREE_CODE (t1) != TREE_CODE (t2)
1567       && ! (CONVERT_EXPR_P (t1) && CONVERT_EXPR_P (t2)))
1568     return TREE_CODE (t1) < TREE_CODE (t2) ? -1 : 1;
1569
1570   code = TREE_CODE (t1);
1571   switch (code)
1572     {
1573     case INTEGER_CST:
1574       return tree_int_cst_compare (t1, t2);
1575
1576     case STRING_CST:
1577       if (TREE_STRING_LENGTH (t1) != TREE_STRING_LENGTH (t2))
1578         return TREE_STRING_LENGTH (t1) < TREE_STRING_LENGTH (t2) ? -1 : 1;
1579       return memcmp (TREE_STRING_POINTER (t1), TREE_STRING_POINTER (t2),
1580                      TREE_STRING_LENGTH (t1));
1581
1582     case SSA_NAME:
1583       if (SSA_NAME_VERSION (t1) != SSA_NAME_VERSION (t2))
1584         return SSA_NAME_VERSION (t1) < SSA_NAME_VERSION (t2) ? -1 : 1;
1585       break;
1586
1587     default:
1588       if (POLY_INT_CST_P (t1))
1589         return compare_sizes_for_sort (wi::to_poly_widest (t1),
1590                                        wi::to_poly_widest (t2));
1591
1592       tclass = TREE_CODE_CLASS (code);
1593
1594       /* For decls, compare their UIDs.  */
1595       if (tclass == tcc_declaration)
1596         {
1597           if (DECL_UID (t1) != DECL_UID (t2))
1598             return DECL_UID (t1) < DECL_UID (t2) ? -1 : 1;
1599           break;
1600         }
1601       /* For expressions, compare their operands recursively.  */
1602       else if (IS_EXPR_CODE_CLASS (tclass))
1603         {
1604           for (i = TREE_OPERAND_LENGTH (t1) - 1; i >= 0; --i)
1605             {
1606               cmp = data_ref_compare_tree (TREE_OPERAND (t1, i),
1607                                            TREE_OPERAND (t2, i));
1608               if (cmp != 0)
1609                 return cmp;
1610             }
1611         }
1612       else
1613         gcc_unreachable ();
1614     }
1615
1616   return 0;
1617 }
1618
1619 /* Return TRUE it's possible to resolve data dependence DDR by runtime alias
1620    check.  */
1621
1622 opt_result
1623 runtime_alias_check_p (ddr_p ddr, class loop *loop, bool speed_p)
1624 {
1625   if (dump_enabled_p ())
1626     dump_printf (MSG_NOTE,
1627                  "consider run-time aliasing test between %T and %T\n",
1628                  DR_REF (DDR_A (ddr)), DR_REF (DDR_B (ddr)));
1629
1630   if (!speed_p)
1631     return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
1632                                    "runtime alias check not supported when"
1633                                    " optimizing for size.\n");
1634
1635   /* FORNOW: We don't support versioning with outer-loop in either
1636      vectorization or loop distribution.  */
1637   if (loop != NULL && loop->inner != NULL)
1638     return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
1639                                    "runtime alias check not supported for"
1640                                    " outer loop.\n");
1641
1642   return opt_result::success ();
1643 }
1644
1645 /* Operator == between two dr_with_seg_len objects.
1646
1647    This equality operator is used to make sure two data refs
1648    are the same one so that we will consider to combine the
1649    aliasing checks of those two pairs of data dependent data
1650    refs.  */
1651
1652 static bool
1653 operator == (const dr_with_seg_len& d1,
1654              const dr_with_seg_len& d2)
1655 {
1656   return (operand_equal_p (DR_BASE_ADDRESS (d1.dr),
1657                            DR_BASE_ADDRESS (d2.dr), 0)
1658           && data_ref_compare_tree (DR_OFFSET (d1.dr), DR_OFFSET (d2.dr)) == 0
1659           && data_ref_compare_tree (DR_INIT (d1.dr), DR_INIT (d2.dr)) == 0
1660           && data_ref_compare_tree (d1.seg_len, d2.seg_len) == 0
1661           && known_eq (d1.access_size, d2.access_size)
1662           && d1.align == d2.align);
1663 }
1664
1665 /* Comparison function for sorting objects of dr_with_seg_len_pair_t
1666    so that we can combine aliasing checks in one scan.  */
1667
1668 static int
1669 comp_dr_with_seg_len_pair (const void *pa_, const void *pb_)
1670 {
1671   const dr_with_seg_len_pair_t* pa = (const dr_with_seg_len_pair_t *) pa_;
1672   const dr_with_seg_len_pair_t* pb = (const dr_with_seg_len_pair_t *) pb_;
1673   const dr_with_seg_len &a1 = pa->first, &a2 = pa->second;
1674   const dr_with_seg_len &b1 = pb->first, &b2 = pb->second;
1675
1676   /* For DR pairs (a, b) and (c, d), we only consider to merge the alias checks
1677      if a and c have the same basic address snd step, and b and d have the same
1678      address and step.  Therefore, if any a&c or b&d don't have the same address
1679      and step, we don't care the order of those two pairs after sorting.  */
1680   int comp_res;
1681
1682   if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a1.dr),
1683                                          DR_BASE_ADDRESS (b1.dr))) != 0)
1684     return comp_res;
1685   if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a2.dr),
1686                                          DR_BASE_ADDRESS (b2.dr))) != 0)
1687     return comp_res;
1688   if ((comp_res = data_ref_compare_tree (DR_STEP (a1.dr),
1689                                          DR_STEP (b1.dr))) != 0)
1690     return comp_res;
1691   if ((comp_res = data_ref_compare_tree (DR_STEP (a2.dr),
1692                                          DR_STEP (b2.dr))) != 0)
1693     return comp_res;
1694   if ((comp_res = data_ref_compare_tree (DR_OFFSET (a1.dr),
1695                                          DR_OFFSET (b1.dr))) != 0)
1696     return comp_res;
1697   if ((comp_res = data_ref_compare_tree (DR_INIT (a1.dr),
1698                                          DR_INIT (b1.dr))) != 0)
1699     return comp_res;
1700   if ((comp_res = data_ref_compare_tree (DR_OFFSET (a2.dr),
1701                                          DR_OFFSET (b2.dr))) != 0)
1702     return comp_res;
1703   if ((comp_res = data_ref_compare_tree (DR_INIT (a2.dr),
1704                                          DR_INIT (b2.dr))) != 0)
1705     return comp_res;
1706
1707   return 0;
1708 }
1709
1710 /* Dump information about ALIAS_PAIR, indenting each line by INDENT.  */
1711
1712 static void
1713 dump_alias_pair (dr_with_seg_len_pair_t *alias_pair, const char *indent)
1714 {
1715   dump_printf (MSG_NOTE, "%sreference:      %T vs. %T\n", indent,
1716                DR_REF (alias_pair->first.dr),
1717                DR_REF (alias_pair->second.dr));
1718
1719   dump_printf (MSG_NOTE, "%ssegment length: %T", indent,
1720                alias_pair->first.seg_len);
1721   if (!operand_equal_p (alias_pair->first.seg_len,
1722                         alias_pair->second.seg_len, 0))
1723     dump_printf (MSG_NOTE, " vs. %T", alias_pair->second.seg_len);
1724
1725   dump_printf (MSG_NOTE, "\n%saccess size:    ", indent);
1726   dump_dec (MSG_NOTE, alias_pair->first.access_size);
1727   if (maybe_ne (alias_pair->first.access_size, alias_pair->second.access_size))
1728     {
1729       dump_printf (MSG_NOTE, " vs. ");
1730       dump_dec (MSG_NOTE, alias_pair->second.access_size);
1731     }
1732
1733   dump_printf (MSG_NOTE, "\n%salignment:      %d", indent,
1734                alias_pair->first.align);
1735   if (alias_pair->first.align != alias_pair->second.align)
1736     dump_printf (MSG_NOTE, " vs. %d", alias_pair->second.align);
1737
1738   dump_printf (MSG_NOTE, "\n%sflags:         ", indent);
1739   if (alias_pair->flags & DR_ALIAS_RAW)
1740     dump_printf (MSG_NOTE, " RAW");
1741   if (alias_pair->flags & DR_ALIAS_WAR)
1742     dump_printf (MSG_NOTE, " WAR");
1743   if (alias_pair->flags & DR_ALIAS_WAW)
1744     dump_printf (MSG_NOTE, " WAW");
1745   if (alias_pair->flags & DR_ALIAS_ARBITRARY)
1746     dump_printf (MSG_NOTE, " ARBITRARY");
1747   if (alias_pair->flags & DR_ALIAS_SWAPPED)
1748     dump_printf (MSG_NOTE, " SWAPPED");
1749   if (alias_pair->flags & DR_ALIAS_UNSWAPPED)
1750     dump_printf (MSG_NOTE, " UNSWAPPED");
1751   if (alias_pair->flags & DR_ALIAS_MIXED_STEPS)
1752     dump_printf (MSG_NOTE, " MIXED_STEPS");
1753   if (alias_pair->flags == 0)
1754     dump_printf (MSG_NOTE, " <none>");
1755   dump_printf (MSG_NOTE, "\n");
1756 }
1757
1758 /* Merge alias checks recorded in ALIAS_PAIRS and remove redundant ones.
1759    FACTOR is number of iterations that each data reference is accessed.
1760
1761    Basically, for each pair of dependent data refs store_ptr_0 & load_ptr_0,
1762    we create an expression:
1763
1764    ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
1765    || (load_ptr_0 + load_segment_length_0) <= store_ptr_0))
1766
1767    for aliasing checks.  However, in some cases we can decrease the number
1768    of checks by combining two checks into one.  For example, suppose we have
1769    another pair of data refs store_ptr_0 & load_ptr_1, and if the following
1770    condition is satisfied:
1771
1772    load_ptr_0 < load_ptr_1  &&
1773    load_ptr_1 - load_ptr_0 - load_segment_length_0 < store_segment_length_0
1774
1775    (this condition means, in each iteration of vectorized loop, the accessed
1776    memory of store_ptr_0 cannot be between the memory of load_ptr_0 and
1777    load_ptr_1.)
1778
1779    we then can use only the following expression to finish the alising checks
1780    between store_ptr_0 & load_ptr_0 and store_ptr_0 & load_ptr_1:
1781
1782    ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
1783    || (load_ptr_1 + load_segment_length_1 <= store_ptr_0))
1784
1785    Note that we only consider that load_ptr_0 and load_ptr_1 have the same
1786    basic address.  */
1787
1788 void
1789 prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs,
1790                                poly_uint64)
1791 {
1792   if (alias_pairs->is_empty ())
1793     return;
1794
1795   /* Canonicalize each pair so that the base components are ordered wrt
1796      data_ref_compare_tree.  This allows the loop below to merge more
1797      cases.  */
1798   unsigned int i;
1799   dr_with_seg_len_pair_t *alias_pair;
1800   FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
1801     {
1802       data_reference_p dr_a = alias_pair->first.dr;
1803       data_reference_p dr_b = alias_pair->second.dr;
1804       int comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (dr_a),
1805                                             DR_BASE_ADDRESS (dr_b));
1806       if (comp_res == 0)
1807         comp_res = data_ref_compare_tree (DR_OFFSET (dr_a), DR_OFFSET (dr_b));
1808       if (comp_res == 0)
1809         comp_res = data_ref_compare_tree (DR_INIT (dr_a), DR_INIT (dr_b));
1810       if (comp_res > 0)
1811         {
1812           std::swap (alias_pair->first, alias_pair->second);
1813           alias_pair->flags |= DR_ALIAS_SWAPPED;
1814         }
1815       else
1816         alias_pair->flags |= DR_ALIAS_UNSWAPPED;
1817     }
1818
1819   /* Sort the collected data ref pairs so that we can scan them once to
1820      combine all possible aliasing checks.  */
1821   alias_pairs->qsort (comp_dr_with_seg_len_pair);
1822
1823   /* Scan the sorted dr pairs and check if we can combine alias checks
1824      of two neighboring dr pairs.  */
1825   unsigned int last = 0;
1826   for (i = 1; i < alias_pairs->length (); ++i)
1827     {
1828       /* Deal with two ddrs (dr_a1, dr_b1) and (dr_a2, dr_b2).  */
1829       dr_with_seg_len_pair_t *alias_pair1 = &(*alias_pairs)[last];
1830       dr_with_seg_len_pair_t *alias_pair2 = &(*alias_pairs)[i];
1831
1832       dr_with_seg_len *dr_a1 = &alias_pair1->first;
1833       dr_with_seg_len *dr_b1 = &alias_pair1->second;
1834       dr_with_seg_len *dr_a2 = &alias_pair2->first;
1835       dr_with_seg_len *dr_b2 = &alias_pair2->second;
1836
1837       /* Remove duplicate data ref pairs.  */
1838       if (*dr_a1 == *dr_a2 && *dr_b1 == *dr_b2)
1839         {
1840           if (dump_enabled_p ())
1841             dump_printf (MSG_NOTE, "found equal ranges %T, %T and %T, %T\n",
1842                          DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
1843                          DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
1844           alias_pair1->flags |= alias_pair2->flags;
1845           continue;
1846         }
1847
1848       /* Assume that we won't be able to merge the pairs, then correct
1849          if we do.  */
1850       last += 1;
1851       if (last != i)
1852         (*alias_pairs)[last] = (*alias_pairs)[i];
1853
1854       if (*dr_a1 == *dr_a2 || *dr_b1 == *dr_b2)
1855         {
1856           /* We consider the case that DR_B1 and DR_B2 are same memrefs,
1857              and DR_A1 and DR_A2 are two consecutive memrefs.  */
1858           if (*dr_a1 == *dr_a2)
1859             {
1860               std::swap (dr_a1, dr_b1);
1861               std::swap (dr_a2, dr_b2);
1862             }
1863
1864           poly_int64 init_a1, init_a2;
1865           /* Only consider cases in which the distance between the initial
1866              DR_A1 and the initial DR_A2 is known at compile time.  */
1867           if (!operand_equal_p (DR_BASE_ADDRESS (dr_a1->dr),
1868                                 DR_BASE_ADDRESS (dr_a2->dr), 0)
1869               || !operand_equal_p (DR_OFFSET (dr_a1->dr),
1870                                    DR_OFFSET (dr_a2->dr), 0)
1871               || !poly_int_tree_p (DR_INIT (dr_a1->dr), &init_a1)
1872               || !poly_int_tree_p (DR_INIT (dr_a2->dr), &init_a2))
1873             continue;
1874
1875           /* Don't combine if we can't tell which one comes first.  */
1876           if (!ordered_p (init_a1, init_a2))
1877             continue;
1878
1879           /* Work out what the segment length would be if we did combine
1880              DR_A1 and DR_A2:
1881
1882              - If DR_A1 and DR_A2 have equal lengths, that length is
1883                also the combined length.
1884
1885              - If DR_A1 and DR_A2 both have negative "lengths", the combined
1886                length is the lower bound on those lengths.
1887
1888              - If DR_A1 and DR_A2 both have positive lengths, the combined
1889                length is the upper bound on those lengths.
1890
1891              Other cases are unlikely to give a useful combination.
1892
1893              The lengths both have sizetype, so the sign is taken from
1894              the step instead.  */
1895           poly_uint64 new_seg_len = 0;
1896           bool new_seg_len_p = !operand_equal_p (dr_a1->seg_len,
1897                                                  dr_a2->seg_len, 0);
1898           if (new_seg_len_p)
1899             {
1900               poly_uint64 seg_len_a1, seg_len_a2;
1901               if (!poly_int_tree_p (dr_a1->seg_len, &seg_len_a1)
1902                   || !poly_int_tree_p (dr_a2->seg_len, &seg_len_a2))
1903                 continue;
1904
1905               tree indicator_a = dr_direction_indicator (dr_a1->dr);
1906               if (TREE_CODE (indicator_a) != INTEGER_CST)
1907                 continue;
1908
1909               tree indicator_b = dr_direction_indicator (dr_a2->dr);
1910               if (TREE_CODE (indicator_b) != INTEGER_CST)
1911                 continue;
1912
1913               int sign_a = tree_int_cst_sgn (indicator_a);
1914               int sign_b = tree_int_cst_sgn (indicator_b);
1915
1916               if (sign_a <= 0 && sign_b <= 0)
1917                 new_seg_len = lower_bound (seg_len_a1, seg_len_a2);
1918               else if (sign_a >= 0 && sign_b >= 0)
1919                 new_seg_len = upper_bound (seg_len_a1, seg_len_a2);
1920               else
1921                 continue;
1922             }
1923           /* At this point we're committed to merging the refs.  */
1924
1925           /* Make sure dr_a1 starts left of dr_a2.  */
1926           if (maybe_gt (init_a1, init_a2))
1927             {
1928               std::swap (*dr_a1, *dr_a2);
1929               std::swap (init_a1, init_a2);
1930             }
1931
1932           /* The DR_Bs are equal, so only the DR_As can introduce
1933              mixed steps.  */
1934           if (!operand_equal_p (DR_STEP (dr_a1->dr), DR_STEP (dr_a2->dr), 0))
1935             alias_pair1->flags |= DR_ALIAS_MIXED_STEPS;
1936
1937           if (new_seg_len_p)
1938             {
1939               dr_a1->seg_len = build_int_cst (TREE_TYPE (dr_a1->seg_len),
1940                                               new_seg_len);
1941               dr_a1->align = MIN (dr_a1->align, known_alignment (new_seg_len));
1942             }
1943
1944           /* This is always positive due to the swap above.  */
1945           poly_uint64 diff = init_a2 - init_a1;
1946
1947           /* The new check will start at DR_A1.  Make sure that its access
1948              size encompasses the initial DR_A2.  */
1949           if (maybe_lt (dr_a1->access_size, diff + dr_a2->access_size))
1950             {
1951               dr_a1->access_size = upper_bound (dr_a1->access_size,
1952                                                 diff + dr_a2->access_size);
1953               unsigned int new_align = known_alignment (dr_a1->access_size);
1954               dr_a1->align = MIN (dr_a1->align, new_align);
1955             }
1956           if (dump_enabled_p ())
1957             dump_printf (MSG_NOTE, "merging ranges for %T, %T and %T, %T\n",
1958                          DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
1959                          DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
1960           alias_pair1->flags |= alias_pair2->flags;
1961           last -= 1;
1962         }
1963     }
1964   alias_pairs->truncate (last + 1);
1965
1966   /* Try to restore the original dr_with_seg_len order within each
1967      dr_with_seg_len_pair_t.  If we ended up combining swapped and
1968      unswapped pairs into the same check, we have to invalidate any
1969      RAW, WAR and WAW information for it.  */
1970   if (dump_enabled_p ())
1971     dump_printf (MSG_NOTE, "merged alias checks:\n");
1972   FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
1973     {
1974       unsigned int swap_mask = (DR_ALIAS_SWAPPED | DR_ALIAS_UNSWAPPED);
1975       unsigned int swapped = (alias_pair->flags & swap_mask);
1976       if (swapped == DR_ALIAS_SWAPPED)
1977         std::swap (alias_pair->first, alias_pair->second);
1978       else if (swapped != DR_ALIAS_UNSWAPPED)
1979         alias_pair->flags |= DR_ALIAS_ARBITRARY;
1980       alias_pair->flags &= ~swap_mask;
1981       if (dump_enabled_p ())
1982         dump_alias_pair (alias_pair, "  ");
1983     }
1984 }
1985
1986 /* A subroutine of create_intersect_range_checks, with a subset of the
1987    same arguments.  Try to use IFN_CHECK_RAW_PTRS and IFN_CHECK_WAR_PTRS
1988    to optimize cases in which the references form a simple RAW, WAR or
1989    WAR dependence.  */
1990
1991 static bool
1992 create_ifn_alias_checks (tree *cond_expr,
1993                          const dr_with_seg_len_pair_t &alias_pair)
1994 {
1995   const dr_with_seg_len& dr_a = alias_pair.first;
1996   const dr_with_seg_len& dr_b = alias_pair.second;
1997
1998   /* Check for cases in which:
1999
2000      (a) we have a known RAW, WAR or WAR dependence
2001      (b) the accesses are well-ordered in both the original and new code
2002          (see the comment above the DR_ALIAS_* flags for details); and
2003      (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR.  */
2004   if (alias_pair.flags & ~(DR_ALIAS_RAW | DR_ALIAS_WAR | DR_ALIAS_WAW))
2005     return false;
2006
2007   /* Make sure that both DRs access the same pattern of bytes,
2008      with a constant length and step.  */
2009   poly_uint64 seg_len;
2010   if (!operand_equal_p (dr_a.seg_len, dr_b.seg_len, 0)
2011       || !poly_int_tree_p (dr_a.seg_len, &seg_len)
2012       || maybe_ne (dr_a.access_size, dr_b.access_size)
2013       || !operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0)
2014       || !tree_fits_uhwi_p (DR_STEP (dr_a.dr)))
2015     return false;
2016
2017   unsigned HOST_WIDE_INT bytes = tree_to_uhwi (DR_STEP (dr_a.dr));
2018   tree addr_a = DR_BASE_ADDRESS (dr_a.dr);
2019   tree addr_b = DR_BASE_ADDRESS (dr_b.dr);
2020
2021   /* See whether the target suports what we want to do.  WAW checks are
2022      equivalent to WAR checks here.  */
2023   internal_fn ifn = (alias_pair.flags & DR_ALIAS_RAW
2024                      ? IFN_CHECK_RAW_PTRS
2025                      : IFN_CHECK_WAR_PTRS);
2026   unsigned int align = MIN (dr_a.align, dr_b.align);
2027   poly_uint64 full_length = seg_len + bytes;
2028   if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a),
2029                                            full_length, align))
2030     {
2031       full_length = seg_len + dr_a.access_size;
2032       if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a),
2033                                                full_length, align))
2034         return false;
2035     }
2036
2037   /* Commit to using this form of test.  */
2038   addr_a = fold_build_pointer_plus (addr_a, DR_OFFSET (dr_a.dr));
2039   addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr));
2040
2041   addr_b = fold_build_pointer_plus (addr_b, DR_OFFSET (dr_b.dr));
2042   addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr));
2043
2044   *cond_expr = build_call_expr_internal_loc (UNKNOWN_LOCATION,
2045                                              ifn, boolean_type_node,
2046                                              4, addr_a, addr_b,
2047                                              size_int (full_length),
2048                                              size_int (align));
2049
2050   if (dump_enabled_p ())
2051     {
2052       if (ifn == IFN_CHECK_RAW_PTRS)
2053         dump_printf (MSG_NOTE, "using an IFN_CHECK_RAW_PTRS test\n");
2054       else
2055         dump_printf (MSG_NOTE, "using an IFN_CHECK_WAR_PTRS test\n");
2056     }
2057   return true;
2058 }
2059
2060 /* Try to generate a runtime condition that is true if ALIAS_PAIR is
2061    free of aliases, using a condition based on index values instead
2062    of a condition based on addresses.  Return true on success,
2063    storing the condition in *COND_EXPR.
2064
2065    This can only be done if the two data references in ALIAS_PAIR access
2066    the same array object and the index is the only difference.  For example,
2067    if the two data references are DR_A and DR_B:
2068
2069                        DR_A                           DR_B
2070       data-ref         arr[i]                         arr[j]
2071       base_object      arr                            arr
2072       index            {i_0, +, 1}_loop               {j_0, +, 1}_loop
2073
2074    The addresses and their index are like:
2075
2076         |<- ADDR_A    ->|          |<- ADDR_B    ->|
2077      ------------------------------------------------------->
2078         |   |   |   |   |          |   |   |   |   |
2079      ------------------------------------------------------->
2080         i_0 ...         i_0+4      j_0 ...         j_0+4
2081
2082    We can create expression based on index rather than address:
2083
2084      (unsigned) (i_0 - j_0 + 3) <= 6
2085
2086    i.e. the indices are less than 4 apart.
2087
2088    Note evolution step of index needs to be considered in comparison.  */
2089
2090 static bool
2091 create_intersect_range_checks_index (class loop *loop, tree *cond_expr,
2092                                      const dr_with_seg_len_pair_t &alias_pair)
2093 {
2094   const dr_with_seg_len &dr_a = alias_pair.first;
2095   const dr_with_seg_len &dr_b = alias_pair.second;
2096   if ((alias_pair.flags & DR_ALIAS_MIXED_STEPS)
2097       || integer_zerop (DR_STEP (dr_a.dr))
2098       || integer_zerop (DR_STEP (dr_b.dr))
2099       || DR_NUM_DIMENSIONS (dr_a.dr) != DR_NUM_DIMENSIONS (dr_b.dr))
2100     return false;
2101
2102   poly_uint64 seg_len1, seg_len2;
2103   if (!poly_int_tree_p (dr_a.seg_len, &seg_len1)
2104       || !poly_int_tree_p (dr_b.seg_len, &seg_len2))
2105     return false;
2106
2107   if (!tree_fits_shwi_p (DR_STEP (dr_a.dr)))
2108     return false;
2109
2110   if (!operand_equal_p (DR_BASE_OBJECT (dr_a.dr), DR_BASE_OBJECT (dr_b.dr), 0))
2111     return false;
2112
2113   if (!operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0))
2114     return false;
2115
2116   gcc_assert (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST);
2117
2118   bool neg_step = tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0;
2119   unsigned HOST_WIDE_INT abs_step = tree_to_shwi (DR_STEP (dr_a.dr));
2120   if (neg_step)
2121     {
2122       abs_step = -abs_step;
2123       seg_len1 = (-wi::to_poly_wide (dr_a.seg_len)).force_uhwi ();
2124       seg_len2 = (-wi::to_poly_wide (dr_b.seg_len)).force_uhwi ();
2125     }
2126
2127   /* Infer the number of iterations with which the memory segment is accessed
2128      by DR.  In other words, alias is checked if memory segment accessed by
2129      DR_A in some iterations intersect with memory segment accessed by DR_B
2130      in the same amount iterations.
2131      Note segnment length is a linear function of number of iterations with
2132      DR_STEP as the coefficient.  */
2133   poly_uint64 niter_len1, niter_len2;
2134   if (!can_div_trunc_p (seg_len1 + abs_step - 1, abs_step, &niter_len1)
2135       || !can_div_trunc_p (seg_len2 + abs_step - 1, abs_step, &niter_len2))
2136     return false;
2137
2138   /* Divide each access size by the byte step, rounding up.  */
2139   poly_uint64 niter_access1, niter_access2;
2140   if (!can_div_trunc_p (dr_a.access_size + abs_step - 1,
2141                         abs_step, &niter_access1)
2142       || !can_div_trunc_p (dr_b.access_size + abs_step - 1,
2143                            abs_step, &niter_access2))
2144     return false;
2145
2146   bool waw_or_war_p = (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW)) == 0;
2147
2148   int found = -1;
2149   for (unsigned int i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
2150     {
2151       tree access1 = DR_ACCESS_FN (dr_a.dr, i);
2152       tree access2 = DR_ACCESS_FN (dr_b.dr, i);
2153       /* Two indices must be the same if they are not scev, or not scev wrto
2154          current loop being vecorized.  */
2155       if (TREE_CODE (access1) != POLYNOMIAL_CHREC
2156           || TREE_CODE (access2) != POLYNOMIAL_CHREC
2157           || CHREC_VARIABLE (access1) != (unsigned)loop->num
2158           || CHREC_VARIABLE (access2) != (unsigned)loop->num)
2159         {
2160           if (operand_equal_p (access1, access2, 0))
2161             continue;
2162
2163           return false;
2164         }
2165       if (found >= 0)
2166         return false;
2167       found = i;
2168     }
2169
2170   /* Ought not to happen in practice, since if all accesses are equal then the
2171      alias should be decidable at compile time.  */
2172   if (found < 0)
2173     return false;
2174
2175   /* The two indices must have the same step.  */
2176   tree access1 = DR_ACCESS_FN (dr_a.dr, found);
2177   tree access2 = DR_ACCESS_FN (dr_b.dr, found);
2178   if (!operand_equal_p (CHREC_RIGHT (access1), CHREC_RIGHT (access2), 0))
2179     return false;
2180
2181   tree idx_step = CHREC_RIGHT (access1);
2182   /* Index must have const step, otherwise DR_STEP won't be constant.  */
2183   gcc_assert (TREE_CODE (idx_step) == INTEGER_CST);
2184   /* Index must evaluate in the same direction as DR.  */
2185   gcc_assert (!neg_step || tree_int_cst_sign_bit (idx_step) == 1);
2186
2187   tree min1 = CHREC_LEFT (access1);
2188   tree min2 = CHREC_LEFT (access2);
2189   if (!types_compatible_p (TREE_TYPE (min1), TREE_TYPE (min2)))
2190     return false;
2191
2192   /* Ideally, alias can be checked against loop's control IV, but we
2193      need to prove linear mapping between control IV and reference
2194      index.  Although that should be true, we check against (array)
2195      index of data reference.  Like segment length, index length is
2196      linear function of the number of iterations with index_step as
2197      the coefficient, i.e, niter_len * idx_step.  */
2198   offset_int abs_idx_step = offset_int::from (wi::to_wide (idx_step),
2199                                               SIGNED);
2200   if (neg_step)
2201     abs_idx_step = -abs_idx_step;
2202   poly_offset_int idx_len1 = abs_idx_step * niter_len1;
2203   poly_offset_int idx_len2 = abs_idx_step * niter_len2;
2204   poly_offset_int idx_access1 = abs_idx_step * niter_access1;
2205   poly_offset_int idx_access2 = abs_idx_step * niter_access2;
2206
2207   gcc_assert (known_ge (idx_len1, 0)
2208               && known_ge (idx_len2, 0)
2209               && known_ge (idx_access1, 0)
2210               && known_ge (idx_access2, 0));
2211
2212   /* Each access has the following pattern, with lengths measured
2213      in units of INDEX:
2214
2215           <-- idx_len -->
2216           <--- A: -ve step --->
2217           +-----+-------+-----+-------+-----+
2218           | n-1 | ..... |  0  | ..... | n-1 |
2219           +-----+-------+-----+-------+-----+
2220                         <--- B: +ve step --->
2221                         <-- idx_len -->
2222                         |
2223                        min
2224
2225      where "n" is the number of scalar iterations covered by the segment
2226      and where each access spans idx_access units.
2227
2228      A is the range of bytes accessed when the step is negative,
2229      B is the range when the step is positive.
2230
2231      When checking for general overlap, we need to test whether
2232      the range:
2233
2234        [min1 + low_offset1, min1 + high_offset1 + idx_access1 - 1]
2235
2236      overlaps:
2237
2238        [min2 + low_offset2, min2 + high_offset2 + idx_access2 - 1]
2239
2240      where:
2241
2242         low_offsetN = +ve step ? 0 : -idx_lenN;
2243        high_offsetN = +ve step ? idx_lenN : 0;
2244
2245      This is equivalent to testing whether:
2246
2247        min1 + low_offset1 <= min2 + high_offset2 + idx_access2 - 1
2248        && min2 + low_offset2 <= min1 + high_offset1 + idx_access1 - 1
2249
2250      Converting this into a single test, there is an overlap if:
2251
2252        0 <= min2 - min1 + bias <= limit
2253
2254      where  bias = high_offset2 + idx_access2 - 1 - low_offset1
2255            limit = (high_offset1 - low_offset1 + idx_access1 - 1)
2256                  + (high_offset2 - low_offset2 + idx_access2 - 1)
2257       i.e. limit = idx_len1 + idx_access1 - 1 + idx_len2 + idx_access2 - 1
2258
2259      Combining the tests requires limit to be computable in an unsigned
2260      form of the index type; if it isn't, we fall back to the usual
2261      pointer-based checks.
2262
2263      We can do better if DR_B is a write and if DR_A and DR_B are
2264      well-ordered in both the original and the new code (see the
2265      comment above the DR_ALIAS_* flags for details).  In this case
2266      we know that for each i in [0, n-1], the write performed by
2267      access i of DR_B occurs after access numbers j<=i of DR_A in
2268      both the original and the new code.  Any write or anti
2269      dependencies wrt those DR_A accesses are therefore maintained.
2270
2271      We just need to make sure that each individual write in DR_B does not
2272      overlap any higher-indexed access in DR_A; such DR_A accesses happen
2273      after the DR_B access in the original code but happen before it in
2274      the new code.
2275
2276      We know the steps for both accesses are equal, so by induction, we
2277      just need to test whether the first write of DR_B overlaps a later
2278      access of DR_A.  In other words, we need to move min1 along by
2279      one iteration:
2280
2281        min1' = min1 + idx_step
2282
2283      and use the ranges:
2284
2285        [min1' + low_offset1', min1' + high_offset1' + idx_access1 - 1]
2286
2287      and:
2288
2289        [min2, min2 + idx_access2 - 1]
2290
2291      where:
2292
2293         low_offset1' = +ve step ? 0 : -(idx_len1 - |idx_step|)
2294        high_offset1' = +ve_step ? idx_len1 - |idx_step| : 0.  */
2295   if (waw_or_war_p)
2296     idx_len1 -= abs_idx_step;
2297
2298   poly_offset_int limit = idx_len1 + idx_access1 - 1 + idx_access2 - 1;
2299   if (!waw_or_war_p)
2300     limit += idx_len2;
2301
2302   tree utype = unsigned_type_for (TREE_TYPE (min1));
2303   if (!wi::fits_to_tree_p (limit, utype))
2304     return false;
2305
2306   poly_offset_int low_offset1 = neg_step ? -idx_len1 : 0;
2307   poly_offset_int high_offset2 = neg_step || waw_or_war_p ? 0 : idx_len2;
2308   poly_offset_int bias = high_offset2 + idx_access2 - 1 - low_offset1;
2309   /* Equivalent to adding IDX_STEP to MIN1.  */
2310   if (waw_or_war_p)
2311     bias -= wi::to_offset (idx_step);
2312
2313   tree subject = fold_build2 (MINUS_EXPR, utype,
2314                               fold_convert (utype, min2),
2315                               fold_convert (utype, min1));
2316   subject = fold_build2 (PLUS_EXPR, utype, subject,
2317                          wide_int_to_tree (utype, bias));
2318   tree part_cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject,
2319                                      wide_int_to_tree (utype, limit));
2320   if (*cond_expr)
2321     *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2322                               *cond_expr, part_cond_expr);
2323   else
2324     *cond_expr = part_cond_expr;
2325   if (dump_enabled_p ())
2326     {
2327       if (waw_or_war_p)
2328         dump_printf (MSG_NOTE, "using an index-based WAR/WAW test\n");
2329       else
2330         dump_printf (MSG_NOTE, "using an index-based overlap test\n");
2331     }
2332   return true;
2333 }
2334
2335 /* A subroutine of create_intersect_range_checks, with a subset of the
2336    same arguments.  Try to optimize cases in which the second access
2337    is a write and in which some overlap is valid.  */
2338
2339 static bool
2340 create_waw_or_war_checks (tree *cond_expr,
2341                           const dr_with_seg_len_pair_t &alias_pair)
2342 {
2343   const dr_with_seg_len& dr_a = alias_pair.first;
2344   const dr_with_seg_len& dr_b = alias_pair.second;
2345
2346   /* Check for cases in which:
2347
2348      (a) DR_B is always a write;
2349      (b) the accesses are well-ordered in both the original and new code
2350          (see the comment above the DR_ALIAS_* flags for details); and
2351      (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR.  */
2352   if (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW))
2353     return false;
2354
2355   /* Check for equal (but possibly variable) steps.  */
2356   tree step = DR_STEP (dr_a.dr);
2357   if (!operand_equal_p (step, DR_STEP (dr_b.dr)))
2358     return false;
2359
2360   /* Make sure that we can operate on sizetype without loss of precision.  */
2361   tree addr_type = TREE_TYPE (DR_BASE_ADDRESS (dr_a.dr));
2362   if (TYPE_PRECISION (addr_type) != TYPE_PRECISION (sizetype))
2363     return false;
2364
2365   /* All addresses involved are known to have a common alignment ALIGN.
2366      We can therefore subtract ALIGN from an exclusive endpoint to get
2367      an inclusive endpoint.  In the best (and common) case, ALIGN is the
2368      same as the access sizes of both DRs, and so subtracting ALIGN
2369      cancels out the addition of an access size.  */
2370   unsigned int align = MIN (dr_a.align, dr_b.align);
2371   poly_uint64 last_chunk_a = dr_a.access_size - align;
2372   poly_uint64 last_chunk_b = dr_b.access_size - align;
2373
2374   /* Get a boolean expression that is true when the step is negative.  */
2375   tree indicator = dr_direction_indicator (dr_a.dr);
2376   tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
2377                                fold_convert (ssizetype, indicator),
2378                                ssize_int (0));
2379
2380   /* Get lengths in sizetype.  */
2381   tree seg_len_a
2382     = fold_convert (sizetype, rewrite_to_non_trapping_overflow (dr_a.seg_len));
2383   step = fold_convert (sizetype, rewrite_to_non_trapping_overflow (step));
2384
2385   /* Each access has the following pattern:
2386
2387           <- |seg_len| ->
2388           <--- A: -ve step --->
2389           +-----+-------+-----+-------+-----+
2390           | n-1 | ..... |  0  | ..... | n-1 |
2391           +-----+-------+-----+-------+-----+
2392                         <--- B: +ve step --->
2393                         <- |seg_len| ->
2394                         |
2395                    base address
2396
2397      where "n" is the number of scalar iterations covered by the segment.
2398
2399      A is the range of bytes accessed when the step is negative,
2400      B is the range when the step is positive.
2401
2402      We know that DR_B is a write.  We also know (from checking that
2403      DR_A and DR_B are well-ordered) that for each i in [0, n-1],
2404      the write performed by access i of DR_B occurs after access numbers
2405      j<=i of DR_A in both the original and the new code.  Any write or
2406      anti dependencies wrt those DR_A accesses are therefore maintained.
2407
2408      We just need to make sure that each individual write in DR_B does not
2409      overlap any higher-indexed access in DR_A; such DR_A accesses happen
2410      after the DR_B access in the original code but happen before it in
2411      the new code.
2412
2413      We know the steps for both accesses are equal, so by induction, we
2414      just need to test whether the first write of DR_B overlaps a later
2415      access of DR_A.  In other words, we need to move addr_a along by
2416      one iteration:
2417
2418        addr_a' = addr_a + step
2419
2420      and check whether:
2421
2422        [addr_b, addr_b + last_chunk_b]
2423
2424      overlaps:
2425
2426        [addr_a' + low_offset_a, addr_a' + high_offset_a + last_chunk_a]
2427
2428      where [low_offset_a, high_offset_a] spans accesses [1, n-1].  I.e.:
2429
2430         low_offset_a = +ve step ? 0 : seg_len_a - step
2431        high_offset_a = +ve step ? seg_len_a - step : 0
2432
2433      This is equivalent to testing whether:
2434
2435        addr_a' + low_offset_a <= addr_b + last_chunk_b
2436        && addr_b <= addr_a' + high_offset_a + last_chunk_a
2437
2438      Converting this into a single test, there is an overlap if:
2439
2440        0 <= addr_b + last_chunk_b - addr_a' - low_offset_a <= limit
2441
2442      where limit = high_offset_a - low_offset_a + last_chunk_a + last_chunk_b
2443
2444      If DR_A is performed, limit + |step| - last_chunk_b is known to be
2445      less than the size of the object underlying DR_A.  We also know
2446      that last_chunk_b <= |step|; this is checked elsewhere if it isn't
2447      guaranteed at compile time.  There can therefore be no overflow if
2448      "limit" is calculated in an unsigned type with pointer precision.  */
2449   tree addr_a = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_a.dr),
2450                                          DR_OFFSET (dr_a.dr));
2451   addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr));
2452
2453   tree addr_b = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_b.dr),
2454                                          DR_OFFSET (dr_b.dr));
2455   addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr));
2456
2457   /* Advance ADDR_A by one iteration and adjust the length to compensate.  */
2458   addr_a = fold_build_pointer_plus (addr_a, step);
2459   tree seg_len_a_minus_step = fold_build2 (MINUS_EXPR, sizetype,
2460                                            seg_len_a, step);
2461   if (!CONSTANT_CLASS_P (seg_len_a_minus_step))
2462     seg_len_a_minus_step = build1 (SAVE_EXPR, sizetype, seg_len_a_minus_step);
2463
2464   tree low_offset_a = fold_build3 (COND_EXPR, sizetype, neg_step,
2465                                    seg_len_a_minus_step, size_zero_node);
2466   if (!CONSTANT_CLASS_P (low_offset_a))
2467     low_offset_a = build1 (SAVE_EXPR, sizetype, low_offset_a);
2468
2469   /* We could use COND_EXPR <neg_step, size_zero_node, seg_len_a_minus_step>,
2470      but it's usually more efficient to reuse the LOW_OFFSET_A result.  */
2471   tree high_offset_a = fold_build2 (MINUS_EXPR, sizetype, seg_len_a_minus_step,
2472                                     low_offset_a);
2473
2474   /* The amount added to addr_b - addr_a'.  */
2475   tree bias = fold_build2 (MINUS_EXPR, sizetype,
2476                            size_int (last_chunk_b), low_offset_a);
2477
2478   tree limit = fold_build2 (MINUS_EXPR, sizetype, high_offset_a, low_offset_a);
2479   limit = fold_build2 (PLUS_EXPR, sizetype, limit,
2480                        size_int (last_chunk_a + last_chunk_b));
2481
2482   tree subject = fold_build2 (POINTER_DIFF_EXPR, ssizetype, addr_b, addr_a);
2483   subject = fold_build2 (PLUS_EXPR, sizetype,
2484                          fold_convert (sizetype, subject), bias);
2485
2486   *cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject, limit);
2487   if (dump_enabled_p ())
2488     dump_printf (MSG_NOTE, "using an address-based WAR/WAW test\n");
2489   return true;
2490 }
2491
2492 /* If ALIGN is nonzero, set up *SEQ_MIN_OUT and *SEQ_MAX_OUT so that for
2493    every address ADDR accessed by D:
2494
2495      *SEQ_MIN_OUT <= ADDR (== ADDR & -ALIGN) <= *SEQ_MAX_OUT
2496
2497    In this case, every element accessed by D is aligned to at least
2498    ALIGN bytes.
2499
2500    If ALIGN is zero then instead set *SEG_MAX_OUT so that:
2501
2502      *SEQ_MIN_OUT <= ADDR < *SEQ_MAX_OUT.  */
2503
2504 static void
2505 get_segment_min_max (const dr_with_seg_len &d, tree *seg_min_out,
2506                      tree *seg_max_out, HOST_WIDE_INT align)
2507 {
2508   /* Each access has the following pattern:
2509
2510           <- |seg_len| ->
2511           <--- A: -ve step --->
2512           +-----+-------+-----+-------+-----+
2513           | n-1 | ,.... |  0  | ..... | n-1 |
2514           +-----+-------+-----+-------+-----+
2515                         <--- B: +ve step --->
2516                         <- |seg_len| ->
2517                         |
2518                    base address
2519
2520      where "n" is the number of scalar iterations covered by the segment.
2521      (This should be VF for a particular pair if we know that both steps
2522      are the same, otherwise it will be the full number of scalar loop
2523      iterations.)
2524
2525      A is the range of bytes accessed when the step is negative,
2526      B is the range when the step is positive.
2527
2528      If the access size is "access_size" bytes, the lowest addressed byte is:
2529
2530          base + (step < 0 ? seg_len : 0)   [LB]
2531
2532      and the highest addressed byte is always below:
2533
2534          base + (step < 0 ? 0 : seg_len) + access_size   [UB]
2535
2536      Thus:
2537
2538          LB <= ADDR < UB
2539
2540      If ALIGN is nonzero, all three values are aligned to at least ALIGN
2541      bytes, so:
2542
2543          LB <= ADDR <= UB - ALIGN
2544
2545      where "- ALIGN" folds naturally with the "+ access_size" and often
2546      cancels it out.
2547
2548      We don't try to simplify LB and UB beyond this (e.g. by using
2549      MIN and MAX based on whether seg_len rather than the stride is
2550      negative) because it is possible for the absolute size of the
2551      segment to overflow the range of a ssize_t.
2552
2553      Keeping the pointer_plus outside of the cond_expr should allow
2554      the cond_exprs to be shared with other alias checks.  */
2555   tree indicator = dr_direction_indicator (d.dr);
2556   tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
2557                                fold_convert (ssizetype, indicator),
2558                                ssize_int (0));
2559   tree addr_base = fold_build_pointer_plus (DR_BASE_ADDRESS (d.dr),
2560                                             DR_OFFSET (d.dr));
2561   addr_base = fold_build_pointer_plus (addr_base, DR_INIT (d.dr));
2562   tree seg_len
2563     = fold_convert (sizetype, rewrite_to_non_trapping_overflow (d.seg_len));
2564
2565   tree min_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
2566                                 seg_len, size_zero_node);
2567   tree max_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
2568                                 size_zero_node, seg_len);
2569   max_reach = fold_build2 (PLUS_EXPR, sizetype, max_reach,
2570                            size_int (d.access_size - align));
2571
2572   *seg_min_out = fold_build_pointer_plus (addr_base, min_reach);
2573   *seg_max_out = fold_build_pointer_plus (addr_base, max_reach);
2574 }
2575
2576 /* Generate a runtime condition that is true if ALIAS_PAIR is free of aliases,
2577    storing the condition in *COND_EXPR.  The fallback is to generate a
2578    a test that the two accesses do not overlap:
2579
2580      end_a <= start_b || end_b <= start_a.  */
2581
2582 static void
2583 create_intersect_range_checks (class loop *loop, tree *cond_expr,
2584                                const dr_with_seg_len_pair_t &alias_pair)
2585 {
2586   const dr_with_seg_len& dr_a = alias_pair.first;
2587   const dr_with_seg_len& dr_b = alias_pair.second;
2588   *cond_expr = NULL_TREE;
2589   if (create_intersect_range_checks_index (loop, cond_expr, alias_pair))
2590     return;
2591
2592   if (create_ifn_alias_checks (cond_expr, alias_pair))
2593     return;
2594
2595   if (create_waw_or_war_checks (cond_expr, alias_pair))
2596     return;
2597
2598   unsigned HOST_WIDE_INT min_align;
2599   tree_code cmp_code;
2600   /* We don't have to check DR_ALIAS_MIXED_STEPS here, since both versions
2601      are equivalent.  This is just an optimization heuristic.  */
2602   if (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST
2603       && TREE_CODE (DR_STEP (dr_b.dr)) == INTEGER_CST)
2604     {
2605       /* In this case adding access_size to seg_len is likely to give
2606          a simple X * step, where X is either the number of scalar
2607          iterations or the vectorization factor.  We're better off
2608          keeping that, rather than subtracting an alignment from it.
2609
2610          In this case the maximum values are exclusive and so there is
2611          no alias if the maximum of one segment equals the minimum
2612          of another.  */
2613       min_align = 0;
2614       cmp_code = LE_EXPR;
2615     }
2616   else
2617     {
2618       /* Calculate the minimum alignment shared by all four pointers,
2619          then arrange for this alignment to be subtracted from the
2620          exclusive maximum values to get inclusive maximum values.
2621          This "- min_align" is cumulative with a "+ access_size"
2622          in the calculation of the maximum values.  In the best
2623          (and common) case, the two cancel each other out, leaving
2624          us with an inclusive bound based only on seg_len.  In the
2625          worst case we're simply adding a smaller number than before.
2626
2627          Because the maximum values are inclusive, there is an alias
2628          if the maximum value of one segment is equal to the minimum
2629          value of the other.  */
2630       min_align = MIN (dr_a.align, dr_b.align);
2631       cmp_code = LT_EXPR;
2632     }
2633
2634   tree seg_a_min, seg_a_max, seg_b_min, seg_b_max;
2635   get_segment_min_max (dr_a, &seg_a_min, &seg_a_max, min_align);
2636   get_segment_min_max (dr_b, &seg_b_min, &seg_b_max, min_align);
2637
2638   *cond_expr
2639     = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
2640         fold_build2 (cmp_code, boolean_type_node, seg_a_max, seg_b_min),
2641         fold_build2 (cmp_code, boolean_type_node, seg_b_max, seg_a_min));
2642   if (dump_enabled_p ())
2643     dump_printf (MSG_NOTE, "using an address-based overlap test\n");
2644 }
2645
2646 /* Create a conditional expression that represents the run-time checks for
2647    overlapping of address ranges represented by a list of data references
2648    pairs passed in ALIAS_PAIRS.  Data references are in LOOP.  The returned
2649    COND_EXPR is the conditional expression to be used in the if statement
2650    that controls which version of the loop gets executed at runtime.  */
2651
2652 void
2653 create_runtime_alias_checks (class loop *loop,
2654                              const vec<dr_with_seg_len_pair_t> *alias_pairs,
2655                              tree * cond_expr)
2656 {
2657   tree part_cond_expr;
2658
2659   fold_defer_overflow_warnings ();
2660   for (const dr_with_seg_len_pair_t &alias_pair : alias_pairs)
2661     {
2662       gcc_assert (alias_pair.flags);
2663       if (dump_enabled_p ())
2664         dump_printf (MSG_NOTE,
2665                      "create runtime check for data references %T and %T\n",
2666                      DR_REF (alias_pair.first.dr),
2667                      DR_REF (alias_pair.second.dr));
2668
2669       /* Create condition expression for each pair data references.  */
2670       create_intersect_range_checks (loop, &part_cond_expr, alias_pair);
2671       if (*cond_expr)
2672         *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2673                                   *cond_expr, part_cond_expr);
2674       else
2675         *cond_expr = part_cond_expr;
2676     }
2677   fold_undefer_and_ignore_overflow_warnings ();
2678 }
2679
2680 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
2681    expressions.  */
2682 static bool
2683 dr_equal_offsets_p1 (tree offset1, tree offset2)
2684 {
2685   bool res;
2686
2687   STRIP_NOPS (offset1);
2688   STRIP_NOPS (offset2);
2689
2690   if (offset1 == offset2)
2691     return true;
2692
2693   if (TREE_CODE (offset1) != TREE_CODE (offset2)
2694       || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
2695     return false;
2696
2697   res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
2698                              TREE_OPERAND (offset2, 0));
2699
2700   if (!res || !BINARY_CLASS_P (offset1))
2701     return res;
2702
2703   res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
2704                              TREE_OPERAND (offset2, 1));
2705
2706   return res;
2707 }
2708
2709 /* Check if DRA and DRB have equal offsets.  */
2710 bool
2711 dr_equal_offsets_p (struct data_reference *dra,
2712                     struct data_reference *drb)
2713 {
2714   tree offset1, offset2;
2715
2716   offset1 = DR_OFFSET (dra);
2717   offset2 = DR_OFFSET (drb);
2718
2719   return dr_equal_offsets_p1 (offset1, offset2);
2720 }
2721
2722 /* Returns true if FNA == FNB.  */
2723
2724 static bool
2725 affine_function_equal_p (affine_fn fna, affine_fn fnb)
2726 {
2727   unsigned i, n = fna.length ();
2728
2729   if (n != fnb.length ())
2730     return false;
2731
2732   for (i = 0; i < n; i++)
2733     if (!operand_equal_p (fna[i], fnb[i], 0))
2734       return false;
2735
2736   return true;
2737 }
2738
2739 /* If all the functions in CF are the same, returns one of them,
2740    otherwise returns NULL.  */
2741
2742 static affine_fn
2743 common_affine_function (conflict_function *cf)
2744 {
2745   unsigned i;
2746   affine_fn comm;
2747
2748   if (!CF_NONTRIVIAL_P (cf))
2749     return affine_fn ();
2750
2751   comm = cf->fns[0];
2752
2753   for (i = 1; i < cf->n; i++)
2754     if (!affine_function_equal_p (comm, cf->fns[i]))
2755       return affine_fn ();
2756
2757   return comm;
2758 }
2759
2760 /* Returns the base of the affine function FN.  */
2761
2762 static tree
2763 affine_function_base (affine_fn fn)
2764 {
2765   return fn[0];
2766 }
2767
2768 /* Returns true if FN is a constant.  */
2769
2770 static bool
2771 affine_function_constant_p (affine_fn fn)
2772 {
2773   unsigned i;
2774   tree coef;
2775
2776   for (i = 1; fn.iterate (i, &coef); i++)
2777     if (!integer_zerop (coef))
2778       return false;
2779
2780   return true;
2781 }
2782
2783 /* Returns true if FN is the zero constant function.  */
2784
2785 static bool
2786 affine_function_zero_p (affine_fn fn)
2787 {
2788   return (integer_zerop (affine_function_base (fn))
2789           && affine_function_constant_p (fn));
2790 }
2791
2792 /* Returns a signed integer type with the largest precision from TA
2793    and TB.  */
2794
2795 static tree
2796 signed_type_for_types (tree ta, tree tb)
2797 {
2798   if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
2799     return signed_type_for (ta);
2800   else
2801     return signed_type_for (tb);
2802 }
2803
2804 /* Applies operation OP on affine functions FNA and FNB, and returns the
2805    result.  */
2806
2807 static affine_fn
2808 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
2809 {
2810   unsigned i, n, m;
2811   affine_fn ret;
2812   tree coef;
2813
2814   if (fnb.length () > fna.length ())
2815     {
2816       n = fna.length ();
2817       m = fnb.length ();
2818     }
2819   else
2820     {
2821       n = fnb.length ();
2822       m = fna.length ();
2823     }
2824
2825   ret.create (m);
2826   for (i = 0; i < n; i++)
2827     {
2828       tree type = signed_type_for_types (TREE_TYPE (fna[i]),
2829                                          TREE_TYPE (fnb[i]));
2830       ret.quick_push (fold_build2 (op, type, fna[i], fnb[i]));
2831     }
2832
2833   for (; fna.iterate (i, &coef); i++)
2834     ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2835                                  coef, integer_zero_node));
2836   for (; fnb.iterate (i, &coef); i++)
2837     ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2838                                  integer_zero_node, coef));
2839
2840   return ret;
2841 }
2842
2843 /* Returns the sum of affine functions FNA and FNB.  */
2844
2845 static affine_fn
2846 affine_fn_plus (affine_fn fna, affine_fn fnb)
2847 {
2848   return affine_fn_op (PLUS_EXPR, fna, fnb);
2849 }
2850
2851 /* Returns the difference of affine functions FNA and FNB.  */
2852
2853 static affine_fn
2854 affine_fn_minus (affine_fn fna, affine_fn fnb)
2855 {
2856   return affine_fn_op (MINUS_EXPR, fna, fnb);
2857 }
2858
2859 /* Frees affine function FN.  */
2860
2861 static void
2862 affine_fn_free (affine_fn fn)
2863 {
2864   fn.release ();
2865 }
2866
2867 /* Determine for each subscript in the data dependence relation DDR
2868    the distance.  */
2869
2870 static void
2871 compute_subscript_distance (struct data_dependence_relation *ddr)
2872 {
2873   conflict_function *cf_a, *cf_b;
2874   affine_fn fn_a, fn_b, diff;
2875
2876   if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
2877     {
2878       unsigned int i;
2879
2880       for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2881         {
2882           struct subscript *subscript;
2883
2884           subscript = DDR_SUBSCRIPT (ddr, i);
2885           cf_a = SUB_CONFLICTS_IN_A (subscript);
2886           cf_b = SUB_CONFLICTS_IN_B (subscript);
2887
2888           fn_a = common_affine_function (cf_a);
2889           fn_b = common_affine_function (cf_b);
2890           if (!fn_a.exists () || !fn_b.exists ())
2891             {
2892               SUB_DISTANCE (subscript) = chrec_dont_know;
2893               return;
2894             }
2895           diff = affine_fn_minus (fn_a, fn_b);
2896
2897           if (affine_function_constant_p (diff))
2898             SUB_DISTANCE (subscript) = affine_function_base (diff);
2899           else
2900             SUB_DISTANCE (subscript) = chrec_dont_know;
2901
2902           affine_fn_free (diff);
2903         }
2904     }
2905 }
2906
2907 /* Returns the conflict function for "unknown".  */
2908
2909 static conflict_function *
2910 conflict_fn_not_known (void)
2911 {
2912   conflict_function *fn = XCNEW (conflict_function);
2913   fn->n = NOT_KNOWN;
2914
2915   return fn;
2916 }
2917
2918 /* Returns the conflict function for "independent".  */
2919
2920 static conflict_function *
2921 conflict_fn_no_dependence (void)
2922 {
2923   conflict_function *fn = XCNEW (conflict_function);
2924   fn->n = NO_DEPENDENCE;
2925
2926   return fn;
2927 }
2928
2929 /* Returns true if the address of OBJ is invariant in LOOP.  */
2930
2931 static bool
2932 object_address_invariant_in_loop_p (const class loop *loop, const_tree obj)
2933 {
2934   while (handled_component_p (obj))
2935     {
2936       if (TREE_CODE (obj) == ARRAY_REF)
2937         {
2938           for (int i = 1; i < 4; ++i)
2939             if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, i),
2940                                                         loop->num))
2941               return false;
2942         }
2943       else if (TREE_CODE (obj) == COMPONENT_REF)
2944         {
2945           if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
2946                                                       loop->num))
2947             return false;
2948         }
2949       obj = TREE_OPERAND (obj, 0);
2950     }
2951
2952   if (!INDIRECT_REF_P (obj)
2953       && TREE_CODE (obj) != MEM_REF)
2954     return true;
2955
2956   return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
2957                                                   loop->num);
2958 }
2959
2960 /* Returns false if we can prove that data references A and B do not alias,
2961    true otherwise.  If LOOP_NEST is false no cross-iteration aliases are
2962    considered.  */
2963
2964 bool
2965 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
2966                 class loop *loop_nest)
2967 {
2968   tree addr_a = DR_BASE_OBJECT (a);
2969   tree addr_b = DR_BASE_OBJECT (b);
2970
2971   /* If we are not processing a loop nest but scalar code we
2972      do not need to care about possible cross-iteration dependences
2973      and thus can process the full original reference.  Do so,
2974      similar to how loop invariant motion applies extra offset-based
2975      disambiguation.  */
2976   if (!loop_nest)
2977     {
2978       tree tree_size_a = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (a)));
2979       tree tree_size_b = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (b)));
2980
2981       if (DR_BASE_ADDRESS (a)
2982           && DR_BASE_ADDRESS (b)
2983           && operand_equal_p (DR_BASE_ADDRESS (a), DR_BASE_ADDRESS (b))
2984           && operand_equal_p (DR_OFFSET (a), DR_OFFSET (b))
2985           && poly_int_tree_p (tree_size_a)
2986           && poly_int_tree_p (tree_size_b)
2987           && !ranges_maybe_overlap_p (wi::to_poly_widest (DR_INIT (a)),
2988                                       wi::to_poly_widest (tree_size_a),
2989                                       wi::to_poly_widest (DR_INIT (b)),
2990                                       wi::to_poly_widest (tree_size_b)))
2991         {
2992           gcc_assert (integer_zerop (DR_STEP (a))
2993                       && integer_zerop (DR_STEP (b)));
2994           return false;
2995         }
2996
2997       aff_tree off1, off2;
2998       poly_widest_int size1, size2;
2999       get_inner_reference_aff (DR_REF (a), &off1, &size1);
3000       get_inner_reference_aff (DR_REF (b), &off2, &size2);
3001       aff_combination_scale (&off1, -1);
3002       aff_combination_add (&off2, &off1);
3003       if (aff_comb_cannot_overlap_p (&off2, size1, size2))
3004         return false;
3005     }
3006
3007   if ((TREE_CODE (addr_a) == MEM_REF || TREE_CODE (addr_a) == TARGET_MEM_REF)
3008       && (TREE_CODE (addr_b) == MEM_REF || TREE_CODE (addr_b) == TARGET_MEM_REF)
3009       /* For cross-iteration dependences the cliques must be valid for the
3010          whole loop, not just individual iterations.  */
3011       && (!loop_nest
3012           || MR_DEPENDENCE_CLIQUE (addr_a) == 1
3013           || MR_DEPENDENCE_CLIQUE (addr_a) == loop_nest->owned_clique)
3014       && MR_DEPENDENCE_CLIQUE (addr_a) == MR_DEPENDENCE_CLIQUE (addr_b)
3015       && MR_DEPENDENCE_BASE (addr_a) != MR_DEPENDENCE_BASE (addr_b))
3016     return false;
3017
3018   /* If we had an evolution in a pointer-based MEM_REF BASE_OBJECT we
3019      do not know the size of the base-object.  So we cannot do any
3020      offset/overlap based analysis but have to rely on points-to
3021      information only.  */
3022   if (TREE_CODE (addr_a) == MEM_REF
3023       && (DR_UNCONSTRAINED_BASE (a)
3024           || TREE_CODE (TREE_OPERAND (addr_a, 0)) == SSA_NAME))
3025     {
3026       /* For true dependences we can apply TBAA.  */
3027       if (flag_strict_aliasing
3028           && DR_IS_WRITE (a) && DR_IS_READ (b)
3029           && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
3030                                      get_alias_set (DR_REF (b))))
3031         return false;
3032       if (TREE_CODE (addr_b) == MEM_REF)
3033         return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
3034                                        TREE_OPERAND (addr_b, 0));
3035       else
3036         return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
3037                                        build_fold_addr_expr (addr_b));
3038     }
3039   else if (TREE_CODE (addr_b) == MEM_REF
3040            && (DR_UNCONSTRAINED_BASE (b)
3041                || TREE_CODE (TREE_OPERAND (addr_b, 0)) == SSA_NAME))
3042     {
3043       /* For true dependences we can apply TBAA.  */
3044       if (flag_strict_aliasing
3045           && DR_IS_WRITE (a) && DR_IS_READ (b)
3046           && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
3047                                      get_alias_set (DR_REF (b))))
3048         return false;
3049       if (TREE_CODE (addr_a) == MEM_REF)
3050         return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
3051                                        TREE_OPERAND (addr_b, 0));
3052       else
3053         return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
3054                                        TREE_OPERAND (addr_b, 0));
3055     }
3056
3057   /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
3058      that is being subsetted in the loop nest.  */
3059   if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
3060     return refs_output_dependent_p (addr_a, addr_b);
3061   else if (DR_IS_READ (a) && DR_IS_WRITE (b))
3062     return refs_anti_dependent_p (addr_a, addr_b);
3063   return refs_may_alias_p (addr_a, addr_b);
3064 }
3065
3066 /* REF_A and REF_B both satisfy access_fn_component_p.  Return true
3067    if it is meaningful to compare their associated access functions
3068    when checking for dependencies.  */
3069
3070 static bool
3071 access_fn_components_comparable_p (tree ref_a, tree ref_b)
3072 {
3073   /* Allow pairs of component refs from the following sets:
3074
3075        { REALPART_EXPR, IMAGPART_EXPR }
3076        { COMPONENT_REF }
3077        { ARRAY_REF }.  */
3078   tree_code code_a = TREE_CODE (ref_a);
3079   tree_code code_b = TREE_CODE (ref_b);
3080   if (code_a == IMAGPART_EXPR)
3081     code_a = REALPART_EXPR;
3082   if (code_b == IMAGPART_EXPR)
3083     code_b = REALPART_EXPR;
3084   if (code_a != code_b)
3085     return false;
3086
3087   if (TREE_CODE (ref_a) == COMPONENT_REF)
3088     /* ??? We cannot simply use the type of operand #0 of the refs here as
3089        the Fortran compiler smuggles type punning into COMPONENT_REFs.
3090        Use the DECL_CONTEXT of the FIELD_DECLs instead.  */
3091     return (DECL_CONTEXT (TREE_OPERAND (ref_a, 1))
3092             == DECL_CONTEXT (TREE_OPERAND (ref_b, 1)));
3093
3094   return types_compatible_p (TREE_TYPE (TREE_OPERAND (ref_a, 0)),
3095                              TREE_TYPE (TREE_OPERAND (ref_b, 0)));
3096 }
3097
3098 /* Initialize a data dependence relation RES in LOOP_NEST.  USE_ALT_INDICES
3099    is true when the main indices of A and B were not comparable so we try again
3100    with alternate indices computed on an indirect reference.  */
3101
3102 struct data_dependence_relation *
3103 initialize_data_dependence_relation (struct data_dependence_relation *res,
3104                                      vec<loop_p> loop_nest,
3105                                      bool use_alt_indices)
3106 {
3107   struct data_reference *a = DDR_A (res);
3108   struct data_reference *b = DDR_B (res);
3109   unsigned int i;
3110
3111   struct indices *indices_a = &a->indices;
3112   struct indices *indices_b = &b->indices;
3113   if (use_alt_indices)
3114     {
3115       if (TREE_CODE (DR_REF (a)) != MEM_REF)
3116         indices_a = &a->alt_indices;
3117       if (TREE_CODE (DR_REF (b)) != MEM_REF)
3118         indices_b = &b->alt_indices;
3119     }
3120   unsigned int num_dimensions_a = indices_a->access_fns.length ();
3121   unsigned int num_dimensions_b = indices_b->access_fns.length ();
3122   if (num_dimensions_a == 0 || num_dimensions_b == 0)
3123     {
3124       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3125       return res;
3126     }
3127
3128   /* For unconstrained bases, the root (highest-indexed) subscript
3129      describes a variation in the base of the original DR_REF rather
3130      than a component access.  We have no type that accurately describes
3131      the new DR_BASE_OBJECT (whose TREE_TYPE describes the type *after*
3132      applying this subscript) so limit the search to the last real
3133      component access.
3134
3135      E.g. for:
3136
3137         void
3138         f (int a[][8], int b[][8])
3139         {
3140           for (int i = 0; i < 8; ++i)
3141             a[i * 2][0] = b[i][0];
3142         }
3143
3144      the a and b accesses have a single ARRAY_REF component reference [0]
3145      but have two subscripts.  */
3146   if (indices_a->unconstrained_base)
3147     num_dimensions_a -= 1;
3148   if (indices_b->unconstrained_base)
3149     num_dimensions_b -= 1;
3150
3151   /* These structures describe sequences of component references in
3152      DR_REF (A) and DR_REF (B).  Each component reference is tied to a
3153      specific access function.  */
3154   struct {
3155     /* The sequence starts at DR_ACCESS_FN (A, START_A) of A and
3156        DR_ACCESS_FN (B, START_B) of B (inclusive) and extends to higher
3157        indices.  In C notation, these are the indices of the rightmost
3158        component references; e.g. for a sequence .b.c.d, the start
3159        index is for .d.  */
3160     unsigned int start_a;
3161     unsigned int start_b;
3162
3163     /* The sequence contains LENGTH consecutive access functions from
3164        each DR.  */
3165     unsigned int length;
3166
3167     /* The enclosing objects for the A and B sequences respectively,
3168        i.e. the objects to which DR_ACCESS_FN (A, START_A + LENGTH - 1)
3169        and DR_ACCESS_FN (B, START_B + LENGTH - 1) are applied.  */
3170     tree object_a;
3171     tree object_b;
3172   } full_seq = {}, struct_seq = {};
3173
3174   /* Before each iteration of the loop:
3175
3176      - REF_A is what you get after applying DR_ACCESS_FN (A, INDEX_A) and
3177      - REF_B is what you get after applying DR_ACCESS_FN (B, INDEX_B).  */
3178   unsigned int index_a = 0;
3179   unsigned int index_b = 0;
3180   tree ref_a = DR_REF (a);
3181   tree ref_b = DR_REF (b);
3182
3183   /* Now walk the component references from the final DR_REFs back up to
3184      the enclosing base objects.  Each component reference corresponds
3185      to one access function in the DR, with access function 0 being for
3186      the final DR_REF and the highest-indexed access function being the
3187      one that is applied to the base of the DR.
3188
3189      Look for a sequence of component references whose access functions
3190      are comparable (see access_fn_components_comparable_p).  If more
3191      than one such sequence exists, pick the one nearest the base
3192      (which is the leftmost sequence in C notation).  Store this sequence
3193      in FULL_SEQ.
3194
3195      For example, if we have:
3196
3197         struct foo { struct bar s; ... } (*a)[10], (*b)[10];
3198
3199         A: a[0][i].s.c.d
3200         B: __real b[0][i].s.e[i].f
3201
3202      (where d is the same type as the real component of f) then the access
3203      functions would be:
3204
3205                          0   1   2   3
3206         A:              .d  .c  .s [i]
3207
3208                  0   1   2   3   4   5
3209         B:  __real  .f [i]  .e  .s [i]
3210
3211      The A0/B2 column isn't comparable, since .d is a COMPONENT_REF
3212      and [i] is an ARRAY_REF.  However, the A1/B3 column contains two
3213      COMPONENT_REF accesses for struct bar, so is comparable.  Likewise
3214      the A2/B4 column contains two COMPONENT_REF accesses for struct foo,
3215      so is comparable.  The A3/B5 column contains two ARRAY_REFs that
3216      index foo[10] arrays, so is again comparable.  The sequence is
3217      therefore:
3218
3219         A: [1, 3]  (i.e. [i].s.c)
3220         B: [3, 5]  (i.e. [i].s.e)
3221
3222      Also look for sequences of component references whose access
3223      functions are comparable and whose enclosing objects have the same
3224      RECORD_TYPE.  Store this sequence in STRUCT_SEQ.  In the above
3225      example, STRUCT_SEQ would be:
3226
3227         A: [1, 2]  (i.e. s.c)
3228         B: [3, 4]  (i.e. s.e)  */
3229   while (index_a < num_dimensions_a && index_b < num_dimensions_b)
3230     {
3231       /* The alternate indices form always has a single dimension
3232          with unconstrained base.  */
3233       gcc_assert (!use_alt_indices);
3234
3235       /* REF_A and REF_B must be one of the component access types
3236          allowed by dr_analyze_indices.  */
3237       gcc_checking_assert (access_fn_component_p (ref_a));
3238       gcc_checking_assert (access_fn_component_p (ref_b));
3239
3240       /* Get the immediately-enclosing objects for REF_A and REF_B,
3241          i.e. the references *before* applying DR_ACCESS_FN (A, INDEX_A)
3242          and DR_ACCESS_FN (B, INDEX_B).  */
3243       tree object_a = TREE_OPERAND (ref_a, 0);
3244       tree object_b = TREE_OPERAND (ref_b, 0);
3245
3246       tree type_a = TREE_TYPE (object_a);
3247       tree type_b = TREE_TYPE (object_b);
3248       if (access_fn_components_comparable_p (ref_a, ref_b))
3249         {
3250           /* This pair of component accesses is comparable for dependence
3251              analysis, so we can include DR_ACCESS_FN (A, INDEX_A) and
3252              DR_ACCESS_FN (B, INDEX_B) in the sequence.  */
3253           if (full_seq.start_a + full_seq.length != index_a
3254               || full_seq.start_b + full_seq.length != index_b)
3255             {
3256               /* The accesses don't extend the current sequence,
3257                  so start a new one here.  */
3258               full_seq.start_a = index_a;
3259               full_seq.start_b = index_b;
3260               full_seq.length = 0;
3261             }
3262
3263           /* Add this pair of references to the sequence.  */
3264           full_seq.length += 1;
3265           full_seq.object_a = object_a;
3266           full_seq.object_b = object_b;
3267
3268           /* If the enclosing objects are structures (and thus have the
3269              same RECORD_TYPE), record the new sequence in STRUCT_SEQ.  */
3270           if (TREE_CODE (type_a) == RECORD_TYPE)
3271             struct_seq = full_seq;
3272
3273           /* Move to the next containing reference for both A and B.  */
3274           ref_a = object_a;
3275           ref_b = object_b;
3276           index_a += 1;
3277           index_b += 1;
3278           continue;
3279         }
3280
3281       /* Try to approach equal type sizes.  */
3282       if (!COMPLETE_TYPE_P (type_a)
3283           || !COMPLETE_TYPE_P (type_b)
3284           || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_a))
3285           || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_b)))
3286         break;
3287
3288       unsigned HOST_WIDE_INT size_a = tree_to_uhwi (TYPE_SIZE_UNIT (type_a));
3289       unsigned HOST_WIDE_INT size_b = tree_to_uhwi (TYPE_SIZE_UNIT (type_b));
3290       if (size_a <= size_b)
3291         {
3292           index_a += 1;
3293           ref_a = object_a;
3294         }
3295       if (size_b <= size_a)
3296         {
3297           index_b += 1;
3298           ref_b = object_b;
3299         }
3300     }
3301
3302   /* See whether FULL_SEQ ends at the base and whether the two bases
3303      are equal.  We do not care about TBAA or alignment info so we can
3304      use OEP_ADDRESS_OF to avoid false negatives.  */
3305   tree base_a = indices_a->base_object;
3306   tree base_b = indices_b->base_object;
3307   bool same_base_p = (full_seq.start_a + full_seq.length == num_dimensions_a
3308                       && full_seq.start_b + full_seq.length == num_dimensions_b
3309                       && (indices_a->unconstrained_base
3310                           == indices_b->unconstrained_base)
3311                       && operand_equal_p (base_a, base_b, OEP_ADDRESS_OF)
3312                       && (types_compatible_p (TREE_TYPE (base_a),
3313                                               TREE_TYPE (base_b))
3314                           || (!base_supports_access_fn_components_p (base_a)
3315                               && !base_supports_access_fn_components_p (base_b)
3316                               && operand_equal_p
3317                                    (TYPE_SIZE (TREE_TYPE (base_a)),
3318                                     TYPE_SIZE (TREE_TYPE (base_b)), 0)))
3319                       && (!loop_nest.exists ()
3320                           || (object_address_invariant_in_loop_p
3321                               (loop_nest[0], base_a))));
3322
3323   /* If the bases are the same, we can include the base variation too.
3324      E.g. the b accesses in:
3325
3326        for (int i = 0; i < n; ++i)
3327          b[i + 4][0] = b[i][0];
3328
3329      have a definite dependence distance of 4, while for:
3330
3331        for (int i = 0; i < n; ++i)
3332          a[i + 4][0] = b[i][0];
3333
3334      the dependence distance depends on the gap between a and b.
3335
3336      If the bases are different then we can only rely on the sequence
3337      rooted at a structure access, since arrays are allowed to overlap
3338      arbitrarily and change shape arbitrarily.  E.g. we treat this as
3339      valid code:
3340
3341        int a[256];
3342        ...
3343        ((int (*)[4][3]) &a[1])[i][0] += ((int (*)[4][3]) &a[2])[i][0];
3344
3345      where two lvalues with the same int[4][3] type overlap, and where
3346      both lvalues are distinct from the object's declared type.  */
3347   if (same_base_p)
3348     {
3349       if (indices_a->unconstrained_base)
3350         full_seq.length += 1;
3351     }
3352   else
3353     full_seq = struct_seq;
3354
3355   /* Punt if we didn't find a suitable sequence.  */
3356   if (full_seq.length == 0)
3357     {
3358       if (use_alt_indices
3359           || (TREE_CODE (DR_REF (a)) == MEM_REF
3360               && TREE_CODE (DR_REF (b)) == MEM_REF)
3361           || may_be_nonaddressable_p (DR_REF (a))
3362           || may_be_nonaddressable_p (DR_REF (b)))
3363         {
3364           /* Fully exhausted possibilities.  */
3365           DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3366           return res;
3367         }
3368
3369       /* Try evaluating both DRs as dereferences of pointers.  */
3370       if (!a->alt_indices.base_object
3371           && TREE_CODE (DR_REF (a)) != MEM_REF)
3372         {
3373           tree alt_ref = build2 (MEM_REF, TREE_TYPE (DR_REF (a)),
3374                                  build1 (ADDR_EXPR, ptr_type_node, DR_REF (a)),
3375                                  build_int_cst
3376                                    (reference_alias_ptr_type (DR_REF (a)), 0));
3377           dr_analyze_indices (&a->alt_indices, alt_ref,
3378                               loop_preheader_edge (loop_nest[0]),
3379                               loop_containing_stmt (DR_STMT (a)));
3380         }
3381       if (!b->alt_indices.base_object
3382           && TREE_CODE (DR_REF (b)) != MEM_REF)
3383         {
3384           tree alt_ref = build2 (MEM_REF, TREE_TYPE (DR_REF (b)),
3385                                  build1 (ADDR_EXPR, ptr_type_node, DR_REF (b)),
3386                                  build_int_cst
3387                                    (reference_alias_ptr_type (DR_REF (b)), 0));
3388           dr_analyze_indices (&b->alt_indices, alt_ref,
3389                               loop_preheader_edge (loop_nest[0]),
3390                               loop_containing_stmt (DR_STMT (b)));
3391         }
3392       return initialize_data_dependence_relation (res, loop_nest, true);
3393     }
3394
3395   if (!same_base_p)
3396     {
3397       /* Partial overlap is possible for different bases when strict aliasing
3398          is not in effect.  It's also possible if either base involves a union
3399          access; e.g. for:
3400
3401            struct s1 { int a[2]; };
3402            struct s2 { struct s1 b; int c; };
3403            struct s3 { int d; struct s1 e; };
3404            union u { struct s2 f; struct s3 g; } *p, *q;
3405
3406          the s1 at "p->f.b" (base "p->f") partially overlaps the s1 at
3407          "p->g.e" (base "p->g") and might partially overlap the s1 at
3408          "q->g.e" (base "q->g").  */
3409       if (!flag_strict_aliasing
3410           || ref_contains_union_access_p (full_seq.object_a)
3411           || ref_contains_union_access_p (full_seq.object_b))
3412         {
3413           DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3414           return res;
3415         }
3416
3417       DDR_COULD_BE_INDEPENDENT_P (res) = true;
3418       if (!loop_nest.exists ()
3419           || (object_address_invariant_in_loop_p (loop_nest[0],
3420                                                   full_seq.object_a)
3421               && object_address_invariant_in_loop_p (loop_nest[0],
3422                                                      full_seq.object_b)))
3423         {
3424           DDR_OBJECT_A (res) = full_seq.object_a;
3425           DDR_OBJECT_B (res) = full_seq.object_b;
3426         }
3427     }
3428
3429   DDR_AFFINE_P (res) = true;
3430   DDR_ARE_DEPENDENT (res) = NULL_TREE;
3431   DDR_SUBSCRIPTS (res).create (full_seq.length);
3432   DDR_LOOP_NEST (res) = loop_nest;
3433   DDR_SELF_REFERENCE (res) = false;
3434
3435   for (i = 0; i < full_seq.length; ++i)
3436     {
3437       struct subscript *subscript;
3438
3439       subscript = XNEW (struct subscript);
3440       SUB_ACCESS_FN (subscript, 0) = indices_a->access_fns[full_seq.start_a + i];
3441       SUB_ACCESS_FN (subscript, 1) = indices_b->access_fns[full_seq.start_b + i];
3442       SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
3443       SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
3444       SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
3445       SUB_DISTANCE (subscript) = chrec_dont_know;
3446       DDR_SUBSCRIPTS (res).safe_push (subscript);
3447     }
3448
3449   return res;
3450 }
3451
3452 /* Initialize a data dependence relation between data accesses A and
3453    B.  NB_LOOPS is the number of loops surrounding the references: the
3454    size of the classic distance/direction vectors.  */
3455
3456 struct data_dependence_relation *
3457 initialize_data_dependence_relation (struct data_reference *a,
3458                                      struct data_reference *b,
3459                                      vec<loop_p> loop_nest)
3460 {
3461   data_dependence_relation *res = XCNEW (struct data_dependence_relation);
3462   DDR_A (res) = a;
3463   DDR_B (res) = b;
3464   DDR_LOOP_NEST (res).create (0);
3465   DDR_SUBSCRIPTS (res).create (0);
3466   DDR_DIR_VECTS (res).create (0);
3467   DDR_DIST_VECTS (res).create (0);
3468
3469   if (a == NULL || b == NULL)
3470     {
3471       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3472       return res;
3473     }
3474
3475   /* If the data references do not alias, then they are independent.  */
3476   if (!dr_may_alias_p (a, b, loop_nest.exists () ? loop_nest[0] : NULL))
3477     {
3478       DDR_ARE_DEPENDENT (res) = chrec_known;
3479       return res;
3480     }
3481
3482   return initialize_data_dependence_relation (res, loop_nest, false);
3483 }
3484
3485
3486 /* Frees memory used by the conflict function F.  */
3487
3488 static void
3489 free_conflict_function (conflict_function *f)
3490 {
3491   unsigned i;
3492
3493   if (CF_NONTRIVIAL_P (f))
3494     {
3495       for (i = 0; i < f->n; i++)
3496         affine_fn_free (f->fns[i]);
3497     }
3498   free (f);
3499 }
3500
3501 /* Frees memory used by SUBSCRIPTS.  */
3502
3503 static void
3504 free_subscripts (vec<subscript_p> subscripts)
3505 {
3506   for (subscript_p s : subscripts)
3507     {
3508       free_conflict_function (s->conflicting_iterations_in_a);
3509       free_conflict_function (s->conflicting_iterations_in_b);
3510       free (s);
3511     }
3512   subscripts.release ();
3513 }
3514
3515 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
3516    description.  */
3517
3518 static inline void
3519 finalize_ddr_dependent (struct data_dependence_relation *ddr,
3520                         tree chrec)
3521 {
3522   DDR_ARE_DEPENDENT (ddr) = chrec;
3523   free_subscripts (DDR_SUBSCRIPTS (ddr));
3524   DDR_SUBSCRIPTS (ddr).create (0);
3525 }
3526
3527 /* The dependence relation DDR cannot be represented by a distance
3528    vector.  */
3529
3530 static inline void
3531 non_affine_dependence_relation (struct data_dependence_relation *ddr)
3532 {
3533   if (dump_file && (dump_flags & TDF_DETAILS))
3534     fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
3535
3536   DDR_AFFINE_P (ddr) = false;
3537 }
3538
3539 \f
3540
3541 /* This section contains the classic Banerjee tests.  */
3542
3543 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
3544    variables, i.e., if the ZIV (Zero Index Variable) test is true.  */
3545
3546 static inline bool
3547 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
3548 {
3549   return (evolution_function_is_constant_p (chrec_a)
3550           && evolution_function_is_constant_p (chrec_b));
3551 }
3552
3553 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
3554    variable, i.e., if the SIV (Single Index Variable) test is true.  */
3555
3556 static bool
3557 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
3558 {
3559   if ((evolution_function_is_constant_p (chrec_a)
3560        && evolution_function_is_univariate_p (chrec_b))
3561       || (evolution_function_is_constant_p (chrec_b)
3562           && evolution_function_is_univariate_p (chrec_a)))
3563     return true;
3564
3565   if (evolution_function_is_univariate_p (chrec_a)
3566       && evolution_function_is_univariate_p (chrec_b))
3567     {
3568       switch (TREE_CODE (chrec_a))
3569         {
3570         case POLYNOMIAL_CHREC:
3571           switch (TREE_CODE (chrec_b))
3572             {
3573             case POLYNOMIAL_CHREC:
3574               if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
3575                 return false;
3576               /* FALLTHRU */
3577
3578             default:
3579               return true;
3580             }
3581
3582         default:
3583           return true;
3584         }
3585     }
3586
3587   return false;
3588 }
3589
3590 /* Creates a conflict function with N dimensions.  The affine functions
3591    in each dimension follow.  */
3592
3593 static conflict_function *
3594 conflict_fn (unsigned n, ...)
3595 {
3596   unsigned i;
3597   conflict_function *ret = XCNEW (conflict_function);
3598   va_list ap;
3599
3600   gcc_assert (n > 0 && n <= MAX_DIM);
3601   va_start (ap, n);
3602
3603   ret->n = n;
3604   for (i = 0; i < n; i++)
3605     ret->fns[i] = va_arg (ap, affine_fn);
3606   va_end (ap);
3607
3608   return ret;
3609 }
3610
3611 /* Returns constant affine function with value CST.  */
3612
3613 static affine_fn
3614 affine_fn_cst (tree cst)
3615 {
3616   affine_fn fn;
3617   fn.create (1);
3618   fn.quick_push (cst);
3619   return fn;
3620 }
3621
3622 /* Returns affine function with single variable, CST + COEF * x_DIM.  */
3623
3624 static affine_fn
3625 affine_fn_univar (tree cst, unsigned dim, tree coef)
3626 {
3627   affine_fn fn;
3628   fn.create (dim + 1);
3629   unsigned i;
3630
3631   gcc_assert (dim > 0);
3632   fn.quick_push (cst);
3633   for (i = 1; i < dim; i++)
3634     fn.quick_push (integer_zero_node);
3635   fn.quick_push (coef);
3636   return fn;
3637 }
3638
3639 /* Analyze a ZIV (Zero Index Variable) subscript.  *OVERLAPS_A and
3640    *OVERLAPS_B are initialized to the functions that describe the
3641    relation between the elements accessed twice by CHREC_A and
3642    CHREC_B.  For k >= 0, the following property is verified:
3643
3644    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
3645
3646 static void
3647 analyze_ziv_subscript (tree chrec_a,
3648                        tree chrec_b,
3649                        conflict_function **overlaps_a,
3650                        conflict_function **overlaps_b,
3651                        tree *last_conflicts)
3652 {
3653   tree type, difference;
3654   dependence_stats.num_ziv++;
3655
3656   if (dump_file && (dump_flags & TDF_DETAILS))
3657     fprintf (dump_file, "(analyze_ziv_subscript \n");
3658
3659   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
3660   chrec_a = chrec_convert (type, chrec_a, NULL);
3661   chrec_b = chrec_convert (type, chrec_b, NULL);
3662   difference = chrec_fold_minus (type, chrec_a, chrec_b);
3663
3664   switch (TREE_CODE (difference))
3665     {
3666     case INTEGER_CST:
3667       if (integer_zerop (difference))
3668         {
3669           /* The difference is equal to zero: the accessed index
3670              overlaps for each iteration in the loop.  */
3671           *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3672           *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3673           *last_conflicts = chrec_dont_know;
3674           dependence_stats.num_ziv_dependent++;
3675         }
3676       else
3677         {
3678           /* The accesses do not overlap.  */
3679           *overlaps_a = conflict_fn_no_dependence ();
3680           *overlaps_b = conflict_fn_no_dependence ();
3681           *last_conflicts = integer_zero_node;
3682           dependence_stats.num_ziv_independent++;
3683         }
3684       break;
3685
3686     default:
3687       /* We're not sure whether the indexes overlap.  For the moment,
3688          conservatively answer "don't know".  */
3689       if (dump_file && (dump_flags & TDF_DETAILS))
3690         fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
3691
3692       *overlaps_a = conflict_fn_not_known ();
3693       *overlaps_b = conflict_fn_not_known ();
3694       *last_conflicts = chrec_dont_know;
3695       dependence_stats.num_ziv_unimplemented++;
3696       break;
3697     }
3698
3699   if (dump_file && (dump_flags & TDF_DETAILS))
3700     fprintf (dump_file, ")\n");
3701 }
3702
3703 /* Similar to max_stmt_executions_int, but returns the bound as a tree,
3704    and only if it fits to the int type.  If this is not the case, or the
3705    bound  on the number of iterations of LOOP could not be derived, returns
3706    chrec_dont_know.  */
3707
3708 static tree
3709 max_stmt_executions_tree (class loop *loop)
3710 {
3711   widest_int nit;
3712
3713   if (!max_stmt_executions (loop, &nit))
3714     return chrec_dont_know;
3715
3716   if (!wi::fits_to_tree_p (nit, unsigned_type_node))
3717     return chrec_dont_know;
3718
3719   return wide_int_to_tree (unsigned_type_node, nit);
3720 }
3721
3722 /* Determine whether the CHREC is always positive/negative.  If the expression
3723    cannot be statically analyzed, return false, otherwise set the answer into
3724    VALUE.  */
3725
3726 static bool
3727 chrec_is_positive (tree chrec, bool *value)
3728 {
3729   bool value0, value1, value2;
3730   tree end_value, nb_iter;
3731
3732   switch (TREE_CODE (chrec))
3733     {
3734     case POLYNOMIAL_CHREC:
3735       if (!chrec_is_positive (CHREC_LEFT (chrec), &value0)
3736           || !chrec_is_positive (CHREC_RIGHT (chrec), &value1))
3737         return false;
3738
3739       /* FIXME -- overflows.  */
3740       if (value0 == value1)
3741         {
3742           *value = value0;
3743           return true;
3744         }
3745
3746       /* Otherwise the chrec is under the form: "{-197, +, 2}_1",
3747          and the proof consists in showing that the sign never
3748          changes during the execution of the loop, from 0 to
3749          loop->nb_iterations.  */
3750       if (!evolution_function_is_affine_p (chrec))
3751         return false;
3752
3753       nb_iter = number_of_latch_executions (get_chrec_loop (chrec));
3754       if (chrec_contains_undetermined (nb_iter))
3755         return false;
3756
3757 #if 0
3758       /* TODO -- If the test is after the exit, we may decrease the number of
3759          iterations by one.  */
3760       if (after_exit)
3761         nb_iter = chrec_fold_minus (type, nb_iter, build_int_cst (type, 1));
3762 #endif
3763
3764       end_value = chrec_apply (CHREC_VARIABLE (chrec), chrec, nb_iter);
3765
3766       if (!chrec_is_positive (end_value, &value2))
3767         return false;
3768
3769       *value = value0;
3770       return value0 == value1;
3771
3772     case INTEGER_CST:
3773       switch (tree_int_cst_sgn (chrec))
3774         {
3775         case -1:
3776           *value = false;
3777           break;
3778         case 1:
3779           *value = true;
3780           break;
3781         default:
3782           return false;
3783         }
3784       return true;
3785
3786     default:
3787       return false;
3788     }
3789 }
3790
3791
3792 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
3793    constant, and CHREC_B is an affine function.  *OVERLAPS_A and
3794    *OVERLAPS_B are initialized to the functions that describe the
3795    relation between the elements accessed twice by CHREC_A and
3796    CHREC_B.  For k >= 0, the following property is verified:
3797
3798    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
3799
3800 static void
3801 analyze_siv_subscript_cst_affine (tree chrec_a,
3802                                   tree chrec_b,
3803                                   conflict_function **overlaps_a,
3804                                   conflict_function **overlaps_b,
3805                                   tree *last_conflicts)
3806 {
3807   bool value0, value1, value2;
3808   tree type, difference, tmp;
3809
3810   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
3811   chrec_a = chrec_convert (type, chrec_a, NULL);
3812   chrec_b = chrec_convert (type, chrec_b, NULL);
3813   difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
3814
3815   /* Special case overlap in the first iteration.  */
3816   if (integer_zerop (difference))
3817     {
3818       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3819       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3820       *last_conflicts = integer_one_node;
3821       return;
3822     }
3823
3824   if (!chrec_is_positive (initial_condition (difference), &value0))
3825     {
3826       if (dump_file && (dump_flags & TDF_DETAILS))
3827         fprintf (dump_file, "siv test failed: chrec is not positive.\n");
3828
3829       dependence_stats.num_siv_unimplemented++;
3830       *overlaps_a = conflict_fn_not_known ();
3831       *overlaps_b = conflict_fn_not_known ();
3832       *last_conflicts = chrec_dont_know;
3833       return;
3834     }
3835   else
3836     {
3837       if (value0 == false)
3838         {
3839           if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
3840               || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
3841             {
3842               if (dump_file && (dump_flags & TDF_DETAILS))
3843                 fprintf (dump_file, "siv test failed: chrec not positive.\n");
3844
3845               *overlaps_a = conflict_fn_not_known ();
3846               *overlaps_b = conflict_fn_not_known ();
3847               *last_conflicts = chrec_dont_know;
3848               dependence_stats.num_siv_unimplemented++;
3849               return;
3850             }
3851           else
3852             {
3853               if (value1 == true)
3854                 {
3855                   /* Example:
3856                      chrec_a = 12
3857                      chrec_b = {10, +, 1}
3858                   */
3859
3860                   if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
3861                     {
3862                       HOST_WIDE_INT numiter;
3863                       class loop *loop = get_chrec_loop (chrec_b);
3864
3865                       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3866                       tmp = fold_build2 (EXACT_DIV_EXPR, type,
3867                                          fold_build1 (ABS_EXPR, type, difference),
3868                                          CHREC_RIGHT (chrec_b));
3869                       *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
3870                       *last_conflicts = integer_one_node;
3871
3872
3873                       /* Perform weak-zero siv test to see if overlap is
3874                          outside the loop bounds.  */
3875                       numiter = max_stmt_executions_int (loop);
3876
3877                       if (numiter >= 0
3878                           && compare_tree_int (tmp, numiter) > 0)
3879                         {
3880                           free_conflict_function (*overlaps_a);
3881                           free_conflict_function (*overlaps_b);
3882                           *overlaps_a = conflict_fn_no_dependence ();
3883                           *overlaps_b = conflict_fn_no_dependence ();
3884                           *last_conflicts = integer_zero_node;
3885                           dependence_stats.num_siv_independent++;
3886                           return;
3887                         }
3888                       dependence_stats.num_siv_dependent++;
3889                       return;
3890                     }
3891
3892                   /* When the step does not divide the difference, there are
3893                      no overlaps.  */
3894                   else
3895                     {
3896                       *overlaps_a = conflict_fn_no_dependence ();
3897                       *overlaps_b = conflict_fn_no_dependence ();
3898                       *last_conflicts = integer_zero_node;
3899                       dependence_stats.num_siv_independent++;
3900                       return;
3901                     }
3902                 }
3903
3904               else
3905                 {
3906                   /* Example:
3907                      chrec_a = 12
3908                      chrec_b = {10, +, -1}
3909
3910                      In this case, chrec_a will not overlap with chrec_b.  */
3911                   *overlaps_a = conflict_fn_no_dependence ();
3912                   *overlaps_b = conflict_fn_no_dependence ();
3913                   *last_conflicts = integer_zero_node;
3914                   dependence_stats.num_siv_independent++;
3915                   return;
3916                 }
3917             }
3918         }
3919       else
3920         {
3921           if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
3922               || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
3923             {
3924               if (dump_file && (dump_flags & TDF_DETAILS))
3925                 fprintf (dump_file, "siv test failed: chrec not positive.\n");
3926
3927               *overlaps_a = conflict_fn_not_known ();
3928               *overlaps_b = conflict_fn_not_known ();
3929               *last_conflicts = chrec_dont_know;
3930               dependence_stats.num_siv_unimplemented++;
3931               return;
3932             }
3933           else
3934             {
3935               if (value2 == false)
3936                 {
3937                   /* Example:
3938                      chrec_a = 3
3939                      chrec_b = {10, +, -1}
3940                   */
3941                   if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
3942                     {
3943                       HOST_WIDE_INT numiter;
3944                       class loop *loop = get_chrec_loop (chrec_b);
3945
3946                       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3947                       tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
3948                                          CHREC_RIGHT (chrec_b));
3949                       *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
3950                       *last_conflicts = integer_one_node;
3951
3952                       /* Perform weak-zero siv test to see if overlap is
3953                          outside the loop bounds.  */
3954                       numiter = max_stmt_executions_int (loop);
3955
3956                       if (numiter >= 0
3957                           && compare_tree_int (tmp, numiter) > 0)
3958                         {
3959                           free_conflict_function (*overlaps_a);
3960                           free_conflict_function (*overlaps_b);
3961                           *overlaps_a = conflict_fn_no_dependence ();
3962                           *overlaps_b = conflict_fn_no_dependence ();
3963                           *last_conflicts = integer_zero_node;
3964                           dependence_stats.num_siv_independent++;
3965                           return;
3966                         }
3967                       dependence_stats.num_siv_dependent++;
3968                       return;
3969                     }
3970
3971                   /* When the step does not divide the difference, there
3972                      are no overlaps.  */
3973                   else
3974                     {
3975                       *overlaps_a = conflict_fn_no_dependence ();
3976                       *overlaps_b = conflict_fn_no_dependence ();
3977                       *last_conflicts = integer_zero_node;
3978                       dependence_stats.num_siv_independent++;
3979                       return;
3980                     }
3981                 }
3982               else
3983                 {
3984                   /* Example:
3985                      chrec_a = 3
3986                      chrec_b = {4, +, 1}
3987
3988                      In this case, chrec_a will not overlap with chrec_b.  */
3989                   *overlaps_a = conflict_fn_no_dependence ();
3990                   *overlaps_b = conflict_fn_no_dependence ();
3991                   *last_conflicts = integer_zero_node;
3992                   dependence_stats.num_siv_independent++;
3993                   return;
3994                 }
3995             }
3996         }
3997     }
3998 }
3999
4000 /* Helper recursive function for initializing the matrix A.  Returns
4001    the initial value of CHREC.  */
4002
4003 static tree
4004 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
4005 {
4006   gcc_assert (chrec);
4007
4008   switch (TREE_CODE (chrec))
4009     {
4010     case POLYNOMIAL_CHREC:
4011       HOST_WIDE_INT chrec_right;
4012       if (!cst_and_fits_in_hwi (CHREC_RIGHT (chrec)))
4013         return chrec_dont_know;
4014       chrec_right = int_cst_value (CHREC_RIGHT (chrec));
4015       /* We want to be able to negate without overflow.  */
4016       if (chrec_right == HOST_WIDE_INT_MIN)
4017         return chrec_dont_know;
4018       A[index][0] = mult * chrec_right;
4019       return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
4020
4021     case PLUS_EXPR:
4022     case MULT_EXPR:
4023     case MINUS_EXPR:
4024       {
4025         tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
4026         tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
4027
4028         return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
4029       }
4030
4031     CASE_CONVERT:
4032       {
4033         tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
4034         return chrec_convert (chrec_type (chrec), op, NULL);
4035       }
4036
4037     case BIT_NOT_EXPR:
4038       {
4039         /* Handle ~X as -1 - X.  */
4040         tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
4041         return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
4042                               build_int_cst (TREE_TYPE (chrec), -1), op);
4043       }
4044
4045     case INTEGER_CST:
4046       return chrec;
4047
4048     default:
4049       gcc_unreachable ();
4050       return NULL_TREE;
4051     }
4052 }
4053
4054 #define FLOOR_DIV(x,y) ((x) / (y))
4055
4056 /* Solves the special case of the Diophantine equation:
4057    | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
4058
4059    Computes the descriptions OVERLAPS_A and OVERLAPS_B.  NITER is the
4060    number of iterations that loops X and Y run.  The overlaps will be
4061    constructed as evolutions in dimension DIM.  */
4062
4063 static void
4064 compute_overlap_steps_for_affine_univar (HOST_WIDE_INT niter,
4065                                          HOST_WIDE_INT step_a,
4066                                          HOST_WIDE_INT step_b,
4067                                          affine_fn *overlaps_a,
4068                                          affine_fn *overlaps_b,
4069                                          tree *last_conflicts, int dim)
4070 {
4071   if (((step_a > 0 && step_b > 0)
4072        || (step_a < 0 && step_b < 0)))
4073     {
4074       HOST_WIDE_INT step_overlaps_a, step_overlaps_b;
4075       HOST_WIDE_INT gcd_steps_a_b, last_conflict, tau2;
4076
4077       gcd_steps_a_b = gcd (step_a, step_b);
4078       step_overlaps_a = step_b / gcd_steps_a_b;
4079       step_overlaps_b = step_a / gcd_steps_a_b;
4080
4081       if (niter > 0)
4082         {
4083           tau2 = FLOOR_DIV (niter, step_overlaps_a);
4084           tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
4085           last_conflict = tau2;
4086           *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
4087         }
4088       else
4089         *last_conflicts = chrec_dont_know;
4090
4091       *overlaps_a = affine_fn_univar (integer_zero_node, dim,
4092                                       build_int_cst (NULL_TREE,
4093                                                      step_overlaps_a));
4094       *overlaps_b = affine_fn_univar (integer_zero_node, dim,
4095                                       build_int_cst (NULL_TREE,
4096                                                      step_overlaps_b));
4097     }
4098
4099   else
4100     {
4101       *overlaps_a = affine_fn_cst (integer_zero_node);
4102       *overlaps_b = affine_fn_cst (integer_zero_node);
4103       *last_conflicts = integer_zero_node;
4104     }
4105 }
4106
4107 /* Solves the special case of a Diophantine equation where CHREC_A is
4108    an affine bivariate function, and CHREC_B is an affine univariate
4109    function.  For example,
4110
4111    | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
4112
4113    has the following overlapping functions:
4114
4115    | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
4116    | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
4117    | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
4118
4119    FORNOW: This is a specialized implementation for a case occurring in
4120    a common benchmark.  Implement the general algorithm.  */
4121
4122 static void
4123 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
4124                                       conflict_function **overlaps_a,
4125                                       conflict_function **overlaps_b,
4126                                       tree *last_conflicts)
4127 {
4128   bool xz_p, yz_p, xyz_p;
4129   HOST_WIDE_INT step_x, step_y, step_z;
4130   HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
4131   affine_fn overlaps_a_xz, overlaps_b_xz;
4132   affine_fn overlaps_a_yz, overlaps_b_yz;
4133   affine_fn overlaps_a_xyz, overlaps_b_xyz;
4134   affine_fn ova1, ova2, ovb;
4135   tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
4136
4137   step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
4138   step_y = int_cst_value (CHREC_RIGHT (chrec_a));
4139   step_z = int_cst_value (CHREC_RIGHT (chrec_b));
4140
4141   niter_x = max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)));
4142   niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a));
4143   niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b));
4144
4145   if (niter_x < 0 || niter_y < 0 || niter_z < 0)
4146     {
4147       if (dump_file && (dump_flags & TDF_DETAILS))
4148         fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
4149
4150       *overlaps_a = conflict_fn_not_known ();
4151       *overlaps_b = conflict_fn_not_known ();
4152       *last_conflicts = chrec_dont_know;
4153       return;
4154     }
4155
4156   niter = MIN (niter_x, niter_z);
4157   compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
4158                                            &overlaps_a_xz,
4159                                            &overlaps_b_xz,
4160                                            &last_conflicts_xz, 1);
4161   niter = MIN (niter_y, niter_z);
4162   compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
4163                                            &overlaps_a_yz,
4164                                            &overlaps_b_yz,
4165                                            &last_conflicts_yz, 2);
4166   niter = MIN (niter_x, niter_z);
4167   niter = MIN (niter_y, niter);
4168   compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
4169                                            &overlaps_a_xyz,
4170                                            &overlaps_b_xyz,
4171                                            &last_conflicts_xyz, 3);
4172
4173   xz_p = !integer_zerop (last_conflicts_xz);
4174   yz_p = !integer_zerop (last_conflicts_yz);
4175   xyz_p = !integer_zerop (last_conflicts_xyz);
4176
4177   if (xz_p || yz_p || xyz_p)
4178     {
4179       ova1 = affine_fn_cst (integer_zero_node);
4180       ova2 = affine_fn_cst (integer_zero_node);
4181       ovb = affine_fn_cst (integer_zero_node);
4182       if (xz_p)
4183         {
4184           affine_fn t0 = ova1;
4185           affine_fn t2 = ovb;
4186
4187           ova1 = affine_fn_plus (ova1, overlaps_a_xz);
4188           ovb = affine_fn_plus (ovb, overlaps_b_xz);
4189           affine_fn_free (t0);
4190           affine_fn_free (t2);
4191           *last_conflicts = last_conflicts_xz;
4192         }
4193       if (yz_p)
4194         {
4195           affine_fn t0 = ova2;
4196           affine_fn t2 = ovb;
4197
4198           ova2 = affine_fn_plus (ova2, overlaps_a_yz);
4199           ovb = affine_fn_plus (ovb, overlaps_b_yz);
4200           affine_fn_free (t0);
4201           affine_fn_free (t2);
4202           *last_conflicts = last_conflicts_yz;
4203         }
4204       if (xyz_p)
4205         {
4206           affine_fn t0 = ova1;
4207           affine_fn t2 = ova2;
4208           affine_fn t4 = ovb;
4209
4210           ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
4211           ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
4212           ovb = affine_fn_plus (ovb, overlaps_b_xyz);
4213           affine_fn_free (t0);
4214           affine_fn_free (t2);
4215           affine_fn_free (t4);
4216           *last_conflicts = last_conflicts_xyz;
4217         }
4218       *overlaps_a = conflict_fn (2, ova1, ova2);
4219       *overlaps_b = conflict_fn (1, ovb);
4220     }
4221   else
4222     {
4223       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4224       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4225       *last_conflicts = integer_zero_node;
4226     }
4227
4228   affine_fn_free (overlaps_a_xz);
4229   affine_fn_free (overlaps_b_xz);
4230   affine_fn_free (overlaps_a_yz);
4231   affine_fn_free (overlaps_b_yz);
4232   affine_fn_free (overlaps_a_xyz);
4233   affine_fn_free (overlaps_b_xyz);
4234 }
4235
4236 /* Copy the elements of vector VEC1 with length SIZE to VEC2.  */
4237
4238 static void
4239 lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
4240                     int size)
4241 {
4242   memcpy (vec2, vec1, size * sizeof (*vec1));
4243 }
4244
4245 /* Copy the elements of M x N matrix MAT1 to MAT2.  */
4246
4247 static void
4248 lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
4249                     int m, int n)
4250 {
4251   int i;
4252
4253   for (i = 0; i < m; i++)
4254     lambda_vector_copy (mat1[i], mat2[i], n);
4255 }
4256
4257 /* Store the N x N identity matrix in MAT.  */
4258
4259 static void
4260 lambda_matrix_id (lambda_matrix mat, int size)
4261 {
4262   int i, j;
4263
4264   for (i = 0; i < size; i++)
4265     for (j = 0; j < size; j++)
4266       mat[i][j] = (i == j) ? 1 : 0;
4267 }
4268
4269 /* Return the index of the first nonzero element of vector VEC1 between
4270    START and N.  We must have START <= N.
4271    Returns N if VEC1 is the zero vector.  */
4272
4273 static int
4274 lambda_vector_first_nz (lambda_vector vec1, int n, int start)
4275 {
4276   int j = start;
4277   while (j < n && vec1[j] == 0)
4278     j++;
4279   return j;
4280 }
4281
4282 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
4283    R2 = R2 + CONST1 * R1.  */
4284
4285 static bool
4286 lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2,
4287                        lambda_int const1)
4288 {
4289   int i;
4290
4291   if (const1 == 0)
4292     return true;
4293
4294   for (i = 0; i < n; i++)
4295     {
4296       bool ovf;
4297       lambda_int tem = mul_hwi (mat[r1][i], const1, &ovf);
4298       if (ovf)
4299         return false;
4300       lambda_int tem2 = add_hwi (mat[r2][i], tem, &ovf);
4301       if (ovf || tem2 == HOST_WIDE_INT_MIN)
4302         return false;
4303       mat[r2][i] = tem2;
4304     }
4305
4306   return true;
4307 }
4308
4309 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
4310    and store the result in VEC2.  */
4311
4312 static void
4313 lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
4314                           int size, lambda_int const1)
4315 {
4316   int i;
4317
4318   if (const1 == 0)
4319     lambda_vector_clear (vec2, size);
4320   else
4321     for (i = 0; i < size; i++)
4322       vec2[i] = const1 * vec1[i];
4323 }
4324
4325 /* Negate vector VEC1 with length SIZE and store it in VEC2.  */
4326
4327 static void
4328 lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
4329                       int size)
4330 {
4331   lambda_vector_mult_const (vec1, vec2, size, -1);
4332 }
4333
4334 /* Negate row R1 of matrix MAT which has N columns.  */
4335
4336 static void
4337 lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
4338 {
4339   lambda_vector_negate (mat[r1], mat[r1], n);
4340 }
4341
4342 /* Return true if two vectors are equal.  */
4343
4344 static bool
4345 lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
4346 {
4347   int i;
4348   for (i = 0; i < size; i++)
4349     if (vec1[i] != vec2[i])
4350       return false;
4351   return true;
4352 }
4353
4354 /* Given an M x N integer matrix A, this function determines an M x
4355    M unimodular matrix U, and an M x N echelon matrix S such that
4356    "U.A = S".  This decomposition is also known as "right Hermite".
4357
4358    Ref: Algorithm 2.1 page 33 in "Loop Transformations for
4359    Restructuring Compilers" Utpal Banerjee.  */
4360
4361 static bool
4362 lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
4363                              lambda_matrix S, lambda_matrix U)
4364 {
4365   int i, j, i0 = 0;
4366
4367   lambda_matrix_copy (A, S, m, n);
4368   lambda_matrix_id (U, m);
4369
4370   for (j = 0; j < n; j++)
4371     {
4372       if (lambda_vector_first_nz (S[j], m, i0) < m)
4373         {
4374           ++i0;
4375           for (i = m - 1; i >= i0; i--)
4376             {
4377               while (S[i][j] != 0)
4378                 {
4379                   lambda_int factor, a, b;
4380
4381                   a = S[i-1][j];
4382                   b = S[i][j];
4383                   gcc_assert (a != HOST_WIDE_INT_MIN);
4384                   factor = a / b;
4385
4386                   if (!lambda_matrix_row_add (S, n, i, i-1, -factor))
4387                     return false;
4388                   std::swap (S[i], S[i-1]);
4389
4390                   if (!lambda_matrix_row_add (U, m, i, i-1, -factor))
4391                     return false;
4392                   std::swap (U[i], U[i-1]);
4393                 }
4394             }
4395         }
4396     }
4397
4398   return true;
4399 }
4400
4401 /* Determines the overlapping elements due to accesses CHREC_A and
4402    CHREC_B, that are affine functions.  This function cannot handle
4403    symbolic evolution functions, ie. when initial conditions are
4404    parameters, because it uses lambda matrices of integers.  */
4405
4406 static void
4407 analyze_subscript_affine_affine (tree chrec_a,
4408                                  tree chrec_b,
4409                                  conflict_function **overlaps_a,
4410                                  conflict_function **overlaps_b,
4411                                  tree *last_conflicts)
4412 {
4413   unsigned nb_vars_a, nb_vars_b, dim;
4414   lambda_int gamma, gcd_alpha_beta;
4415   lambda_matrix A, U, S;
4416   struct obstack scratch_obstack;
4417
4418   if (eq_evolutions_p (chrec_a, chrec_b))
4419     {
4420       /* The accessed index overlaps for each iteration in the
4421          loop.  */
4422       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4423       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4424       *last_conflicts = chrec_dont_know;
4425       return;
4426     }
4427   if (dump_file && (dump_flags & TDF_DETAILS))
4428     fprintf (dump_file, "(analyze_subscript_affine_affine \n");
4429
4430   /* For determining the initial intersection, we have to solve a
4431      Diophantine equation.  This is the most time consuming part.
4432
4433      For answering to the question: "Is there a dependence?" we have
4434      to prove that there exists a solution to the Diophantine
4435      equation, and that the solution is in the iteration domain,
4436      i.e. the solution is positive or zero, and that the solution
4437      happens before the upper bound loop.nb_iterations.  Otherwise
4438      there is no dependence.  This function outputs a description of
4439      the iterations that hold the intersections.  */
4440
4441   nb_vars_a = nb_vars_in_chrec (chrec_a);
4442   nb_vars_b = nb_vars_in_chrec (chrec_b);
4443
4444   gcc_obstack_init (&scratch_obstack);
4445
4446   dim = nb_vars_a + nb_vars_b;
4447   U = lambda_matrix_new (dim, dim, &scratch_obstack);
4448   A = lambda_matrix_new (dim, 1, &scratch_obstack);
4449   S = lambda_matrix_new (dim, 1, &scratch_obstack);
4450
4451   tree init_a = initialize_matrix_A (A, chrec_a, 0, 1);
4452   tree init_b = initialize_matrix_A (A, chrec_b, nb_vars_a, -1);
4453   if (init_a == chrec_dont_know
4454       || init_b == chrec_dont_know)
4455     {
4456       if (dump_file && (dump_flags & TDF_DETAILS))
4457         fprintf (dump_file, "affine-affine test failed: "
4458                  "representation issue.\n");
4459       *overlaps_a = conflict_fn_not_known ();
4460       *overlaps_b = conflict_fn_not_known ();
4461       *last_conflicts = chrec_dont_know;
4462       goto end_analyze_subs_aa;
4463     }
4464   gamma = int_cst_value (init_b) - int_cst_value (init_a);
4465
4466   /* Don't do all the hard work of solving the Diophantine equation
4467      when we already know the solution: for example,
4468      | {3, +, 1}_1
4469      | {3, +, 4}_2
4470      | gamma = 3 - 3 = 0.
4471      Then the first overlap occurs during the first iterations:
4472      | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
4473   */
4474   if (gamma == 0)
4475     {
4476       if (nb_vars_a == 1 && nb_vars_b == 1)
4477         {
4478           HOST_WIDE_INT step_a, step_b;
4479           HOST_WIDE_INT niter, niter_a, niter_b;
4480           affine_fn ova, ovb;
4481
4482           niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a));
4483           niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b));
4484           niter = MIN (niter_a, niter_b);
4485           step_a = int_cst_value (CHREC_RIGHT (chrec_a));
4486           step_b = int_cst_value (CHREC_RIGHT (chrec_b));
4487
4488           compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
4489                                                    &ova, &ovb,
4490                                                    last_conflicts, 1);
4491           *overlaps_a = conflict_fn (1, ova);
4492           *overlaps_b = conflict_fn (1, ovb);
4493         }
4494
4495       else if (nb_vars_a == 2 && nb_vars_b == 1)
4496         compute_overlap_steps_for_affine_1_2
4497           (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
4498
4499       else if (nb_vars_a == 1 && nb_vars_b == 2)
4500         compute_overlap_steps_for_affine_1_2
4501           (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
4502
4503       else
4504         {
4505           if (dump_file && (dump_flags & TDF_DETAILS))
4506             fprintf (dump_file, "affine-affine test failed: too many variables.\n");
4507           *overlaps_a = conflict_fn_not_known ();
4508           *overlaps_b = conflict_fn_not_known ();
4509           *last_conflicts = chrec_dont_know;
4510         }
4511       goto end_analyze_subs_aa;
4512     }
4513
4514   /* U.A = S */
4515   if (!lambda_matrix_right_hermite (A, dim, 1, S, U))
4516     {
4517       *overlaps_a = conflict_fn_not_known ();
4518       *overlaps_b = conflict_fn_not_known ();
4519       *last_conflicts = chrec_dont_know;
4520       goto end_analyze_subs_aa;
4521     }
4522
4523   if (S[0][0] < 0)
4524     {
4525       S[0][0] *= -1;
4526       lambda_matrix_row_negate (U, dim, 0);
4527     }
4528   gcd_alpha_beta = S[0][0];
4529
4530   /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
4531      but that is a quite strange case.  Instead of ICEing, answer
4532      don't know.  */
4533   if (gcd_alpha_beta == 0)
4534     {
4535       *overlaps_a = conflict_fn_not_known ();
4536       *overlaps_b = conflict_fn_not_known ();
4537       *last_conflicts = chrec_dont_know;
4538       goto end_analyze_subs_aa;
4539     }
4540
4541   /* The classic "gcd-test".  */
4542   if (!int_divides_p (gcd_alpha_beta, gamma))
4543     {
4544       /* The "gcd-test" has determined that there is no integer
4545          solution, i.e. there is no dependence.  */
4546       *overlaps_a = conflict_fn_no_dependence ();
4547       *overlaps_b = conflict_fn_no_dependence ();
4548       *last_conflicts = integer_zero_node;
4549     }
4550
4551   /* Both access functions are univariate.  This includes SIV and MIV cases.  */
4552   else if (nb_vars_a == 1 && nb_vars_b == 1)
4553     {
4554       /* Both functions should have the same evolution sign.  */
4555       if (((A[0][0] > 0 && -A[1][0] > 0)
4556            || (A[0][0] < 0 && -A[1][0] < 0)))
4557         {
4558           /* The solutions are given by:
4559              |
4560              | [GAMMA/GCD_ALPHA_BETA  t].[u11 u12]  = [x0]
4561              |                           [u21 u22]    [y0]
4562
4563              For a given integer t.  Using the following variables,
4564
4565              | i0 = u11 * gamma / gcd_alpha_beta
4566              | j0 = u12 * gamma / gcd_alpha_beta
4567              | i1 = u21
4568              | j1 = u22
4569
4570              the solutions are:
4571
4572              | x0 = i0 + i1 * t,
4573              | y0 = j0 + j1 * t.  */
4574           HOST_WIDE_INT i0, j0, i1, j1;
4575
4576           i0 = U[0][0] * gamma / gcd_alpha_beta;
4577           j0 = U[0][1] * gamma / gcd_alpha_beta;
4578           i1 = U[1][0];
4579           j1 = U[1][1];
4580
4581           if ((i1 == 0 && i0 < 0)
4582               || (j1 == 0 && j0 < 0))
4583             {
4584               /* There is no solution.
4585                  FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
4586                  falls in here, but for the moment we don't look at the
4587                  upper bound of the iteration domain.  */
4588               *overlaps_a = conflict_fn_no_dependence ();
4589               *overlaps_b = conflict_fn_no_dependence ();
4590               *last_conflicts = integer_zero_node;
4591               goto end_analyze_subs_aa;
4592             }
4593
4594           if (i1 > 0 && j1 > 0)
4595             {
4596               HOST_WIDE_INT niter_a
4597                 = max_stmt_executions_int (get_chrec_loop (chrec_a));
4598               HOST_WIDE_INT niter_b
4599                 = max_stmt_executions_int (get_chrec_loop (chrec_b));
4600               HOST_WIDE_INT niter = MIN (niter_a, niter_b);
4601
4602               /* (X0, Y0) is a solution of the Diophantine equation:
4603                  "chrec_a (X0) = chrec_b (Y0)".  */
4604               HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
4605                                         CEIL (-j0, j1));
4606               HOST_WIDE_INT x0 = i1 * tau1 + i0;
4607               HOST_WIDE_INT y0 = j1 * tau1 + j0;
4608
4609               /* (X1, Y1) is the smallest positive solution of the eq
4610                  "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
4611                  first conflict occurs.  */
4612               HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
4613               HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
4614               HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
4615
4616               if (niter > 0)
4617                 {
4618                   /* If the overlap occurs outside of the bounds of the
4619                      loop, there is no dependence.  */
4620                   if (x1 >= niter_a || y1 >= niter_b)
4621                     {
4622                       *overlaps_a = conflict_fn_no_dependence ();
4623                       *overlaps_b = conflict_fn_no_dependence ();
4624                       *last_conflicts = integer_zero_node;
4625                       goto end_analyze_subs_aa;
4626                     }
4627
4628                   /* max stmt executions can get quite large, avoid
4629                      overflows by using wide ints here.  */
4630                   widest_int tau2
4631                     = wi::smin (wi::sdiv_floor (wi::sub (niter_a, i0), i1),
4632                                 wi::sdiv_floor (wi::sub (niter_b, j0), j1));
4633                   widest_int last_conflict = wi::sub (tau2, (x1 - i0)/i1);
4634                   if (wi::min_precision (last_conflict, SIGNED)
4635                       <= TYPE_PRECISION (integer_type_node))
4636                     *last_conflicts
4637                        = build_int_cst (integer_type_node,
4638                                         last_conflict.to_shwi ());
4639                   else
4640                     *last_conflicts = chrec_dont_know;
4641                 }
4642               else
4643                 *last_conflicts = chrec_dont_know;
4644
4645               *overlaps_a
4646                 = conflict_fn (1,
4647                                affine_fn_univar (build_int_cst (NULL_TREE, x1),
4648                                                  1,
4649                                                  build_int_cst (NULL_TREE, i1)));
4650               *overlaps_b
4651                 = conflict_fn (1,
4652                                affine_fn_univar (build_int_cst (NULL_TREE, y1),
4653                                                  1,
4654                                                  build_int_cst (NULL_TREE, j1)));
4655             }
4656           else
4657             {
4658               /* FIXME: For the moment, the upper bound of the
4659                  iteration domain for i and j is not checked.  */
4660               if (dump_file && (dump_flags & TDF_DETAILS))
4661                 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
4662               *overlaps_a = conflict_fn_not_known ();
4663               *overlaps_b = conflict_fn_not_known ();
4664               *last_conflicts = chrec_dont_know;
4665             }
4666         }
4667       else
4668         {
4669           if (dump_file && (dump_flags & TDF_DETAILS))
4670             fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
4671           *overlaps_a = conflict_fn_not_known ();
4672           *overlaps_b = conflict_fn_not_known ();
4673           *last_conflicts = chrec_dont_know;
4674         }
4675     }
4676   else
4677     {
4678       if (dump_file && (dump_flags & TDF_DETAILS))
4679         fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
4680       *overlaps_a = conflict_fn_not_known ();
4681       *overlaps_b = conflict_fn_not_known ();
4682       *last_conflicts = chrec_dont_know;
4683     }
4684
4685 end_analyze_subs_aa:
4686   obstack_free (&scratch_obstack, NULL);
4687   if (dump_file && (dump_flags & TDF_DETAILS))
4688     {
4689       fprintf (dump_file, "  (overlaps_a = ");
4690       dump_conflict_function (dump_file, *overlaps_a);
4691       fprintf (dump_file, ")\n  (overlaps_b = ");
4692       dump_conflict_function (dump_file, *overlaps_b);
4693       fprintf (dump_file, "))\n");
4694     }
4695 }
4696
4697 /* Returns true when analyze_subscript_affine_affine can be used for
4698    determining the dependence relation between chrec_a and chrec_b,
4699    that contain symbols.  This function modifies chrec_a and chrec_b
4700    such that the analysis result is the same, and such that they don't
4701    contain symbols, and then can safely be passed to the analyzer.
4702
4703    Example: The analysis of the following tuples of evolutions produce
4704    the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
4705    vs. {0, +, 1}_1
4706
4707    {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
4708    {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
4709 */
4710
4711 static bool
4712 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
4713 {
4714   tree diff, type, left_a, left_b, right_b;
4715
4716   if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
4717       || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
4718     /* FIXME: For the moment not handled.  Might be refined later.  */
4719     return false;
4720
4721   type = chrec_type (*chrec_a);
4722   left_a = CHREC_LEFT (*chrec_a);
4723   left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
4724   diff = chrec_fold_minus (type, left_a, left_b);
4725
4726   if (!evolution_function_is_constant_p (diff))
4727     return false;
4728
4729   if (dump_file && (dump_flags & TDF_DETAILS))
4730     fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
4731
4732   *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
4733                                      diff, CHREC_RIGHT (*chrec_a));
4734   right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
4735   *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
4736                                      build_int_cst (type, 0),
4737                                      right_b);
4738   return true;
4739 }
4740
4741 /* Analyze a SIV (Single Index Variable) subscript.  *OVERLAPS_A and
4742    *OVERLAPS_B are initialized to the functions that describe the
4743    relation between the elements accessed twice by CHREC_A and
4744    CHREC_B.  For k >= 0, the following property is verified:
4745
4746    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
4747
4748 static void
4749 analyze_siv_subscript (tree chrec_a,
4750                        tree chrec_b,
4751                        conflict_function **overlaps_a,
4752                        conflict_function **overlaps_b,
4753                        tree *last_conflicts,
4754                        int loop_nest_num)
4755 {
4756   dependence_stats.num_siv++;
4757
4758   if (dump_file && (dump_flags & TDF_DETAILS))
4759     fprintf (dump_file, "(analyze_siv_subscript \n");
4760
4761   if (evolution_function_is_constant_p (chrec_a)
4762       && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
4763     analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
4764                                       overlaps_a, overlaps_b, last_conflicts);
4765
4766   else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
4767            && evolution_function_is_constant_p (chrec_b))
4768     analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
4769                                       overlaps_b, overlaps_a, last_conflicts);
4770
4771   else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
4772            && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
4773     {
4774       if (!chrec_contains_symbols (chrec_a)
4775           && !chrec_contains_symbols (chrec_b))
4776         {
4777           analyze_subscript_affine_affine (chrec_a, chrec_b,
4778                                            overlaps_a, overlaps_b,
4779                                            last_conflicts);
4780
4781           if (CF_NOT_KNOWN_P (*overlaps_a)
4782               || CF_NOT_KNOWN_P (*overlaps_b))
4783             dependence_stats.num_siv_unimplemented++;
4784           else if (CF_NO_DEPENDENCE_P (*overlaps_a)
4785                    || CF_NO_DEPENDENCE_P (*overlaps_b))
4786             dependence_stats.num_siv_independent++;
4787           else
4788             dependence_stats.num_siv_dependent++;
4789         }
4790       else if (can_use_analyze_subscript_affine_affine (&chrec_a,
4791                                                         &chrec_b))
4792         {
4793           analyze_subscript_affine_affine (chrec_a, chrec_b,
4794                                            overlaps_a, overlaps_b,
4795                                            last_conflicts);
4796
4797           if (CF_NOT_KNOWN_P (*overlaps_a)
4798               || CF_NOT_KNOWN_P (*overlaps_b))
4799             dependence_stats.num_siv_unimplemented++;
4800           else if (CF_NO_DEPENDENCE_P (*overlaps_a)
4801                    || CF_NO_DEPENDENCE_P (*overlaps_b))
4802             dependence_stats.num_siv_independent++;
4803           else
4804             dependence_stats.num_siv_dependent++;
4805         }
4806       else
4807         goto siv_subscript_dontknow;
4808     }
4809
4810   else
4811     {
4812     siv_subscript_dontknow:;
4813       if (dump_file && (dump_flags & TDF_DETAILS))
4814         fprintf (dump_file, "  siv test failed: unimplemented");
4815       *overlaps_a = conflict_fn_not_known ();
4816       *overlaps_b = conflict_fn_not_known ();
4817       *last_conflicts = chrec_dont_know;
4818       dependence_stats.num_siv_unimplemented++;
4819     }
4820
4821   if (dump_file && (dump_flags & TDF_DETAILS))
4822     fprintf (dump_file, ")\n");
4823 }
4824
4825 /* Returns false if we can prove that the greatest common divisor of the steps
4826    of CHREC does not divide CST, false otherwise.  */
4827
4828 static bool
4829 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
4830 {
4831   HOST_WIDE_INT cd = 0, val;
4832   tree step;
4833
4834   if (!tree_fits_shwi_p (cst))
4835     return true;
4836   val = tree_to_shwi (cst);
4837
4838   while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
4839     {
4840       step = CHREC_RIGHT (chrec);
4841       if (!tree_fits_shwi_p (step))
4842         return true;
4843       cd = gcd (cd, tree_to_shwi (step));
4844       chrec = CHREC_LEFT (chrec);
4845     }
4846
4847   return val % cd == 0;
4848 }
4849
4850 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
4851    LOOP_NEST.  *OVERLAPS_A and *OVERLAPS_B are initialized to the
4852    functions that describe the relation between the elements accessed
4853    twice by CHREC_A and CHREC_B.  For k >= 0, the following property
4854    is verified:
4855
4856    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
4857
4858 static void
4859 analyze_miv_subscript (tree chrec_a,
4860                        tree chrec_b,
4861                        conflict_function **overlaps_a,
4862                        conflict_function **overlaps_b,
4863                        tree *last_conflicts,
4864                        class loop *loop_nest)
4865 {
4866   tree type, difference;
4867
4868   dependence_stats.num_miv++;
4869   if (dump_file && (dump_flags & TDF_DETAILS))
4870     fprintf (dump_file, "(analyze_miv_subscript \n");
4871
4872   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
4873   chrec_a = chrec_convert (type, chrec_a, NULL);
4874   chrec_b = chrec_convert (type, chrec_b, NULL);
4875   difference = chrec_fold_minus (type, chrec_a, chrec_b);
4876
4877   if (eq_evolutions_p (chrec_a, chrec_b))
4878     {
4879       /* Access functions are the same: all the elements are accessed
4880          in the same order.  */
4881       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4882       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4883       *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
4884       dependence_stats.num_miv_dependent++;
4885     }
4886
4887   else if (evolution_function_is_constant_p (difference)
4888            && evolution_function_is_affine_multivariate_p (chrec_a,
4889                                                            loop_nest->num)
4890            && !gcd_of_steps_may_divide_p (chrec_a, difference))
4891     {
4892       /* testsuite/.../ssa-chrec-33.c
4893          {{21, +, 2}_1, +, -2}_2  vs.  {{20, +, 2}_1, +, -2}_2
4894
4895          The difference is 1, and all the evolution steps are multiples
4896          of 2, consequently there are no overlapping elements.  */
4897       *overlaps_a = conflict_fn_no_dependence ();
4898       *overlaps_b = conflict_fn_no_dependence ();
4899       *last_conflicts = integer_zero_node;
4900       dependence_stats.num_miv_independent++;
4901     }
4902
4903   else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest->num)
4904            && !chrec_contains_symbols (chrec_a, loop_nest)
4905            && evolution_function_is_affine_in_loop (chrec_b, loop_nest->num)
4906            && !chrec_contains_symbols (chrec_b, loop_nest))
4907     {
4908       /* testsuite/.../ssa-chrec-35.c
4909          {0, +, 1}_2  vs.  {0, +, 1}_3
4910          the overlapping elements are respectively located at iterations:
4911          {0, +, 1}_x and {0, +, 1}_x,
4912          in other words, we have the equality:
4913          {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
4914
4915          Other examples:
4916          {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
4917          {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
4918
4919          {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
4920          {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
4921       */
4922       analyze_subscript_affine_affine (chrec_a, chrec_b,
4923                                        overlaps_a, overlaps_b, last_conflicts);
4924
4925       if (CF_NOT_KNOWN_P (*overlaps_a)
4926           || CF_NOT_KNOWN_P (*overlaps_b))
4927         dependence_stats.num_miv_unimplemented++;
4928       else if (CF_NO_DEPENDENCE_P (*overlaps_a)
4929                || CF_NO_DEPENDENCE_P (*overlaps_b))
4930         dependence_stats.num_miv_independent++;
4931       else
4932         dependence_stats.num_miv_dependent++;
4933     }
4934
4935   else
4936     {
4937       /* When the analysis is too difficult, answer "don't know".  */
4938       if (dump_file && (dump_flags & TDF_DETAILS))
4939         fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
4940
4941       *overlaps_a = conflict_fn_not_known ();
4942       *overlaps_b = conflict_fn_not_known ();
4943       *last_conflicts = chrec_dont_know;
4944       dependence_stats.num_miv_unimplemented++;
4945     }
4946
4947   if (dump_file && (dump_flags & TDF_DETAILS))
4948     fprintf (dump_file, ")\n");
4949 }
4950
4951 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
4952    with respect to LOOP_NEST.  OVERLAP_ITERATIONS_A and
4953    OVERLAP_ITERATIONS_B are initialized with two functions that
4954    describe the iterations that contain conflicting elements.
4955
4956    Remark: For an integer k >= 0, the following equality is true:
4957
4958    CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
4959 */
4960
4961 static void
4962 analyze_overlapping_iterations (tree chrec_a,
4963                                 tree chrec_b,
4964                                 conflict_function **overlap_iterations_a,
4965                                 conflict_function **overlap_iterations_b,
4966                                 tree *last_conflicts, class loop *loop_nest)
4967 {
4968   unsigned int lnn = loop_nest->num;
4969
4970   dependence_stats.num_subscript_tests++;
4971
4972   if (dump_file && (dump_flags & TDF_DETAILS))
4973     {
4974       fprintf (dump_file, "(analyze_overlapping_iterations \n");
4975       fprintf (dump_file, "  (chrec_a = ");
4976       print_generic_expr (dump_file, chrec_a);
4977       fprintf (dump_file, ")\n  (chrec_b = ");
4978       print_generic_expr (dump_file, chrec_b);
4979       fprintf (dump_file, ")\n");
4980     }
4981
4982   if (chrec_a == NULL_TREE
4983       || chrec_b == NULL_TREE
4984       || chrec_contains_undetermined (chrec_a)
4985       || chrec_contains_undetermined (chrec_b))
4986     {
4987       dependence_stats.num_subscript_undetermined++;
4988
4989       *overlap_iterations_a = conflict_fn_not_known ();
4990       *overlap_iterations_b = conflict_fn_not_known ();
4991     }
4992
4993   /* If they are the same chrec, and are affine, they overlap
4994      on every iteration.  */
4995   else if (eq_evolutions_p (chrec_a, chrec_b)
4996            && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
4997                || operand_equal_p (chrec_a, chrec_b, 0)))
4998     {
4999       dependence_stats.num_same_subscript_function++;
5000       *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
5001       *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
5002       *last_conflicts = chrec_dont_know;
5003     }
5004
5005   /* If they aren't the same, and aren't affine, we can't do anything
5006      yet.  */
5007   else if ((chrec_contains_symbols (chrec_a)
5008             || chrec_contains_symbols (chrec_b))
5009            && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
5010                || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
5011     {
5012       dependence_stats.num_subscript_undetermined++;
5013       *overlap_iterations_a = conflict_fn_not_known ();
5014       *overlap_iterations_b = conflict_fn_not_known ();
5015     }
5016
5017   else if (ziv_subscript_p (chrec_a, chrec_b))
5018     analyze_ziv_subscript (chrec_a, chrec_b,
5019                            overlap_iterations_a, overlap_iterations_b,
5020                            last_conflicts);
5021
5022   else if (siv_subscript_p (chrec_a, chrec_b))
5023     analyze_siv_subscript (chrec_a, chrec_b,
5024                            overlap_iterations_a, overlap_iterations_b,
5025                            last_conflicts, lnn);
5026
5027   else
5028     analyze_miv_subscript (chrec_a, chrec_b,
5029                            overlap_iterations_a, overlap_iterations_b,
5030                            last_conflicts, loop_nest);
5031
5032   if (dump_file && (dump_flags & TDF_DETAILS))
5033     {
5034       fprintf (dump_file, "  (overlap_iterations_a = ");
5035       dump_conflict_function (dump_file, *overlap_iterations_a);
5036       fprintf (dump_file, ")\n  (overlap_iterations_b = ");
5037       dump_conflict_function (dump_file, *overlap_iterations_b);
5038       fprintf (dump_file, "))\n");
5039     }
5040 }
5041
5042 /* Helper function for uniquely inserting distance vectors.  */
5043
5044 static void
5045 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
5046 {
5047   for (lambda_vector v : DDR_DIST_VECTS (ddr))
5048     if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
5049       return;
5050
5051   DDR_DIST_VECTS (ddr).safe_push (dist_v);
5052 }
5053
5054 /* Helper function for uniquely inserting direction vectors.  */
5055
5056 static void
5057 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
5058 {
5059   for (lambda_vector v : DDR_DIR_VECTS (ddr))
5060     if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
5061       return;
5062
5063   DDR_DIR_VECTS (ddr).safe_push (dir_v);
5064 }
5065
5066 /* Add a distance of 1 on all the loops outer than INDEX.  If we
5067    haven't yet determined a distance for this outer loop, push a new
5068    distance vector composed of the previous distance, and a distance
5069    of 1 for this outer loop.  Example:
5070
5071    | loop_1
5072    |   loop_2
5073    |     A[10]
5074    |   endloop_2
5075    | endloop_1
5076
5077    Saved vectors are of the form (dist_in_1, dist_in_2).  First, we
5078    save (0, 1), then we have to save (1, 0).  */
5079
5080 static void
5081 add_outer_distances (struct data_dependence_relation *ddr,
5082                      lambda_vector dist_v, int index)
5083 {
5084   /* For each outer loop where init_v is not set, the accesses are
5085      in dependence of distance 1 in the loop.  */
5086   while (--index >= 0)
5087     {
5088       lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5089       lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
5090       save_v[index] = 1;
5091       save_dist_v (ddr, save_v);
5092     }
5093 }
5094
5095 /* Return false when fail to represent the data dependence as a
5096    distance vector.  A_INDEX is the index of the first reference
5097    (0 for DDR_A, 1 for DDR_B) and B_INDEX is the index of the
5098    second reference.  INIT_B is set to true when a component has been
5099    added to the distance vector DIST_V.  INDEX_CARRY is then set to
5100    the index in DIST_V that carries the dependence.  */
5101
5102 static bool
5103 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
5104                              unsigned int a_index, unsigned int b_index,
5105                              lambda_vector dist_v, bool *init_b,
5106                              int *index_carry)
5107 {
5108   unsigned i;
5109   lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5110   class loop *loop = DDR_LOOP_NEST (ddr)[0];
5111
5112   for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
5113     {
5114       tree access_fn_a, access_fn_b;
5115       struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
5116
5117       if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
5118         {
5119           non_affine_dependence_relation (ddr);
5120           return false;
5121         }
5122
5123       access_fn_a = SUB_ACCESS_FN (subscript, a_index);
5124       access_fn_b = SUB_ACCESS_FN (subscript, b_index);
5125
5126       if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
5127           && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
5128         {
5129           HOST_WIDE_INT dist;
5130           int index;
5131           int var_a = CHREC_VARIABLE (access_fn_a);
5132           int var_b = CHREC_VARIABLE (access_fn_b);
5133
5134           if (var_a != var_b
5135               || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
5136             {
5137               non_affine_dependence_relation (ddr);
5138               return false;
5139             }
5140
5141           /* When data references are collected in a loop while data
5142              dependences are analyzed in loop nest nested in the loop, we
5143              would have more number of access functions than number of
5144              loops.  Skip access functions of loops not in the loop nest.
5145
5146              See PR89725 for more information.  */
5147           if (flow_loop_nested_p (get_loop (cfun, var_a), loop))
5148             continue;
5149
5150           dist = int_cst_value (SUB_DISTANCE (subscript));
5151           index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
5152           *index_carry = MIN (index, *index_carry);
5153
5154           /* This is the subscript coupling test.  If we have already
5155              recorded a distance for this loop (a distance coming from
5156              another subscript), it should be the same.  For example,
5157              in the following code, there is no dependence:
5158
5159              | loop i = 0, N, 1
5160              |   T[i+1][i] = ...
5161              |   ... = T[i][i]
5162              | endloop
5163           */
5164           if (init_v[index] != 0 && dist_v[index] != dist)
5165             {
5166               finalize_ddr_dependent (ddr, chrec_known);
5167               return false;
5168             }
5169
5170           dist_v[index] = dist;
5171           init_v[index] = 1;
5172           *init_b = true;
5173         }
5174       else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
5175         {
5176           /* This can be for example an affine vs. constant dependence
5177              (T[i] vs. T[3]) that is not an affine dependence and is
5178              not representable as a distance vector.  */
5179           non_affine_dependence_relation (ddr);
5180           return false;
5181         }
5182       else
5183         *init_b = true;
5184     }
5185
5186   return true;
5187 }
5188
5189 /* Return true when the DDR contains only invariant access functions wrto. loop
5190    number LNUM.  */
5191
5192 static bool
5193 invariant_access_functions (const struct data_dependence_relation *ddr,
5194                             int lnum)
5195 {
5196   for (subscript *sub : DDR_SUBSCRIPTS (ddr))
5197     if (!evolution_function_is_invariant_p (SUB_ACCESS_FN (sub, 0), lnum)
5198         || !evolution_function_is_invariant_p (SUB_ACCESS_FN (sub, 1), lnum))
5199       return false;
5200
5201   return true;
5202 }
5203
5204 /* Helper function for the case where DDR_A and DDR_B are the same
5205    multivariate access function with a constant step.  For an example
5206    see pr34635-1.c.  */
5207
5208 static void
5209 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
5210 {
5211   int x_1, x_2;
5212   tree c_1 = CHREC_LEFT (c_2);
5213   tree c_0 = CHREC_LEFT (c_1);
5214   lambda_vector dist_v;
5215   HOST_WIDE_INT v1, v2, cd;
5216
5217   /* Polynomials with more than 2 variables are not handled yet.  When
5218      the evolution steps are parameters, it is not possible to
5219      represent the dependence using classical distance vectors.  */
5220   if (TREE_CODE (c_0) != INTEGER_CST
5221       || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
5222       || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
5223     {
5224       DDR_AFFINE_P (ddr) = false;
5225       return;
5226     }
5227
5228   x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
5229   x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
5230
5231   /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2).  */
5232   dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5233   v1 = int_cst_value (CHREC_RIGHT (c_1));
5234   v2 = int_cst_value (CHREC_RIGHT (c_2));
5235   cd = gcd (v1, v2);
5236   v1 /= cd;
5237   v2 /= cd;
5238
5239   if (v2 < 0)
5240     {
5241       v2 = -v2;
5242       v1 = -v1;
5243     }
5244
5245   dist_v[x_1] = v2;
5246   dist_v[x_2] = -v1;
5247   save_dist_v (ddr, dist_v);
5248
5249   add_outer_distances (ddr, dist_v, x_1);
5250 }
5251
5252 /* Helper function for the case where DDR_A and DDR_B are the same
5253    access functions.  */
5254
5255 static void
5256 add_other_self_distances (struct data_dependence_relation *ddr)
5257 {
5258   lambda_vector dist_v;
5259   unsigned i;
5260   int index_carry = DDR_NB_LOOPS (ddr);
5261   subscript *sub;
5262   class loop *loop = DDR_LOOP_NEST (ddr)[0];
5263
5264   FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
5265     {
5266       tree access_fun = SUB_ACCESS_FN (sub, 0);
5267
5268       if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
5269         {
5270           if (!evolution_function_is_univariate_p (access_fun, loop->num))
5271             {
5272               if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
5273                 {
5274                   DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
5275                   return;
5276                 }
5277
5278               access_fun = SUB_ACCESS_FN (DDR_SUBSCRIPT (ddr, 0), 0);
5279
5280               if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
5281                 add_multivariate_self_dist (ddr, access_fun);
5282               else
5283                 /* The evolution step is not constant: it varies in
5284                    the outer loop, so this cannot be represented by a
5285                    distance vector.  For example in pr34635.c the
5286                    evolution is {0, +, {0, +, 4}_1}_2.  */
5287                 DDR_AFFINE_P (ddr) = false;
5288
5289               return;
5290             }
5291
5292           /* When data references are collected in a loop while data
5293              dependences are analyzed in loop nest nested in the loop, we
5294              would have more number of access functions than number of
5295              loops.  Skip access functions of loops not in the loop nest.
5296
5297              See PR89725 for more information.  */
5298           if (flow_loop_nested_p (get_loop (cfun, CHREC_VARIABLE (access_fun)),
5299                                   loop))
5300             continue;
5301
5302           index_carry = MIN (index_carry,
5303                              index_in_loop_nest (CHREC_VARIABLE (access_fun),
5304                                                  DDR_LOOP_NEST (ddr)));
5305         }
5306     }
5307
5308   dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5309   add_outer_distances (ddr, dist_v, index_carry);
5310 }
5311
5312 static void
5313 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
5314 {
5315   lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5316
5317   dist_v[0] = 1;
5318   save_dist_v (ddr, dist_v);
5319 }
5320
5321 /* Adds a unit distance vector to DDR when there is a 0 overlap.  This
5322    is the case for example when access functions are the same and
5323    equal to a constant, as in:
5324
5325    | loop_1
5326    |   A[3] = ...
5327    |   ... = A[3]
5328    | endloop_1
5329
5330    in which case the distance vectors are (0) and (1).  */
5331
5332 static void
5333 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
5334 {
5335   unsigned i, j;
5336
5337   for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
5338     {
5339       subscript_p sub = DDR_SUBSCRIPT (ddr, i);
5340       conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
5341       conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
5342
5343       for (j = 0; j < ca->n; j++)
5344         if (affine_function_zero_p (ca->fns[j]))
5345           {
5346             insert_innermost_unit_dist_vector (ddr);
5347             return;
5348           }
5349
5350       for (j = 0; j < cb->n; j++)
5351         if (affine_function_zero_p (cb->fns[j]))
5352           {
5353             insert_innermost_unit_dist_vector (ddr);
5354             return;
5355           }
5356     }
5357 }
5358
5359 /* Return true when the DDR contains two data references that have the
5360    same access functions.  */
5361
5362 static inline bool
5363 same_access_functions (const struct data_dependence_relation *ddr)
5364 {
5365   for (subscript *sub : DDR_SUBSCRIPTS (ddr))
5366     if (!eq_evolutions_p (SUB_ACCESS_FN (sub, 0),
5367                           SUB_ACCESS_FN (sub, 1)))
5368       return false;
5369
5370   return true;
5371 }
5372
5373 /* Compute the classic per loop distance vector.  DDR is the data
5374    dependence relation to build a vector from.  Return false when fail
5375    to represent the data dependence as a distance vector.  */
5376
5377 static bool
5378 build_classic_dist_vector (struct data_dependence_relation *ddr,
5379                            class loop *loop_nest)
5380 {
5381   bool init_b = false;
5382   int index_carry = DDR_NB_LOOPS (ddr);
5383   lambda_vector dist_v;
5384
5385   if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
5386     return false;
5387
5388   if (same_access_functions (ddr))
5389     {
5390       /* Save the 0 vector.  */
5391       dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5392       save_dist_v (ddr, dist_v);
5393
5394       if (invariant_access_functions (ddr, loop_nest->num))
5395         add_distance_for_zero_overlaps (ddr);
5396
5397       if (DDR_NB_LOOPS (ddr) > 1)
5398         add_other_self_distances (ddr);
5399
5400       return true;
5401     }
5402
5403   dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5404   if (!build_classic_dist_vector_1 (ddr, 0, 1, dist_v, &init_b, &index_carry))
5405     return false;
5406
5407   /* Save the distance vector if we initialized one.  */
5408   if (init_b)
5409     {
5410       /* Verify a basic constraint: classic distance vectors should
5411          always be lexicographically positive.
5412
5413          Data references are collected in the order of execution of
5414          the program, thus for the following loop
5415
5416          | for (i = 1; i < 100; i++)
5417          |   for (j = 1; j < 100; j++)
5418          |     {
5419          |       t = T[j+1][i-1];  // A
5420          |       T[j][i] = t + 2;  // B
5421          |     }
5422
5423          references are collected following the direction of the wind:
5424          A then B.  The data dependence tests are performed also
5425          following this order, such that we're looking at the distance
5426          separating the elements accessed by A from the elements later
5427          accessed by B.  But in this example, the distance returned by
5428          test_dep (A, B) is lexicographically negative (-1, 1), that
5429          means that the access A occurs later than B with respect to
5430          the outer loop, ie. we're actually looking upwind.  In this
5431          case we solve test_dep (B, A) looking downwind to the
5432          lexicographically positive solution, that returns the
5433          distance vector (1, -1).  */
5434       if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
5435         {
5436           lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5437           if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
5438             return false;
5439           compute_subscript_distance (ddr);
5440           if (!build_classic_dist_vector_1 (ddr, 1, 0, save_v, &init_b,
5441                                             &index_carry))
5442             return false;
5443           save_dist_v (ddr, save_v);
5444           DDR_REVERSED_P (ddr) = true;
5445
5446           /* In this case there is a dependence forward for all the
5447              outer loops:
5448
5449              | for (k = 1; k < 100; k++)
5450              |  for (i = 1; i < 100; i++)
5451              |   for (j = 1; j < 100; j++)
5452              |     {
5453              |       t = T[j+1][i-1];  // A
5454              |       T[j][i] = t + 2;  // B
5455              |     }
5456
5457              the vectors are:
5458              (0,  1, -1)
5459              (1,  1, -1)
5460              (1, -1,  1)
5461           */
5462           if (DDR_NB_LOOPS (ddr) > 1)
5463             {
5464               add_outer_distances (ddr, save_v, index_carry);
5465               add_outer_distances (ddr, dist_v, index_carry);
5466             }
5467         }
5468       else
5469         {
5470           lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5471           lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
5472
5473           if (DDR_NB_LOOPS (ddr) > 1)
5474             {
5475               lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5476
5477               if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
5478                 return false;
5479               compute_subscript_distance (ddr);
5480               if (!build_classic_dist_vector_1 (ddr, 1, 0, opposite_v, &init_b,
5481                                                 &index_carry))
5482                 return false;
5483
5484               save_dist_v (ddr, save_v);
5485               add_outer_distances (ddr, dist_v, index_carry);
5486               add_outer_distances (ddr, opposite_v, index_carry);
5487             }
5488           else
5489             save_dist_v (ddr, save_v);
5490         }
5491     }
5492   else
5493     {
5494       /* There is a distance of 1 on all the outer loops: Example:
5495          there is a dependence of distance 1 on loop_1 for the array A.
5496
5497          | loop_1
5498          |   A[5] = ...
5499          | endloop
5500       */
5501       add_outer_distances (ddr, dist_v,
5502                            lambda_vector_first_nz (dist_v,
5503                                                    DDR_NB_LOOPS (ddr), 0));
5504     }
5505
5506   if (dump_file && (dump_flags & TDF_DETAILS))
5507     {
5508       unsigned i;
5509
5510       fprintf (dump_file, "(build_classic_dist_vector\n");
5511       for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
5512         {
5513           fprintf (dump_file, "  dist_vector = (");
5514           print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
5515                                DDR_NB_LOOPS (ddr));
5516           fprintf (dump_file, "  )\n");
5517         }
5518       fprintf (dump_file, ")\n");
5519     }
5520
5521   return true;
5522 }
5523
5524 /* Return the direction for a given distance.
5525    FIXME: Computing dir this way is suboptimal, since dir can catch
5526    cases that dist is unable to represent.  */
5527
5528 static inline enum data_dependence_direction
5529 dir_from_dist (int dist)
5530 {
5531   if (dist > 0)
5532     return dir_positive;
5533   else if (dist < 0)
5534     return dir_negative;
5535   else
5536     return dir_equal;
5537 }
5538
5539 /* Compute the classic per loop direction vector.  DDR is the data
5540    dependence relation to build a vector from.  */
5541
5542 static void
5543 build_classic_dir_vector (struct data_dependence_relation *ddr)
5544 {
5545   unsigned i, j;
5546   lambda_vector dist_v;
5547
5548   FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
5549     {
5550       lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5551
5552       for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
5553         dir_v[j] = dir_from_dist (dist_v[j]);
5554
5555       save_dir_v (ddr, dir_v);
5556     }
5557 }
5558
5559 /* Helper function.  Returns true when there is a dependence between the
5560    data references.  A_INDEX is the index of the first reference (0 for
5561    DDR_A, 1 for DDR_B) and B_INDEX is the index of the second reference.  */
5562
5563 static bool
5564 subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
5565                                unsigned int a_index, unsigned int b_index,
5566                                class loop *loop_nest)
5567 {
5568   unsigned int i;
5569   tree last_conflicts;
5570   struct subscript *subscript;
5571   tree res = NULL_TREE;
5572
5573   for (i = 0; DDR_SUBSCRIPTS (ddr).iterate (i, &subscript); i++)
5574     {
5575       conflict_function *overlaps_a, *overlaps_b;
5576
5577       analyze_overlapping_iterations (SUB_ACCESS_FN (subscript, a_index),
5578                                       SUB_ACCESS_FN (subscript, b_index),
5579                                       &overlaps_a, &overlaps_b,
5580                                       &last_conflicts, loop_nest);
5581
5582       if (SUB_CONFLICTS_IN_A (subscript))
5583         free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
5584       if (SUB_CONFLICTS_IN_B (subscript))
5585         free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
5586
5587       SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
5588       SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
5589       SUB_LAST_CONFLICT (subscript) = last_conflicts;
5590
5591       /* If there is any undetermined conflict function we have to
5592          give a conservative answer in case we cannot prove that
5593          no dependence exists when analyzing another subscript.  */
5594       if (CF_NOT_KNOWN_P (overlaps_a)
5595           || CF_NOT_KNOWN_P (overlaps_b))
5596         {
5597           res = chrec_dont_know;
5598           continue;
5599         }
5600
5601       /* When there is a subscript with no dependence we can stop.  */
5602       else if (CF_NO_DEPENDENCE_P (overlaps_a)
5603                || CF_NO_DEPENDENCE_P (overlaps_b))
5604         {
5605           res = chrec_known;
5606           break;
5607         }
5608     }
5609
5610   if (res == NULL_TREE)
5611     return true;
5612
5613   if (res == chrec_known)
5614     dependence_stats.num_dependence_independent++;
5615   else
5616     dependence_stats.num_dependence_undetermined++;
5617   finalize_ddr_dependent (ddr, res);
5618   return false;
5619 }
5620
5621 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR.  */
5622
5623 static void
5624 subscript_dependence_tester (struct data_dependence_relation *ddr,
5625                              class loop *loop_nest)
5626 {
5627   if (subscript_dependence_tester_1 (ddr, 0, 1, loop_nest))
5628     dependence_stats.num_dependence_dependent++;
5629
5630   compute_subscript_distance (ddr);
5631   if (build_classic_dist_vector (ddr, loop_nest))
5632     build_classic_dir_vector (ddr);
5633 }
5634
5635 /* Returns true when all the access functions of A are affine or
5636    constant with respect to LOOP_NEST.  */
5637
5638 static bool
5639 access_functions_are_affine_or_constant_p (const struct data_reference *a,
5640                                            const class loop *loop_nest)
5641 {
5642   vec<tree> fns = DR_ACCESS_FNS (a);
5643   for (tree t : fns)
5644     if (!evolution_function_is_invariant_p (t, loop_nest->num)
5645         && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
5646       return false;
5647
5648   return true;
5649 }
5650
5651 /* This computes the affine dependence relation between A and B with
5652    respect to LOOP_NEST.  CHREC_KNOWN is used for representing the
5653    independence between two accesses, while CHREC_DONT_KNOW is used
5654    for representing the unknown relation.
5655
5656    Note that it is possible to stop the computation of the dependence
5657    relation the first time we detect a CHREC_KNOWN element for a given
5658    subscript.  */
5659
5660 void
5661 compute_affine_dependence (struct data_dependence_relation *ddr,
5662                            class loop *loop_nest)
5663 {
5664   struct data_reference *dra = DDR_A (ddr);
5665   struct data_reference *drb = DDR_B (ddr);
5666
5667   if (dump_file && (dump_flags & TDF_DETAILS))
5668     {
5669       fprintf (dump_file, "(compute_affine_dependence\n");
5670       fprintf (dump_file, "  ref_a: ");
5671       print_generic_expr (dump_file, DR_REF (dra));
5672       fprintf (dump_file, ", stmt_a: ");
5673       print_gimple_stmt (dump_file, DR_STMT (dra), 0, TDF_SLIM);
5674       fprintf (dump_file, "  ref_b: ");
5675       print_generic_expr (dump_file, DR_REF (drb));
5676       fprintf (dump_file, ", stmt_b: ");
5677       print_gimple_stmt (dump_file, DR_STMT (drb), 0, TDF_SLIM);
5678     }
5679
5680   /* Analyze only when the dependence relation is not yet known.  */
5681   if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
5682     {
5683       dependence_stats.num_dependence_tests++;
5684
5685       if (access_functions_are_affine_or_constant_p (dra, loop_nest)
5686           && access_functions_are_affine_or_constant_p (drb, loop_nest))
5687         subscript_dependence_tester (ddr, loop_nest);
5688
5689       /* As a last case, if the dependence cannot be determined, or if
5690          the dependence is considered too difficult to determine, answer
5691          "don't know".  */
5692       else
5693         {
5694           dependence_stats.num_dependence_undetermined++;
5695
5696           if (dump_file && (dump_flags & TDF_DETAILS))
5697             {
5698               fprintf (dump_file, "Data ref a:\n");
5699               dump_data_reference (dump_file, dra);
5700               fprintf (dump_file, "Data ref b:\n");
5701               dump_data_reference (dump_file, drb);
5702               fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
5703             }
5704           finalize_ddr_dependent (ddr, chrec_dont_know);
5705         }
5706     }
5707
5708   if (dump_file && (dump_flags & TDF_DETAILS))
5709     {
5710       if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
5711         fprintf (dump_file, ") -> no dependence\n");
5712       else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
5713         fprintf (dump_file, ") -> dependence analysis failed\n");
5714       else
5715         fprintf (dump_file, ")\n");
5716     }
5717 }
5718
5719 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
5720    the data references in DATAREFS, in the LOOP_NEST.  When
5721    COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
5722    relations.  Return true when successful, i.e. data references number
5723    is small enough to be handled.  */
5724
5725 bool
5726 compute_all_dependences (const vec<data_reference_p> &datarefs,
5727                          vec<ddr_p> *dependence_relations,
5728                          const vec<loop_p> &loop_nest,
5729                          bool compute_self_and_rr)
5730 {
5731   struct data_dependence_relation *ddr;
5732   struct data_reference *a, *b;
5733   unsigned int i, j;
5734
5735   if ((int) datarefs.length ()
5736       > param_loop_max_datarefs_for_datadeps)
5737     {
5738       struct data_dependence_relation *ddr;
5739
5740       /* Insert a single relation into dependence_relations:
5741          chrec_dont_know.  */
5742       ddr = initialize_data_dependence_relation (NULL, NULL, loop_nest);
5743       dependence_relations->safe_push (ddr);
5744       return false;
5745     }
5746
5747   FOR_EACH_VEC_ELT (datarefs, i, a)
5748     for (j = i + 1; datarefs.iterate (j, &b); j++)
5749       if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
5750         {
5751           ddr = initialize_data_dependence_relation (a, b, loop_nest);
5752           dependence_relations->safe_push (ddr);
5753           if (loop_nest.exists ())
5754             compute_affine_dependence (ddr, loop_nest[0]);
5755         }
5756
5757   if (compute_self_and_rr)
5758     FOR_EACH_VEC_ELT (datarefs, i, a)
5759       {
5760         ddr = initialize_data_dependence_relation (a, a, loop_nest);
5761         dependence_relations->safe_push (ddr);
5762         if (loop_nest.exists ())
5763           compute_affine_dependence (ddr, loop_nest[0]);
5764       }
5765
5766   return true;
5767 }
5768
5769 /* Describes a location of a memory reference.  */
5770
5771 struct data_ref_loc
5772 {
5773   /* The memory reference.  */
5774   tree ref;
5775
5776   /* True if the memory reference is read.  */
5777   bool is_read;
5778
5779   /* True if the data reference is conditional within the containing
5780      statement, i.e. if it might not occur even when the statement
5781      is executed and runs to completion.  */
5782   bool is_conditional_in_stmt;
5783 };
5784
5785
5786 /* Stores the locations of memory references in STMT to REFERENCES.  Returns
5787    true if STMT clobbers memory, false otherwise.  */
5788
5789 static bool
5790 get_references_in_stmt (gimple *stmt, vec<data_ref_loc, va_heap> *references)
5791 {
5792   bool clobbers_memory = false;
5793   data_ref_loc ref;
5794   tree op0, op1;
5795   enum gimple_code stmt_code = gimple_code (stmt);
5796
5797   /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
5798      As we cannot model data-references to not spelled out
5799      accesses give up if they may occur.  */
5800   if (stmt_code == GIMPLE_CALL
5801       && !(gimple_call_flags (stmt) & ECF_CONST))
5802     {
5803       /* Allow IFN_GOMP_SIMD_LANE in their own loops.  */
5804       if (gimple_call_internal_p (stmt))
5805         switch (gimple_call_internal_fn (stmt))
5806           {
5807           case IFN_GOMP_SIMD_LANE:
5808             {
5809               class loop *loop = gimple_bb (stmt)->loop_father;
5810               tree uid = gimple_call_arg (stmt, 0);
5811               gcc_assert (TREE_CODE (uid) == SSA_NAME);
5812               if (loop == NULL
5813                   || loop->simduid != SSA_NAME_VAR (uid))
5814                 clobbers_memory = true;
5815               break;
5816             }
5817           case IFN_MASK_LOAD:
5818           case IFN_MASK_STORE:
5819             break;
5820           default:
5821             clobbers_memory = true;
5822             break;
5823           }
5824       else
5825         clobbers_memory = true;
5826     }
5827   else if (stmt_code == GIMPLE_ASM
5828            && (gimple_asm_volatile_p (as_a <gasm *> (stmt))
5829                || gimple_vuse (stmt)))
5830     clobbers_memory = true;
5831
5832   if (!gimple_vuse (stmt))
5833     return clobbers_memory;
5834
5835   if (stmt_code == GIMPLE_ASSIGN)
5836     {
5837       tree base;
5838       op0 = gimple_assign_lhs (stmt);
5839       op1 = gimple_assign_rhs1 (stmt);
5840
5841       if (DECL_P (op1)
5842           || (REFERENCE_CLASS_P (op1)
5843               && (base = get_base_address (op1))
5844               && TREE_CODE (base) != SSA_NAME
5845               && !is_gimple_min_invariant (base)))
5846         {
5847           ref.ref = op1;
5848           ref.is_read = true;
5849           ref.is_conditional_in_stmt = false;
5850           references->safe_push (ref);
5851         }
5852     }
5853   else if (stmt_code == GIMPLE_CALL)
5854     {
5855       unsigned i, n;
5856       tree ptr, type;
5857       unsigned int align;
5858
5859       ref.is_read = false;
5860       if (gimple_call_internal_p (stmt))
5861         switch (gimple_call_internal_fn (stmt))
5862           {
5863           case IFN_MASK_LOAD:
5864             if (gimple_call_lhs (stmt) == NULL_TREE)
5865               break;
5866             ref.is_read = true;
5867             /* FALLTHRU */
5868           case IFN_MASK_STORE:
5869             ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)), 0);
5870             align = tree_to_shwi (gimple_call_arg (stmt, 1));
5871             if (ref.is_read)
5872               type = TREE_TYPE (gimple_call_lhs (stmt));
5873             else
5874               type = TREE_TYPE (gimple_call_arg (stmt, 3));
5875             if (TYPE_ALIGN (type) != align)
5876               type = build_aligned_type (type, align);
5877             ref.is_conditional_in_stmt = true;
5878             ref.ref = fold_build2 (MEM_REF, type, gimple_call_arg (stmt, 0),
5879                                    ptr);
5880             references->safe_push (ref);
5881             return false;
5882           default:
5883             break;
5884           }
5885
5886       op0 = gimple_call_lhs (stmt);
5887       n = gimple_call_num_args (stmt);
5888       for (i = 0; i < n; i++)
5889         {
5890           op1 = gimple_call_arg (stmt, i);
5891
5892           if (DECL_P (op1)
5893               || (REFERENCE_CLASS_P (op1) && get_base_address (op1)))
5894             {
5895               ref.ref = op1;
5896               ref.is_read = true;
5897               ref.is_conditional_in_stmt = false;
5898               references->safe_push (ref);
5899             }
5900         }
5901     }
5902   else
5903     return clobbers_memory;
5904
5905   if (op0
5906       && (DECL_P (op0)
5907           || (REFERENCE_CLASS_P (op0) && get_base_address (op0))))
5908     {
5909       ref.ref = op0;
5910       ref.is_read = false;
5911       ref.is_conditional_in_stmt = false;
5912       references->safe_push (ref);
5913     }
5914   return clobbers_memory;
5915 }
5916
5917
5918 /* Returns true if the loop-nest has any data reference.  */
5919
5920 bool
5921 loop_nest_has_data_refs (loop_p loop)
5922 {
5923   basic_block *bbs = get_loop_body (loop);
5924   auto_vec<data_ref_loc, 3> references;
5925
5926   for (unsigned i = 0; i < loop->num_nodes; i++)
5927     {
5928       basic_block bb = bbs[i];
5929       gimple_stmt_iterator bsi;
5930
5931       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5932         {
5933           gimple *stmt = gsi_stmt (bsi);
5934           get_references_in_stmt (stmt, &references);
5935           if (references.length ())
5936             {
5937               free (bbs);
5938               return true;
5939             }
5940         }
5941     }
5942   free (bbs);
5943   return false;
5944 }
5945
5946 /* Stores the data references in STMT to DATAREFS.  If there is an unanalyzable
5947    reference, returns false, otherwise returns true.  NEST is the outermost
5948    loop of the loop nest in which the references should be analyzed.  */
5949
5950 opt_result
5951 find_data_references_in_stmt (class loop *nest, gimple *stmt,
5952                               vec<data_reference_p> *datarefs)
5953 {
5954   auto_vec<data_ref_loc, 2> references;
5955   data_reference_p dr;
5956
5957   if (get_references_in_stmt (stmt, &references))
5958     return opt_result::failure_at (stmt, "statement clobbers memory: %G",
5959                                    stmt);
5960
5961   for (const data_ref_loc &ref : references)
5962     {
5963       dr = create_data_ref (nest ? loop_preheader_edge (nest) : NULL,
5964                             loop_containing_stmt (stmt), ref.ref,
5965                             stmt, ref.is_read, ref.is_conditional_in_stmt);
5966       gcc_assert (dr != NULL);
5967       datarefs->safe_push (dr);
5968     }
5969
5970   return opt_result::success ();
5971 }
5972
5973 /* Stores the data references in STMT to DATAREFS.  If there is an
5974    unanalyzable reference, returns false, otherwise returns true.
5975    NEST is the outermost loop of the loop nest in which the references
5976    should be instantiated, LOOP is the loop in which the references
5977    should be analyzed.  */
5978
5979 bool
5980 graphite_find_data_references_in_stmt (edge nest, loop_p loop, gimple *stmt,
5981                                        vec<data_reference_p> *datarefs)
5982 {
5983   auto_vec<data_ref_loc, 2> references;
5984   bool ret = true;
5985   data_reference_p dr;
5986
5987   if (get_references_in_stmt (stmt, &references))
5988     return false;
5989
5990   for (const data_ref_loc &ref : references)
5991     {
5992       dr = create_data_ref (nest, loop, ref.ref, stmt, ref.is_read,
5993                             ref.is_conditional_in_stmt);
5994       gcc_assert (dr != NULL);
5995       datarefs->safe_push (dr);
5996     }
5997
5998   return ret;
5999 }
6000
6001 /* Search the data references in LOOP, and record the information into
6002    DATAREFS.  Returns chrec_dont_know when failing to analyze a
6003    difficult case, returns NULL_TREE otherwise.  */
6004
6005 tree
6006 find_data_references_in_bb (class loop *loop, basic_block bb,
6007                             vec<data_reference_p> *datarefs)
6008 {
6009   gimple_stmt_iterator bsi;
6010
6011   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
6012     {
6013       gimple *stmt = gsi_stmt (bsi);
6014
6015       if (!find_data_references_in_stmt (loop, stmt, datarefs))
6016         {
6017           struct data_reference *res;
6018           res = XCNEW (struct data_reference);
6019           datarefs->safe_push (res);
6020
6021           return chrec_dont_know;
6022         }
6023     }
6024
6025   return NULL_TREE;
6026 }
6027
6028 /* Search the data references in LOOP, and record the information into
6029    DATAREFS.  Returns chrec_dont_know when failing to analyze a
6030    difficult case, returns NULL_TREE otherwise.
6031
6032    TODO: This function should be made smarter so that it can handle address
6033    arithmetic as if they were array accesses, etc.  */
6034
6035 tree
6036 find_data_references_in_loop (class loop *loop,
6037                               vec<data_reference_p> *datarefs)
6038 {
6039   basic_block bb, *bbs;
6040   unsigned int i;
6041
6042   bbs = get_loop_body_in_dom_order (loop);
6043
6044   for (i = 0; i < loop->num_nodes; i++)
6045     {
6046       bb = bbs[i];
6047
6048       if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
6049         {
6050           free (bbs);
6051           return chrec_dont_know;
6052         }
6053     }
6054   free (bbs);
6055
6056   return NULL_TREE;
6057 }
6058
6059 /* Return the alignment in bytes that DRB is guaranteed to have at all
6060    times.  */
6061
6062 unsigned int
6063 dr_alignment (innermost_loop_behavior *drb)
6064 {
6065   /* Get the alignment of BASE_ADDRESS + INIT.  */
6066   unsigned int alignment = drb->base_alignment;
6067   unsigned int misalignment = (drb->base_misalignment
6068                                + TREE_INT_CST_LOW (drb->init));
6069   if (misalignment != 0)
6070     alignment = MIN (alignment, misalignment & -misalignment);
6071
6072   /* Cap it to the alignment of OFFSET.  */
6073   if (!integer_zerop (drb->offset))
6074     alignment = MIN (alignment, drb->offset_alignment);
6075
6076   /* Cap it to the alignment of STEP.  */
6077   if (!integer_zerop (drb->step))
6078     alignment = MIN (alignment, drb->step_alignment);
6079
6080   return alignment;
6081 }
6082
6083 /* If BASE is a pointer-typed SSA name, try to find the object that it
6084    is based on.  Return this object X on success and store the alignment
6085    in bytes of BASE - &X in *ALIGNMENT_OUT.  */
6086
6087 static tree
6088 get_base_for_alignment_1 (tree base, unsigned int *alignment_out)
6089 {
6090   if (TREE_CODE (base) != SSA_NAME || !POINTER_TYPE_P (TREE_TYPE (base)))
6091     return NULL_TREE;
6092
6093   gimple *def = SSA_NAME_DEF_STMT (base);
6094   base = analyze_scalar_evolution (loop_containing_stmt (def), base);
6095
6096   /* Peel chrecs and record the minimum alignment preserved by
6097      all steps.  */
6098   unsigned int alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
6099   while (TREE_CODE (base) == POLYNOMIAL_CHREC)
6100     {
6101       unsigned int step_alignment = highest_pow2_factor (CHREC_RIGHT (base));
6102       alignment = MIN (alignment, step_alignment);
6103       base = CHREC_LEFT (base);
6104     }
6105
6106   /* Punt if the expression is too complicated to handle.  */
6107   if (tree_contains_chrecs (base, NULL) || !POINTER_TYPE_P (TREE_TYPE (base)))
6108     return NULL_TREE;
6109
6110   /* The only useful cases are those for which a dereference folds to something
6111      other than an INDIRECT_REF.  */
6112   tree ref_type = TREE_TYPE (TREE_TYPE (base));
6113   tree ref = fold_indirect_ref_1 (UNKNOWN_LOCATION, ref_type, base);
6114   if (!ref)
6115     return NULL_TREE;
6116
6117   /* Analyze the base to which the steps we peeled were applied.  */
6118   poly_int64 bitsize, bitpos, bytepos;
6119   machine_mode mode;
6120   int unsignedp, reversep, volatilep;
6121   tree offset;
6122   base = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
6123                               &unsignedp, &reversep, &volatilep);
6124   if (!base || !multiple_p (bitpos, BITS_PER_UNIT, &bytepos))
6125     return NULL_TREE;
6126
6127   /* Restrict the alignment to that guaranteed by the offsets.  */
6128   unsigned int bytepos_alignment = known_alignment (bytepos);
6129   if (bytepos_alignment != 0)
6130     alignment = MIN (alignment, bytepos_alignment);
6131   if (offset)
6132     {
6133       unsigned int offset_alignment = highest_pow2_factor (offset);
6134       alignment = MIN (alignment, offset_alignment);
6135     }
6136
6137   *alignment_out = alignment;
6138   return base;
6139 }
6140
6141 /* Return the object whose alignment would need to be changed in order
6142    to increase the alignment of ADDR.  Store the maximum achievable
6143    alignment in *MAX_ALIGNMENT.  */
6144
6145 tree
6146 get_base_for_alignment (tree addr, unsigned int *max_alignment)
6147 {
6148   tree base = get_base_for_alignment_1 (addr, max_alignment);
6149   if (base)
6150     return base;
6151
6152   if (TREE_CODE (addr) == ADDR_EXPR)
6153     addr = TREE_OPERAND (addr, 0);
6154   *max_alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
6155   return addr;
6156 }
6157
6158 /* Recursive helper function.  */
6159
6160 static bool
6161 find_loop_nest_1 (class loop *loop, vec<loop_p> *loop_nest)
6162 {
6163   /* Inner loops of the nest should not contain siblings.  Example:
6164      when there are two consecutive loops,
6165
6166      | loop_0
6167      |   loop_1
6168      |     A[{0, +, 1}_1]
6169      |   endloop_1
6170      |   loop_2
6171      |     A[{0, +, 1}_2]
6172      |   endloop_2
6173      | endloop_0
6174
6175      the dependence relation cannot be captured by the distance
6176      abstraction.  */
6177   if (loop->next)
6178     return false;
6179
6180   loop_nest->safe_push (loop);
6181   if (loop->inner)
6182     return find_loop_nest_1 (loop->inner, loop_nest);
6183   return true;
6184 }
6185
6186 /* Return false when the LOOP is not well nested.  Otherwise return
6187    true and insert in LOOP_NEST the loops of the nest.  LOOP_NEST will
6188    contain the loops from the outermost to the innermost, as they will
6189    appear in the classic distance vector.  */
6190
6191 bool
6192 find_loop_nest (class loop *loop, vec<loop_p> *loop_nest)
6193 {
6194   loop_nest->safe_push (loop);
6195   if (loop->inner)
6196     return find_loop_nest_1 (loop->inner, loop_nest);
6197   return true;
6198 }
6199
6200 /* Returns true when the data dependences have been computed, false otherwise.
6201    Given a loop nest LOOP, the following vectors are returned:
6202    DATAREFS is initialized to all the array elements contained in this loop,
6203    DEPENDENCE_RELATIONS contains the relations between the data references.
6204    Compute read-read and self relations if
6205    COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE.  */
6206
6207 bool
6208 compute_data_dependences_for_loop (class loop *loop,
6209                                    bool compute_self_and_read_read_dependences,
6210                                    vec<loop_p> *loop_nest,
6211                                    vec<data_reference_p> *datarefs,
6212                                    vec<ddr_p> *dependence_relations)
6213 {
6214   bool res = true;
6215
6216   memset (&dependence_stats, 0, sizeof (dependence_stats));
6217
6218   /* If the loop nest is not well formed, or one of the data references
6219      is not computable, give up without spending time to compute other
6220      dependences.  */
6221   if (!loop
6222       || !find_loop_nest (loop, loop_nest)
6223       || find_data_references_in_loop (loop, datarefs) == chrec_dont_know
6224       || !compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
6225                                    compute_self_and_read_read_dependences))
6226     res = false;
6227
6228   if (dump_file && (dump_flags & TDF_STATS))
6229     {
6230       fprintf (dump_file, "Dependence tester statistics:\n");
6231
6232       fprintf (dump_file, "Number of dependence tests: %d\n",
6233                dependence_stats.num_dependence_tests);
6234       fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
6235                dependence_stats.num_dependence_dependent);
6236       fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
6237                dependence_stats.num_dependence_independent);
6238       fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
6239                dependence_stats.num_dependence_undetermined);
6240
6241       fprintf (dump_file, "Number of subscript tests: %d\n",
6242                dependence_stats.num_subscript_tests);
6243       fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
6244                dependence_stats.num_subscript_undetermined);
6245       fprintf (dump_file, "Number of same subscript function: %d\n",
6246                dependence_stats.num_same_subscript_function);
6247
6248       fprintf (dump_file, "Number of ziv tests: %d\n",
6249                dependence_stats.num_ziv);
6250       fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
6251                dependence_stats.num_ziv_dependent);
6252       fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
6253                dependence_stats.num_ziv_independent);
6254       fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
6255                dependence_stats.num_ziv_unimplemented);
6256
6257       fprintf (dump_file, "Number of siv tests: %d\n",
6258                dependence_stats.num_siv);
6259       fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
6260                dependence_stats.num_siv_dependent);
6261       fprintf (dump_file, "Number of siv tests returning independent: %d\n",
6262                dependence_stats.num_siv_independent);
6263       fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
6264                dependence_stats.num_siv_unimplemented);
6265
6266       fprintf (dump_file, "Number of miv tests: %d\n",
6267                dependence_stats.num_miv);
6268       fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
6269                dependence_stats.num_miv_dependent);
6270       fprintf (dump_file, "Number of miv tests returning independent: %d\n",
6271                dependence_stats.num_miv_independent);
6272       fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
6273                dependence_stats.num_miv_unimplemented);
6274     }
6275
6276   return res;
6277 }
6278
6279 /* Free the memory used by a data dependence relation DDR.  */
6280
6281 void
6282 free_dependence_relation (struct data_dependence_relation *ddr)
6283 {
6284   if (ddr == NULL)
6285     return;
6286
6287   if (DDR_SUBSCRIPTS (ddr).exists ())
6288     free_subscripts (DDR_SUBSCRIPTS (ddr));
6289   DDR_DIST_VECTS (ddr).release ();
6290   DDR_DIR_VECTS (ddr).release ();
6291
6292   free (ddr);
6293 }
6294
6295 /* Free the memory used by the data dependence relations from
6296    DEPENDENCE_RELATIONS.  */
6297
6298 void
6299 free_dependence_relations (vec<ddr_p>& dependence_relations)
6300 {
6301   for (data_dependence_relation *ddr : dependence_relations)
6302     if (ddr)
6303       free_dependence_relation (ddr);
6304
6305   dependence_relations.release ();
6306 }
6307
6308 /* Free the memory used by the data references from DATAREFS.  */
6309
6310 void
6311 free_data_refs (vec<data_reference_p>& datarefs)
6312 {
6313   for (data_reference *dr : datarefs)
6314     free_data_ref (dr);
6315   datarefs.release ();
6316 }
6317
6318 /* Common routine implementing both dr_direction_indicator and
6319    dr_zero_step_indicator.  Return USEFUL_MIN if the indicator is known
6320    to be >= USEFUL_MIN and -1 if the indicator is known to be negative.
6321    Return the step as the indicator otherwise.  */
6322
6323 static tree
6324 dr_step_indicator (struct data_reference *dr, int useful_min)
6325 {
6326   tree step = DR_STEP (dr);
6327   if (!step)
6328     return NULL_TREE;
6329   STRIP_NOPS (step);
6330   /* Look for cases where the step is scaled by a positive constant
6331      integer, which will often be the access size.  If the multiplication
6332      doesn't change the sign (due to overflow effects) then we can
6333      test the unscaled value instead.  */
6334   if (TREE_CODE (step) == MULT_EXPR
6335       && TREE_CODE (TREE_OPERAND (step, 1)) == INTEGER_CST
6336       && tree_int_cst_sgn (TREE_OPERAND (step, 1)) > 0)
6337     {
6338       tree factor = TREE_OPERAND (step, 1);
6339       step = TREE_OPERAND (step, 0);
6340
6341       /* Strip widening and truncating conversions as well as nops.  */
6342       if (CONVERT_EXPR_P (step)
6343           && INTEGRAL_TYPE_P (TREE_TYPE (TREE_OPERAND (step, 0))))
6344         step = TREE_OPERAND (step, 0);
6345       tree type = TREE_TYPE (step);
6346
6347       /* Get the range of step values that would not cause overflow.  */
6348       widest_int minv = (wi::to_widest (TYPE_MIN_VALUE (ssizetype))
6349                          / wi::to_widest (factor));
6350       widest_int maxv = (wi::to_widest (TYPE_MAX_VALUE (ssizetype))
6351                          / wi::to_widest (factor));
6352
6353       /* Get the range of values that the unconverted step actually has.  */
6354       wide_int step_min, step_max;
6355       value_range vr;
6356       if (TREE_CODE (step) != SSA_NAME
6357           || !get_range_query (cfun)->range_of_expr (vr, step)
6358           || vr.undefined_p ())
6359         {
6360           step_min = wi::to_wide (TYPE_MIN_VALUE (type));
6361           step_max = wi::to_wide (TYPE_MAX_VALUE (type));
6362         }
6363       else
6364         {
6365           step_min = vr.lower_bound ();
6366           step_max = vr.upper_bound ();
6367         }
6368
6369       /* Check whether the unconverted step has an acceptable range.  */
6370       signop sgn = TYPE_SIGN (type);
6371       if (wi::les_p (minv, widest_int::from (step_min, sgn))
6372           && wi::ges_p (maxv, widest_int::from (step_max, sgn)))
6373         {
6374           if (wi::ge_p (step_min, useful_min, sgn))
6375             return ssize_int (useful_min);
6376           else if (wi::lt_p (step_max, 0, sgn))
6377             return ssize_int (-1);
6378           else
6379             return fold_convert (ssizetype, step);
6380         }
6381     }
6382   return DR_STEP (dr);
6383 }
6384
6385 /* Return a value that is negative iff DR has a negative step.  */
6386
6387 tree
6388 dr_direction_indicator (struct data_reference *dr)
6389 {
6390   return dr_step_indicator (dr, 0);
6391 }
6392
6393 /* Return a value that is zero iff DR has a zero step.  */
6394
6395 tree
6396 dr_zero_step_indicator (struct data_reference *dr)
6397 {
6398   return dr_step_indicator (dr, 1);
6399 }
6400
6401 /* Return true if DR is known to have a nonnegative (but possibly zero)
6402    step.  */
6403
6404 bool
6405 dr_known_forward_stride_p (struct data_reference *dr)
6406 {
6407   tree indicator = dr_direction_indicator (dr);
6408   tree neg_step_val = fold_binary (LT_EXPR, boolean_type_node,
6409                                    fold_convert (ssizetype, indicator),
6410                                    ssize_int (0));
6411   return neg_step_val && integer_zerop (neg_step_val);
6412 }