gcc/tree-data-ref.c

   1 /* Data references and dependences detectors.
   2    Copyright (C) 2003-2021 Free Software Foundation, Inc.
   3    Contributed by Sebastian Pop <pop@cri.ensmp.fr>
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 /* This pass walks a given loop structure searching for array
  22    references.  The information about the array accesses is recorded
  23    in DATA_REFERENCE structures.
  24
  25    The basic test for determining the dependences is:
  26    given two access functions chrec1 and chrec2 to a same array, and
  27    x and y two vectors from the iteration domain, the same element of
  28    the array is accessed twice at iterations x and y if and only if:
  29    |             chrec1 (x) == chrec2 (y).
  30
  31    The goals of this analysis are:
  32
  33    - to determine the independence: the relation between two
  34      independent accesses is qualified with the chrec_known (this
  35      information allows a loop parallelization),
  36
  37    - when two data references access the same data, to qualify the
  38      dependence relation with classic dependence representations:
  39
  40        - distance vectors
  41        - direction vectors
  42        - loop carried level dependence
  43        - polyhedron dependence
  44      or with the chains of recurrences based representation,
  45
  46    - to define a knowledge base for storing the data dependence
  47      information,
  48
  49    - to define an interface to access this data.
  50
  51
  52    Definitions:
  53
  54    - subscript: given two array accesses a subscript is the tuple
  55    composed of the access functions for a given dimension.  Example:
  56    Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
  57    (f1, g1), (f2, g2), (f3, g3).
  58
  59    - Diophantine equation: an equation whose coefficients and
  60    solutions are integer constants, for example the equation
  61    |   3*x + 2*y = 1
  62    has an integer solution x = 1 and y = -1.
  63
  64    References:
  65
  66    - "Advanced Compilation for High Performance Computing" by Randy
  67    Allen and Ken Kennedy.
  68    http://citeseer.ist.psu.edu/goff91practical.html
  69
  70    - "Loop Transformations for Restructuring Compilers - The Foundations"
  71    by Utpal Banerjee.
  72
  73
  74 */
  75
  76 #include "config.h"
  77 #include "system.h"
  78 #include "coretypes.h"
  79 #include "backend.h"
  80 #include "rtl.h"
  81 #include "tree.h"
  82 #include "gimple.h"
  83 #include "gimple-pretty-print.h"
  84 #include "alias.h"
  85 #include "fold-const.h"
  86 #include "expr.h"
  87 #include "gimple-iterator.h"
  88 #include "tree-ssa-loop-niter.h"
  89 #include "tree-ssa-loop.h"
  90 #include "tree-ssa.h"
  91 #include "cfgloop.h"
  92 #include "tree-data-ref.h"
  93 #include "tree-scalar-evolution.h"
  94 #include "dumpfile.h"
  95 #include "tree-affine.h"
  96 #include "builtins.h"
  97 #include "tree-eh.h"
  98 #include "ssa.h"
  99 #include "internal-fn.h"
 100 #include "vr-values.h"
 101 #include "range-op.h"
 102 #include "tree-ssa-loop-ivopts.h"
 103
 104 static struct datadep_stats
 105 {
 106   int num_dependence_tests;
 107   int num_dependence_dependent;
 108   int num_dependence_independent;
 109   int num_dependence_undetermined;
 110
 111   int num_subscript_tests;
 112   int num_subscript_undetermined;
 113   int num_same_subscript_function;
 114
 115   int num_ziv;
 116   int num_ziv_independent;
 117   int num_ziv_dependent;
 118   int num_ziv_unimplemented;
 119
 120   int num_siv;
 121   int num_siv_independent;
 122   int num_siv_dependent;
 123   int num_siv_unimplemented;
 124
 125   int num_miv;
 126   int num_miv_independent;
 127   int num_miv_dependent;
 128   int num_miv_unimplemented;
 129 } dependence_stats;
 130
 131 static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
 132                                            unsigned int, unsigned int,
 133                                            class loop *);
 134 /* Returns true iff A divides B.  */
 135
 136 static inline bool
 137 tree_fold_divides_p (const_tree a, const_tree b)
 138 {
 139   gcc_assert (TREE_CODE (a) == INTEGER_CST);
 140   gcc_assert (TREE_CODE (b) == INTEGER_CST);
 141   return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
 142 }
 143
 144 /* Returns true iff A divides B.  */
 145
 146 static inline bool
 147 int_divides_p (lambda_int a, lambda_int b)
 148 {
 149   return ((b % a) == 0);
 150 }
 151
 152 /* Return true if reference REF contains a union access.  */
 153
 154 static bool
 155 ref_contains_union_access_p (tree ref)
 156 {
 157   while (handled_component_p (ref))
 158     {
 159       ref = TREE_OPERAND (ref, 0);
 160       if (TREE_CODE (TREE_TYPE (ref)) == UNION_TYPE
 161           || TREE_CODE (TREE_TYPE (ref)) == QUAL_UNION_TYPE)
 162         return true;
 163     }
 164   return false;
 165 }
 166
 167 \f
 168
 169 /* Dump into FILE all the data references from DATAREFS.  */
 170
 171 static void
 172 dump_data_references (FILE *file, vec<data_reference_p> datarefs)
 173 {
 174   for (data_reference *dr : datarefs)
 175     dump_data_reference (file, dr);
 176 }
 177
 178 /* Unified dump into FILE all the data references from DATAREFS.  */
 179
 180 DEBUG_FUNCTION void
 181 debug (vec<data_reference_p> &ref)
 182 {
 183   dump_data_references (stderr, ref);
 184 }
 185
 186 DEBUG_FUNCTION void
 187 debug (vec<data_reference_p> *ptr)
 188 {
 189   if (ptr)
 190     debug (*ptr);
 191   else
 192     fprintf (stderr, "<nil>\n");
 193 }
 194
 195
 196 /* Dump into STDERR all the data references from DATAREFS.  */
 197
 198 DEBUG_FUNCTION void
 199 debug_data_references (vec<data_reference_p> datarefs)
 200 {
 201   dump_data_references (stderr, datarefs);
 202 }
 203
 204 /* Print to STDERR the data_reference DR.  */
 205
 206 DEBUG_FUNCTION void
 207 debug_data_reference (struct data_reference *dr)
 208 {
 209   dump_data_reference (stderr, dr);
 210 }
 211
 212 /* Dump function for a DATA_REFERENCE structure.  */
 213
 214 void
 215 dump_data_reference (FILE *outf,
 216                      struct data_reference *dr)
 217 {
 218   unsigned int i;
 219
 220   fprintf (outf, "#(Data Ref: \n");
 221   fprintf (outf, "#  bb: %d \n", gimple_bb (DR_STMT (dr))->index);
 222   fprintf (outf, "#  stmt: ");
 223   print_gimple_stmt (outf, DR_STMT (dr), 0);
 224   fprintf (outf, "#  ref: ");
 225   print_generic_stmt (outf, DR_REF (dr));
 226   fprintf (outf, "#  base_object: ");
 227   print_generic_stmt (outf, DR_BASE_OBJECT (dr));
 228
 229   for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
 230     {
 231       fprintf (outf, "#  Access function %d: ", i);
 232       print_generic_stmt (outf, DR_ACCESS_FN (dr, i));
 233     }
 234   fprintf (outf, "#)\n");
 235 }
 236
 237 /* Unified dump function for a DATA_REFERENCE structure.  */
 238
 239 DEBUG_FUNCTION void
 240 debug (data_reference &ref)
 241 {
 242   dump_data_reference (stderr, &ref);
 243 }
 244
 245 DEBUG_FUNCTION void
 246 debug (data_reference *ptr)
 247 {
 248   if (ptr)
 249     debug (*ptr);
 250   else
 251     fprintf (stderr, "<nil>\n");
 252 }
 253
 254
 255 /* Dumps the affine function described by FN to the file OUTF.  */
 256
 257 DEBUG_FUNCTION void
 258 dump_affine_function (FILE *outf, affine_fn fn)
 259 {
 260   unsigned i;
 261   tree coef;
 262
 263   print_generic_expr (outf, fn[0], TDF_SLIM);
 264   for (i = 1; fn.iterate (i, &coef); i++)
 265     {
 266       fprintf (outf, " + ");
 267       print_generic_expr (outf, coef, TDF_SLIM);
 268       fprintf (outf, " * x_%u", i);
 269     }
 270 }
 271
 272 /* Dumps the conflict function CF to the file OUTF.  */
 273
 274 DEBUG_FUNCTION void
 275 dump_conflict_function (FILE *outf, conflict_function *cf)
 276 {
 277   unsigned i;
 278
 279   if (cf->n == NO_DEPENDENCE)
 280     fprintf (outf, "no dependence");
 281   else if (cf->n == NOT_KNOWN)
 282     fprintf (outf, "not known");
 283   else
 284     {
 285       for (i = 0; i < cf->n; i++)
 286         {
 287           if (i != 0)
 288             fprintf (outf, " ");
 289           fprintf (outf, "[");
 290           dump_affine_function (outf, cf->fns[i]);
 291           fprintf (outf, "]");
 292         }
 293     }
 294 }
 295
 296 /* Dump function for a SUBSCRIPT structure.  */
 297
 298 DEBUG_FUNCTION void
 299 dump_subscript (FILE *outf, struct subscript *subscript)
 300 {
 301   conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
 302
 303   fprintf (outf, "\n (subscript \n");
 304   fprintf (outf, "  iterations_that_access_an_element_twice_in_A: ");
 305   dump_conflict_function (outf, cf);
 306   if (CF_NONTRIVIAL_P (cf))
 307     {
 308       tree last_iteration = SUB_LAST_CONFLICT (subscript);
 309       fprintf (outf, "\n  last_conflict: ");
 310       print_generic_expr (outf, last_iteration);
 311     }
 312
 313   cf = SUB_CONFLICTS_IN_B (subscript);
 314   fprintf (outf, "\n  iterations_that_access_an_element_twice_in_B: ");
 315   dump_conflict_function (outf, cf);
 316   if (CF_NONTRIVIAL_P (cf))
 317     {
 318       tree last_iteration = SUB_LAST_CONFLICT (subscript);
 319       fprintf (outf, "\n  last_conflict: ");
 320       print_generic_expr (outf, last_iteration);
 321     }
 322
 323   fprintf (outf, "\n  (Subscript distance: ");
 324   print_generic_expr (outf, SUB_DISTANCE (subscript));
 325   fprintf (outf, " ))\n");
 326 }
 327
 328 /* Print the classic direction vector DIRV to OUTF.  */
 329
 330 DEBUG_FUNCTION void
 331 print_direction_vector (FILE *outf,
 332                         lambda_vector dirv,
 333                         int length)
 334 {
 335   int eq;
 336
 337   for (eq = 0; eq < length; eq++)
 338     {
 339       enum data_dependence_direction dir = ((enum data_dependence_direction)
 340                                             dirv[eq]);
 341
 342       switch (dir)
 343         {
 344         case dir_positive:
 345           fprintf (outf, "    +");
 346           break;
 347         case dir_negative:
 348           fprintf (outf, "    -");
 349           break;
 350         case dir_equal:
 351           fprintf (outf, "    =");
 352           break;
 353         case dir_positive_or_equal:
 354           fprintf (outf, "   +=");
 355           break;
 356         case dir_positive_or_negative:
 357           fprintf (outf, "   +-");
 358           break;
 359         case dir_negative_or_equal:
 360           fprintf (outf, "   -=");
 361           break;
 362         case dir_star:
 363           fprintf (outf, "    *");
 364           break;
 365         default:
 366           fprintf (outf, "indep");
 367           break;
 368         }
 369     }
 370   fprintf (outf, "\n");
 371 }
 372
 373 /* Print a vector of direction vectors.  */
 374
 375 DEBUG_FUNCTION void
 376 print_dir_vectors (FILE *outf, vec<lambda_vector> dir_vects,
 377                    int length)
 378 {
 379   for (lambda_vector v : dir_vects)
 380     print_direction_vector (outf, v, length);
 381 }
 382
 383 /* Print out a vector VEC of length N to OUTFILE.  */
 384
 385 DEBUG_FUNCTION void
 386 print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
 387 {
 388   int i;
 389
 390   for (i = 0; i < n; i++)
 391     fprintf (outfile, "%3d ", (int)vector[i]);
 392   fprintf (outfile, "\n");
 393 }
 394
 395 /* Print a vector of distance vectors.  */
 396
 397 DEBUG_FUNCTION void
 398 print_dist_vectors (FILE *outf, vec<lambda_vector> dist_vects,
 399                     int length)
 400 {
 401   for (lambda_vector v : dist_vects)
 402     print_lambda_vector (outf, v, length);
 403 }
 404
 405 /* Dump function for a DATA_DEPENDENCE_RELATION structure.  */
 406
 407 DEBUG_FUNCTION void
 408 dump_data_dependence_relation (FILE *outf, const data_dependence_relation *ddr)
 409 {
 410   struct data_reference *dra, *drb;
 411
 412   fprintf (outf, "(Data Dep: \n");
 413
 414   if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
 415     {
 416       if (ddr)
 417         {
 418           dra = DDR_A (ddr);
 419           drb = DDR_B (ddr);
 420           if (dra)
 421             dump_data_reference (outf, dra);
 422           else
 423             fprintf (outf, "    (nil)\n");
 424           if (drb)
 425             dump_data_reference (outf, drb);
 426           else
 427             fprintf (outf, "    (nil)\n");
 428         }
 429       fprintf (outf, "    (don't know)\n)\n");
 430       return;
 431     }
 432
 433   dra = DDR_A (ddr);
 434   drb = DDR_B (ddr);
 435   dump_data_reference (outf, dra);
 436   dump_data_reference (outf, drb);
 437
 438   if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
 439     fprintf (outf, "    (no dependence)\n");
 440
 441   else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
 442     {
 443       unsigned int i;
 444       class loop *loopi;
 445
 446       subscript *sub;
 447       FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
 448         {
 449           fprintf (outf, "  access_fn_A: ");
 450           print_generic_stmt (outf, SUB_ACCESS_FN (sub, 0));
 451           fprintf (outf, "  access_fn_B: ");
 452           print_generic_stmt (outf, SUB_ACCESS_FN (sub, 1));
 453           dump_subscript (outf, sub);
 454         }
 455
 456       fprintf (outf, "  loop nest: (");
 457       FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr), i, loopi)
 458         fprintf (outf, "%d ", loopi->num);
 459       fprintf (outf, ")\n");
 460
 461       for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
 462         {
 463           fprintf (outf, "  distance_vector: ");
 464           print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
 465                                DDR_NB_LOOPS (ddr));
 466         }
 467
 468       for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
 469         {
 470           fprintf (outf, "  direction_vector: ");
 471           print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
 472                                   DDR_NB_LOOPS (ddr));
 473         }
 474     }
 475
 476   fprintf (outf, ")\n");
 477 }
 478
 479 /* Debug version.  */
 480
 481 DEBUG_FUNCTION void
 482 debug_data_dependence_relation (const struct data_dependence_relation *ddr)
 483 {
 484   dump_data_dependence_relation (stderr, ddr);
 485 }
 486
 487 /* Dump into FILE all the dependence relations from DDRS.  */
 488
 489 DEBUG_FUNCTION void
 490 dump_data_dependence_relations (FILE *file, const vec<ddr_p> &ddrs)
 491 {
 492   for (auto ddr : ddrs)
 493     dump_data_dependence_relation (file, ddr);
 494 }
 495
 496 DEBUG_FUNCTION void
 497 debug (vec<ddr_p> &ref)
 498 {
 499   dump_data_dependence_relations (stderr, ref);
 500 }
 501
 502 DEBUG_FUNCTION void
 503 debug (vec<ddr_p> *ptr)
 504 {
 505   if (ptr)
 506     debug (*ptr);
 507   else
 508     fprintf (stderr, "<nil>\n");
 509 }
 510
 511
 512 /* Dump to STDERR all the dependence relations from DDRS.  */
 513
 514 DEBUG_FUNCTION void
 515 debug_data_dependence_relations (vec<ddr_p> ddrs)
 516 {
 517   dump_data_dependence_relations (stderr, ddrs);
 518 }
 519
 520 /* Dumps the distance and direction vectors in FILE.  DDRS contains
 521    the dependence relations, and VECT_SIZE is the size of the
 522    dependence vectors, or in other words the number of loops in the
 523    considered nest.  */
 524
 525 DEBUG_FUNCTION void
 526 dump_dist_dir_vectors (FILE *file, vec<ddr_p> ddrs)
 527 {
 528   for (data_dependence_relation *ddr : ddrs)
 529     if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
 530       {
 531         for (lambda_vector v : DDR_DIST_VECTS (ddr))
 532           {
 533             fprintf (file, "DISTANCE_V (");
 534             print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
 535             fprintf (file, ")\n");
 536           }
 537
 538         for (lambda_vector v : DDR_DIR_VECTS (ddr))
 539           {
 540             fprintf (file, "DIRECTION_V (");
 541             print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
 542             fprintf (file, ")\n");
 543           }
 544       }
 545
 546   fprintf (file, "\n\n");
 547 }
 548
 549 /* Dumps the data dependence relations DDRS in FILE.  */
 550
 551 DEBUG_FUNCTION void
 552 dump_ddrs (FILE *file, vec<ddr_p> ddrs)
 553 {
 554   for (data_dependence_relation *ddr : ddrs)
 555     dump_data_dependence_relation (file, ddr);
 556
 557   fprintf (file, "\n\n");
 558 }
 559
 560 DEBUG_FUNCTION void
 561 debug_ddrs (vec<ddr_p> ddrs)
 562 {
 563   dump_ddrs (stderr, ddrs);
 564 }
 565
 566 /* If RESULT_RANGE is nonnull, set *RESULT_RANGE to the range of
 567    OP0 CODE OP1, where:
 568
 569    - OP0 CODE OP1 has integral type TYPE
 570    - the range of OP0 is given by OP0_RANGE and
 571    - the range of OP1 is given by OP1_RANGE.
 572
 573    Independently of RESULT_RANGE, try to compute:
 574
 575      DELTA = ((sizetype) OP0 CODE (sizetype) OP1)
 576              - (sizetype) (OP0 CODE OP1)
 577
 578    as a constant and subtract DELTA from the ssizetype constant in *OFF.
 579    Return true on success, or false if DELTA is not known at compile time.
 580
 581    Truncation and sign changes are known to distribute over CODE, i.e.
 582
 583      (itype) (A CODE B) == (itype) A CODE (itype) B
 584
 585    for any integral type ITYPE whose precision is no greater than the
 586    precision of A and B.  */
 587
 588 static bool
 589 compute_distributive_range (tree type, value_range &op0_range,
 590                             tree_code code, value_range &op1_range,
 591                             tree *off, value_range *result_range)
 592 {
 593   gcc_assert (INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type));
 594   if (result_range)
 595     {
 596       range_operator *op = range_op_handler (code, type);
 597       op->fold_range (*result_range, type, op0_range, op1_range);
 598     }
 599
 600   /* The distributive property guarantees that if TYPE is no narrower
 601      than SIZETYPE,
 602
 603        (sizetype) (OP0 CODE OP1) == (sizetype) OP0 CODE (sizetype) OP1
 604
 605      and so we can treat DELTA as zero.  */
 606   if (TYPE_PRECISION (type) >= TYPE_PRECISION (sizetype))
 607     return true;
 608
 609   /* If overflow is undefined, we can assume that:
 610
 611        X == (ssizetype) OP0 CODE (ssizetype) OP1
 612
 613      is within the range of TYPE, i.e.:
 614
 615        X == (ssizetype) (TYPE) X
 616
 617      Distributing the (TYPE) truncation over X gives:
 618
 619        X == (ssizetype) (OP0 CODE OP1)
 620
 621      Casting both sides to sizetype and distributing the sizetype cast
 622      over X gives:
 623
 624        (sizetype) OP0 CODE (sizetype) OP1 == (sizetype) (OP0 CODE OP1)
 625
 626      and so we can treat DELTA as zero.  */
 627   if (TYPE_OVERFLOW_UNDEFINED (type))
 628     return true;
 629
 630   /* Compute the range of:
 631
 632        (ssizetype) OP0 CODE (ssizetype) OP1
 633
 634      The distributive property guarantees that this has the same bitpattern as:
 635
 636        (sizetype) OP0 CODE (sizetype) OP1
 637
 638      but its range is more conducive to analysis.  */
 639   range_cast (op0_range, ssizetype);
 640   range_cast (op1_range, ssizetype);
 641   value_range wide_range;
 642   range_operator *op = range_op_handler (code, ssizetype);
 643   bool saved_flag_wrapv = flag_wrapv;
 644   flag_wrapv = 1;
 645   op->fold_range (wide_range, ssizetype, op0_range, op1_range);
 646   flag_wrapv = saved_flag_wrapv;
 647   if (wide_range.num_pairs () != 1 || !range_int_cst_p (&wide_range))
 648     return false;
 649
 650   wide_int lb = wide_range.lower_bound ();
 651   wide_int ub = wide_range.upper_bound ();
 652
 653   /* Calculate the number of times that each end of the range overflows or
 654      underflows TYPE.  We can only calculate DELTA if the numbers match.  */
 655   unsigned int precision = TYPE_PRECISION (type);
 656   if (!TYPE_UNSIGNED (type))
 657     {
 658       wide_int type_min = wi::mask (precision - 1, true, lb.get_precision ());
 659       lb -= type_min;
 660       ub -= type_min;
 661     }
 662   wide_int upper_bits = wi::mask (precision, true, lb.get_precision ());
 663   lb &= upper_bits;
 664   ub &= upper_bits;
 665   if (lb != ub)
 666     return false;
 667
 668   /* OP0 CODE OP1 overflows exactly arshift (LB, PRECISION) times, with
 669      negative values indicating underflow.  The low PRECISION bits of LB
 670      are clear, so DELTA is therefore LB (== UB).  */
 671   *off = wide_int_to_tree (ssizetype, wi::to_wide (*off) - lb);
 672   return true;
 673 }
 674
 675 /* Return true if (sizetype) OP == (sizetype) (TO_TYPE) OP,
 676    given that OP has type FROM_TYPE and range RANGE.  Both TO_TYPE and
 677    FROM_TYPE are integral types.  */
 678
 679 static bool
 680 nop_conversion_for_offset_p (tree to_type, tree from_type, value_range &range)
 681 {
 682   gcc_assert (INTEGRAL_TYPE_P (to_type)
 683               && INTEGRAL_TYPE_P (from_type)
 684               && !TYPE_OVERFLOW_TRAPS (to_type)
 685               && !TYPE_OVERFLOW_TRAPS (from_type));
 686
 687   /* Converting to something no narrower than sizetype and then to sizetype
 688      is equivalent to converting directly to sizetype.  */
 689   if (TYPE_PRECISION (to_type) >= TYPE_PRECISION (sizetype))
 690     return true;
 691
 692   /* Check whether TO_TYPE can represent all values that FROM_TYPE can.  */
 693   if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type)
 694       && (TYPE_UNSIGNED (from_type) || !TYPE_UNSIGNED (to_type)))
 695     return true;
 696
 697   /* For narrowing conversions, we could in principle test whether
 698      the bits in FROM_TYPE but not in TO_TYPE have a fixed value
 699      and apply a constant adjustment.
 700
 701      For other conversions (which involve a sign change) we could
 702      check that the signs are always equal, and apply a constant
 703      adjustment if the signs are negative.
 704
 705      However, both cases should be rare.  */
 706   return range_fits_type_p (&range, TYPE_PRECISION (to_type),
 707                             TYPE_SIGN (to_type));
 708 }
 709
 710 static void
 711 split_constant_offset (tree type, tree *var, tree *off,
 712                        value_range *result_range,
 713                        hash_map<tree, std::pair<tree, tree> > &cache,
 714                        unsigned *limit);
 715
 716 /* Helper function for split_constant_offset.  If TYPE is a pointer type,
 717    try to express OP0 CODE OP1 as:
 718
 719      POINTER_PLUS <*VAR, (sizetype) *OFF>
 720
 721    where:
 722
 723    - *VAR has type TYPE
 724    - *OFF is a constant of type ssizetype.
 725
 726    If TYPE is an integral type, try to express (sizetype) (OP0 CODE OP1) as:
 727
 728      *VAR + (sizetype) *OFF
 729
 730    where:
 731
 732    - *VAR has type sizetype
 733    - *OFF is a constant of type ssizetype.
 734
 735    In both cases, OP0 CODE OP1 has type TYPE.
 736
 737    Return true on success.  A false return value indicates that we can't
 738    do better than set *OFF to zero.
 739
 740    When returning true, set RESULT_RANGE to the range of OP0 CODE OP1,
 741    if RESULT_RANGE is nonnull and if we can do better than assume VR_VARYING.
 742
 743    CACHE caches {*VAR, *OFF} pairs for SSA names that we've previously
 744    visited.  LIMIT counts down the number of SSA names that we are
 745    allowed to process before giving up.  */
 746
 747 static bool
 748 split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
 749                          tree *var, tree *off, value_range *result_range,
 750                          hash_map<tree, std::pair<tree, tree> > &cache,
 751                          unsigned *limit)
 752 {
 753   tree var0, var1;
 754   tree off0, off1;
 755   value_range op0_range, op1_range;
 756
 757   *var = NULL_TREE;
 758   *off = NULL_TREE;
 759
 760   switch (code)
 761     {
 762     case INTEGER_CST:
 763       *var = size_int (0);
 764       *off = fold_convert (ssizetype, op0);
 765       if (result_range)
 766         result_range->set (op0, op0);
 767       return true;
 768
 769     case POINTER_PLUS_EXPR:
 770       split_constant_offset (op0, &var0, &off0, nullptr, cache, limit);
 771       split_constant_offset (op1, &var1, &off1, nullptr, cache, limit);
 772       *var = fold_build2 (POINTER_PLUS_EXPR, type, var0, var1);
 773       *off = size_binop (PLUS_EXPR, off0, off1);
 774       return true;
 775
 776     case PLUS_EXPR:
 777     case MINUS_EXPR:
 778       split_constant_offset (op0, &var0, &off0, &op0_range, cache, limit);
 779       split_constant_offset (op1, &var1, &off1, &op1_range, cache, limit);
 780       *off = size_binop (code, off0, off1);
 781       if (!compute_distributive_range (type, op0_range, code, op1_range,
 782                                        off, result_range))
 783         return false;
 784       *var = fold_build2 (code, sizetype, var0, var1);
 785       return true;
 786
 787     case MULT_EXPR:
 788       if (TREE_CODE (op1) != INTEGER_CST)
 789         return false;
 790
 791       split_constant_offset (op0, &var0, &off0, &op0_range, cache, limit);
 792       op1_range.set (op1, op1);
 793       *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
 794       if (!compute_distributive_range (type, op0_range, code, op1_range,
 795                                        off, result_range))
 796         return false;
 797       *var = fold_build2 (MULT_EXPR, sizetype, var0,
 798                           fold_convert (sizetype, op1));
 799       return true;
 800
 801     case ADDR_EXPR:
 802       {
 803         tree base, poffset;
 804         poly_int64 pbitsize, pbitpos, pbytepos;
 805         machine_mode pmode;
 806         int punsignedp, preversep, pvolatilep;
 807
 808         op0 = TREE_OPERAND (op0, 0);
 809         base
 810           = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset, &pmode,
 811                                  &punsignedp, &preversep, &pvolatilep);
 812
 813         if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
 814           return false;
 815         base = build_fold_addr_expr (base);
 816         off0 = ssize_int (pbytepos);
 817
 818         if (poffset)
 819           {
 820             split_constant_offset (poffset, &poffset, &off1, nullptr,
 821                                    cache, limit);
 822             off0 = size_binop (PLUS_EXPR, off0, off1);
 823             base = fold_build_pointer_plus (base, poffset);
 824           }
 825
 826         var0 = fold_convert (type, base);
 827
 828         /* If variable length types are involved, punt, otherwise casts
 829            might be converted into ARRAY_REFs in gimplify_conversion.
 830            To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
 831            possibly no longer appears in current GIMPLE, might resurface.
 832            This perhaps could run
 833            if (CONVERT_EXPR_P (var0))
 834              {
 835                gimplify_conversion (&var0);
 836                // Attempt to fill in any within var0 found ARRAY_REF's
 837                // element size from corresponding op embedded ARRAY_REF,
 838                // if unsuccessful, just punt.
 839              }  */
 840         while (POINTER_TYPE_P (type))
 841           type = TREE_TYPE (type);
 842         if (int_size_in_bytes (type) < 0)
 843           return false;
 844
 845         *var = var0;
 846         *off = off0;
 847         return true;
 848       }
 849
 850     case SSA_NAME:
 851       {
 852         if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op0))
 853           return false;
 854
 855         gimple *def_stmt = SSA_NAME_DEF_STMT (op0);
 856         enum tree_code subcode;
 857
 858         if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
 859           return false;
 860
 861         subcode = gimple_assign_rhs_code (def_stmt);
 862
 863         /* We are using a cache to avoid un-CSEing large amounts of code.  */
 864         bool use_cache = false;
 865         if (!has_single_use (op0)
 866             && (subcode == POINTER_PLUS_EXPR
 867                 || subcode == PLUS_EXPR
 868                 || subcode == MINUS_EXPR
 869                 || subcode == MULT_EXPR
 870                 || subcode == ADDR_EXPR
 871                 || CONVERT_EXPR_CODE_P (subcode)))
 872           {
 873             use_cache = true;
 874             bool existed;
 875             std::pair<tree, tree> &e = cache.get_or_insert (op0, &existed);
 876             if (existed)
 877               {
 878                 if (integer_zerop (e.second))
 879                   return false;
 880                 *var = e.first;
 881                 *off = e.second;
 882                 /* The caller sets the range in this case.  */
 883                 return true;
 884               }
 885             e = std::make_pair (op0, ssize_int (0));
 886           }
 887
 888         if (*limit == 0)
 889           return false;
 890         --*limit;
 891
 892         var0 = gimple_assign_rhs1 (def_stmt);
 893         var1 = gimple_assign_rhs2 (def_stmt);
 894
 895         bool res = split_constant_offset_1 (type, var0, subcode, var1,
 896                                             var, off, nullptr, cache, limit);
 897         if (res && use_cache)
 898           *cache.get (op0) = std::make_pair (*var, *off);
 899         /* The caller sets the range in this case.  */
 900         return res;
 901       }
 902     CASE_CONVERT:
 903       {
 904         /* We can only handle the following conversions:
 905
 906            - Conversions from one pointer type to another pointer type.
 907
 908            - Conversions from one non-trapping integral type to another
 909              non-trapping integral type.  In this case, the recursive
 910              call makes sure that:
 911
 912                (sizetype) OP0
 913
 914              can be expressed as a sizetype operation involving VAR and OFF,
 915              and all we need to do is check whether:
 916
 917                (sizetype) OP0 == (sizetype) (TYPE) OP0
 918
 919            - Conversions from a non-trapping sizetype-size integral type to
 920              a like-sized pointer type.  In this case, the recursive call
 921              makes sure that:
 922
 923                (sizetype) OP0 == *VAR + (sizetype) *OFF
 924
 925              and we can convert that to:
 926
 927                POINTER_PLUS <(TYPE) *VAR, (sizetype) *OFF>
 928
 929            - Conversions from a sizetype-sized pointer type to a like-sized
 930              non-trapping integral type.  In this case, the recursive call
 931              makes sure that:
 932
 933                OP0 == POINTER_PLUS <*VAR, (sizetype) *OFF>
 934
 935              where the POINTER_PLUS and *VAR have the same precision as
 936              TYPE (and the same precision as sizetype).  Then:
 937
 938                (sizetype) (TYPE) OP0 == (sizetype) *VAR + (sizetype) *OFF.  */
 939         tree itype = TREE_TYPE (op0);
 940         if ((POINTER_TYPE_P (itype)
 941              || (INTEGRAL_TYPE_P (itype) && !TYPE_OVERFLOW_TRAPS (itype)))
 942             && (POINTER_TYPE_P (type)
 943                 || (INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type)))
 944             && (POINTER_TYPE_P (type) == POINTER_TYPE_P (itype)
 945                 || (TYPE_PRECISION (type) == TYPE_PRECISION (sizetype)
 946                     && TYPE_PRECISION (itype) == TYPE_PRECISION (sizetype))))
 947           {
 948             if (POINTER_TYPE_P (type))
 949               {
 950                 split_constant_offset (op0, var, off, nullptr, cache, limit);
 951                 *var = fold_convert (type, *var);
 952               }
 953             else if (POINTER_TYPE_P (itype))
 954               {
 955                 split_constant_offset (op0, var, off, nullptr, cache, limit);
 956                 *var = fold_convert (sizetype, *var);
 957               }
 958             else
 959               {
 960                 split_constant_offset (op0, var, off, &op0_range,
 961                                        cache, limit);
 962                 if (!nop_conversion_for_offset_p (type, itype, op0_range))
 963                   return false;
 964                 if (result_range)
 965                   {
 966                     *result_range = op0_range;
 967                     range_cast (*result_range, type);
 968                   }
 969               }
 970             return true;
 971           }
 972         return false;
 973       }
 974
 975     default:
 976       return false;
 977     }
 978 }
 979
 980 /* If EXP has pointer type, try to express it as:
 981
 982      POINTER_PLUS <*VAR, (sizetype) *OFF>
 983
 984    where:
 985
 986    - *VAR has the same type as EXP
 987    - *OFF is a constant of type ssizetype.
 988
 989    If EXP has an integral type, try to express (sizetype) EXP as:
 990
 991      *VAR + (sizetype) *OFF
 992
 993    where:
 994
 995    - *VAR has type sizetype
 996    - *OFF is a constant of type ssizetype.
 997
 998    If EXP_RANGE is nonnull, set it to the range of EXP.
 999
1000    CACHE caches {*VAR, *OFF} pairs for SSA names that we've previously
1001    visited.  LIMIT counts down the number of SSA names that we are
1002    allowed to process before giving up.  */
1003
1004 static void
1005 split_constant_offset (tree exp, tree *var, tree *off, value_range *exp_range,
1006                        hash_map<tree, std::pair<tree, tree> > &cache,
1007                        unsigned *limit)
1008 {
1009   tree type = TREE_TYPE (exp), op0, op1;
1010   enum tree_code code;
1011
1012   code = TREE_CODE (exp);
1013   if (exp_range)
1014     {
1015       *exp_range = type;
1016       if (code == SSA_NAME)
1017         {
1018           value_range vr;
1019           get_range_query (cfun)->range_of_expr (vr, exp);
1020           if (vr.undefined_p ())
1021             vr.set_varying (TREE_TYPE (exp));
1022           wide_int var_min = wi::to_wide (vr.min ());
1023           wide_int var_max = wi::to_wide (vr.max ());
1024           value_range_kind vr_kind = vr.kind ();
1025           wide_int var_nonzero = get_nonzero_bits (exp);
1026           vr_kind = intersect_range_with_nonzero_bits (vr_kind,
1027                                                        &var_min, &var_max,
1028                                                        var_nonzero,
1029                                                        TYPE_SIGN (type));
1030           /* This check for VR_VARYING is here because the old code
1031              using get_range_info would return VR_RANGE for the entire
1032              domain, instead of VR_VARYING.  The new code normalizes
1033              full-domain ranges to VR_VARYING.  */
1034           if (vr_kind == VR_RANGE || vr_kind == VR_VARYING)
1035             *exp_range = value_range (type, var_min, var_max);
1036         }
1037     }
1038
1039   if (!tree_is_chrec (exp)
1040       && get_gimple_rhs_class (TREE_CODE (exp)) != GIMPLE_TERNARY_RHS)
1041     {
1042       extract_ops_from_tree (exp, &code, &op0, &op1);
1043       if (split_constant_offset_1 (type, op0, code, op1, var, off,
1044                                    exp_range, cache, limit))
1045         return;
1046     }
1047
1048   *var = exp;
1049   if (INTEGRAL_TYPE_P (type))
1050     *var = fold_convert (sizetype, *var);
1051   *off = ssize_int (0);
1052
1053   value_range r;
1054   if (exp_range && code != SSA_NAME
1055       && get_range_query (cfun)->range_of_expr (r, exp)
1056       && !r.undefined_p ())
1057     *exp_range = r;
1058 }
1059
1060 /* Expresses EXP as VAR + OFF, where OFF is a constant.  VAR has the same
1061    type as EXP while OFF has type ssizetype.  */
1062
1063 void
1064 split_constant_offset (tree exp, tree *var, tree *off)
1065 {
1066   unsigned limit = param_ssa_name_def_chain_limit;
1067   static hash_map<tree, std::pair<tree, tree> > *cache;
1068   if (!cache)
1069     cache = new hash_map<tree, std::pair<tree, tree> > (37);
1070   split_constant_offset (exp, var, off, nullptr, *cache, &limit);
1071   *var = fold_convert (TREE_TYPE (exp), *var);
1072   cache->empty ();
1073 }
1074
1075 /* Returns the address ADDR of an object in a canonical shape (without nop
1076    casts, and with type of pointer to the object).  */
1077
1078 static tree
1079 canonicalize_base_object_address (tree addr)
1080 {
1081   tree orig = addr;
1082
1083   STRIP_NOPS (addr);
1084
1085   /* The base address may be obtained by casting from integer, in that case
1086      keep the cast.  */
1087   if (!POINTER_TYPE_P (TREE_TYPE (addr)))
1088     return orig;
1089
1090   if (TREE_CODE (addr) != ADDR_EXPR)
1091     return addr;
1092
1093   return build_fold_addr_expr (TREE_OPERAND (addr, 0));
1094 }
1095
1096 /* Analyze the behavior of memory reference REF within STMT.
1097    There are two modes:
1098
1099    - BB analysis.  In this case we simply split the address into base,
1100      init and offset components, without reference to any containing loop.
1101      The resulting base and offset are general expressions and they can
1102      vary arbitrarily from one iteration of the containing loop to the next.
1103      The step is always zero.
1104
1105    - loop analysis.  In this case we analyze the reference both wrt LOOP
1106      and on the basis that the reference occurs (is "used") in LOOP;
1107      see the comment above analyze_scalar_evolution_in_loop for more
1108      information about this distinction.  The base, init, offset and
1109      step fields are all invariant in LOOP.
1110
1111    Perform BB analysis if LOOP is null, or if LOOP is the function's
1112    dummy outermost loop.  In other cases perform loop analysis.
1113
1114    Return true if the analysis succeeded and store the results in DRB if so.
1115    BB analysis can only fail for bitfield or reversed-storage accesses.  */
1116
1117 opt_result
1118 dr_analyze_innermost (innermost_loop_behavior *drb, tree ref,
1119                       class loop *loop, const gimple *stmt)
1120 {
1121   poly_int64 pbitsize, pbitpos;
1122   tree base, poffset;
1123   machine_mode pmode;
1124   int punsignedp, preversep, pvolatilep;
1125   affine_iv base_iv, offset_iv;
1126   tree init, dinit, step;
1127   bool in_loop = (loop && loop->num);
1128
1129   if (dump_file && (dump_flags & TDF_DETAILS))
1130     fprintf (dump_file, "analyze_innermost: ");
1131
1132   base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset, &pmode,
1133                               &punsignedp, &preversep, &pvolatilep);
1134   gcc_assert (base != NULL_TREE);
1135
1136   poly_int64 pbytepos;
1137   if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
1138     return opt_result::failure_at (stmt,
1139                                    "failed: bit offset alignment.\n");
1140
1141   if (preversep)
1142     return opt_result::failure_at (stmt,
1143                                    "failed: reverse storage order.\n");
1144
1145   /* Calculate the alignment and misalignment for the inner reference.  */
1146   unsigned int HOST_WIDE_INT bit_base_misalignment;
1147   unsigned int bit_base_alignment;
1148   get_object_alignment_1 (base, &bit_base_alignment, &bit_base_misalignment);
1149
1150   /* There are no bitfield references remaining in BASE, so the values
1151      we got back must be whole bytes.  */
1152   gcc_assert (bit_base_alignment % BITS_PER_UNIT == 0
1153               && bit_base_misalignment % BITS_PER_UNIT == 0);
1154   unsigned int base_alignment = bit_base_alignment / BITS_PER_UNIT;
1155   poly_int64 base_misalignment = bit_base_misalignment / BITS_PER_UNIT;
1156
1157   if (TREE_CODE (base) == MEM_REF)
1158     {
1159       if (!integer_zerop (TREE_OPERAND (base, 1)))
1160         {
1161           /* Subtract MOFF from the base and add it to POFFSET instead.
1162              Adjust the misalignment to reflect the amount we subtracted.  */
1163           poly_offset_int moff = mem_ref_offset (base);
1164           base_misalignment -= moff.force_shwi ();
1165           tree mofft = wide_int_to_tree (sizetype, moff);
1166           if (!poffset)
1167             poffset = mofft;
1168           else
1169             poffset = size_binop (PLUS_EXPR, poffset, mofft);
1170         }
1171       base = TREE_OPERAND (base, 0);
1172     }
1173   else
1174     base = build_fold_addr_expr (base);
1175
1176   if (in_loop)
1177     {
1178       if (!simple_iv (loop, loop, base, &base_iv, true))
1179         return opt_result::failure_at
1180           (stmt, "failed: evolution of base is not affine.\n");
1181     }
1182   else
1183     {
1184       base_iv.base = base;
1185       base_iv.step = ssize_int (0);
1186       base_iv.no_overflow = true;
1187     }
1188
1189   if (!poffset)
1190     {
1191       offset_iv.base = ssize_int (0);
1192       offset_iv.step = ssize_int (0);
1193     }
1194   else
1195     {
1196       if (!in_loop)
1197         {
1198           offset_iv.base = poffset;
1199           offset_iv.step = ssize_int (0);
1200         }
1201       else if (!simple_iv (loop, loop, poffset, &offset_iv, true))
1202         return opt_result::failure_at
1203           (stmt, "failed: evolution of offset is not affine.\n");
1204     }
1205
1206   init = ssize_int (pbytepos);
1207
1208   /* Subtract any constant component from the base and add it to INIT instead.
1209      Adjust the misalignment to reflect the amount we subtracted.  */
1210   split_constant_offset (base_iv.base, &base_iv.base, &dinit);
1211   init = size_binop (PLUS_EXPR, init, dinit);
1212   base_misalignment -= TREE_INT_CST_LOW (dinit);
1213
1214   split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
1215   init = size_binop (PLUS_EXPR, init, dinit);
1216
1217   step = size_binop (PLUS_EXPR,
1218                      fold_convert (ssizetype, base_iv.step),
1219                      fold_convert (ssizetype, offset_iv.step));
1220
1221   base = canonicalize_base_object_address (base_iv.base);
1222
1223   /* See if get_pointer_alignment can guarantee a higher alignment than
1224      the one we calculated above.  */
1225   unsigned int HOST_WIDE_INT alt_misalignment;
1226   unsigned int alt_alignment;
1227   get_pointer_alignment_1 (base, &alt_alignment, &alt_misalignment);
1228
1229   /* As above, these values must be whole bytes.  */
1230   gcc_assert (alt_alignment % BITS_PER_UNIT == 0
1231               && alt_misalignment % BITS_PER_UNIT == 0);
1232   alt_alignment /= BITS_PER_UNIT;
1233   alt_misalignment /= BITS_PER_UNIT;
1234
1235   if (base_alignment < alt_alignment)
1236     {
1237       base_alignment = alt_alignment;
1238       base_misalignment = alt_misalignment;
1239     }
1240
1241   drb->base_address = base;
1242   drb->offset = fold_convert (ssizetype, offset_iv.base);
1243   drb->init = init;
1244   drb->step = step;
1245   if (known_misalignment (base_misalignment, base_alignment,
1246                           &drb->base_misalignment))
1247     drb->base_alignment = base_alignment;
1248   else
1249     {
1250       drb->base_alignment = known_alignment (base_misalignment);
1251       drb->base_misalignment = 0;
1252     }
1253   drb->offset_alignment = highest_pow2_factor (offset_iv.base);
1254   drb->step_alignment = highest_pow2_factor (step);
1255
1256   if (dump_file && (dump_flags & TDF_DETAILS))
1257     fprintf (dump_file, "success.\n");
1258
1259   return opt_result::success ();
1260 }
1261
1262 /* Return true if OP is a valid component reference for a DR access
1263    function.  This accepts a subset of what handled_component_p accepts.  */
1264
1265 static bool
1266 access_fn_component_p (tree op)
1267 {
1268   switch (TREE_CODE (op))
1269     {
1270     case REALPART_EXPR:
1271     case IMAGPART_EXPR:
1272     case ARRAY_REF:
1273       return true;
1274
1275     case COMPONENT_REF:
1276       return TREE_CODE (TREE_TYPE (TREE_OPERAND (op, 0))) == RECORD_TYPE;
1277
1278     default:
1279       return false;
1280     }
1281 }
1282
1283 /* Returns whether BASE can have a access_fn_component_p with BASE
1284    as base.  */
1285
1286 static bool
1287 base_supports_access_fn_components_p (tree base)
1288 {
1289   switch (TREE_CODE (TREE_TYPE (base)))
1290     {
1291     case COMPLEX_TYPE:
1292     case ARRAY_TYPE:
1293     case RECORD_TYPE:
1294       return true;
1295     default:
1296       return false;
1297     }
1298 }
1299
1300 /* Determines the base object and the list of indices of memory reference
1301    DR, analyzed in LOOP and instantiated before NEST.  */
1302
1303 static void
1304 dr_analyze_indices (struct indices *dri, tree ref, edge nest, loop_p loop)
1305 {
1306   /* If analyzing a basic-block there are no indices to analyze
1307      and thus no access functions.  */
1308   if (!nest)
1309     {
1310       dri->base_object = ref;
1311       dri->access_fns.create (0);
1312       return;
1313     }
1314
1315   vec<tree> access_fns = vNULL;
1316
1317   /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
1318      into a two element array with a constant index.  The base is
1319      then just the immediate underlying object.  */
1320   if (TREE_CODE (ref) == REALPART_EXPR)
1321     {
1322       ref = TREE_OPERAND (ref, 0);
1323       access_fns.safe_push (integer_zero_node);
1324     }
1325   else if (TREE_CODE (ref) == IMAGPART_EXPR)
1326     {
1327       ref = TREE_OPERAND (ref, 0);
1328       access_fns.safe_push (integer_one_node);
1329     }
1330
1331   /* Analyze access functions of dimensions we know to be independent.
1332      The list of component references handled here should be kept in
1333      sync with access_fn_component_p.  */
1334   while (handled_component_p (ref))
1335     {
1336       if (TREE_CODE (ref) == ARRAY_REF)
1337         {
1338           tree op = TREE_OPERAND (ref, 1);
1339           tree access_fn = analyze_scalar_evolution (loop, op);
1340           access_fn = instantiate_scev (nest, loop, access_fn);
1341           access_fns.safe_push (access_fn);
1342         }
1343       else if (TREE_CODE (ref) == COMPONENT_REF
1344                && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) == RECORD_TYPE)
1345         {
1346           /* For COMPONENT_REFs of records (but not unions!) use the
1347              FIELD_DECL offset as constant access function so we can
1348              disambiguate a[i].f1 and a[i].f2.  */
1349           tree off = component_ref_field_offset (ref);
1350           off = size_binop (PLUS_EXPR,
1351                             size_binop (MULT_EXPR,
1352                                         fold_convert (bitsizetype, off),
1353                                         bitsize_int (BITS_PER_UNIT)),
1354                             DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref, 1)));
1355           access_fns.safe_push (off);
1356         }
1357       else
1358         /* If we have an unhandled component we could not translate
1359            to an access function stop analyzing.  We have determined
1360            our base object in this case.  */
1361         break;
1362
1363       ref = TREE_OPERAND (ref, 0);
1364     }
1365
1366   /* If the address operand of a MEM_REF base has an evolution in the
1367      analyzed nest, add it as an additional independent access-function.  */
1368   if (TREE_CODE (ref) == MEM_REF)
1369     {
1370       tree op = TREE_OPERAND (ref, 0);
1371       tree access_fn = analyze_scalar_evolution (loop, op);
1372       access_fn = instantiate_scev (nest, loop, access_fn);
1373       if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
1374         {
1375           tree memoff = TREE_OPERAND (ref, 1);
1376           tree base = initial_condition (access_fn);
1377           tree orig_type = TREE_TYPE (base);
1378           STRIP_USELESS_TYPE_CONVERSION (base);
1379           tree off;
1380           split_constant_offset (base, &base, &off);
1381           STRIP_USELESS_TYPE_CONVERSION (base);
1382           /* Fold the MEM_REF offset into the evolutions initial
1383              value to make more bases comparable.  */
1384           if (!integer_zerop (memoff))
1385             {
1386               off = size_binop (PLUS_EXPR, off,
1387                                 fold_convert (ssizetype, memoff));
1388               memoff = build_int_cst (TREE_TYPE (memoff), 0);
1389             }
1390           /* Adjust the offset so it is a multiple of the access type
1391              size and thus we separate bases that can possibly be used
1392              to produce partial overlaps (which the access_fn machinery
1393              cannot handle).  */
1394           wide_int rem;
1395           if (TYPE_SIZE_UNIT (TREE_TYPE (ref))
1396               && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (ref))) == INTEGER_CST
1397               && !integer_zerop (TYPE_SIZE_UNIT (TREE_TYPE (ref))))
1398             rem = wi::mod_trunc
1399               (wi::to_wide (off),
1400                wi::to_wide (TYPE_SIZE_UNIT (TREE_TYPE (ref))),
1401                SIGNED);
1402           else
1403             /* If we can't compute the remainder simply force the initial
1404                condition to zero.  */
1405             rem = wi::to_wide (off);
1406           off = wide_int_to_tree (ssizetype, wi::to_wide (off) - rem);
1407           memoff = wide_int_to_tree (TREE_TYPE (memoff), rem);
1408           /* And finally replace the initial condition.  */
1409           access_fn = chrec_replace_initial_condition
1410               (access_fn, fold_convert (orig_type, off));
1411           /* ???  This is still not a suitable base object for
1412              dr_may_alias_p - the base object needs to be an
1413              access that covers the object as whole.  With
1414              an evolution in the pointer this cannot be
1415              guaranteed.
1416              As a band-aid, mark the access so we can special-case
1417              it in dr_may_alias_p.  */
1418           tree old = ref;
1419           ref = fold_build2_loc (EXPR_LOCATION (ref),
1420                                  MEM_REF, TREE_TYPE (ref),
1421                                  base, memoff);
1422           MR_DEPENDENCE_CLIQUE (ref) = MR_DEPENDENCE_CLIQUE (old);
1423           MR_DEPENDENCE_BASE (ref) = MR_DEPENDENCE_BASE (old);
1424           dri->unconstrained_base = true;
1425           access_fns.safe_push (access_fn);
1426         }
1427     }
1428   else if (DECL_P (ref))
1429     {
1430       /* Canonicalize DR_BASE_OBJECT to MEM_REF form.  */
1431       ref = build2 (MEM_REF, TREE_TYPE (ref),
1432                     build_fold_addr_expr (ref),
1433                     build_int_cst (reference_alias_ptr_type (ref), 0));
1434     }
1435
1436   dri->base_object = ref;
1437   dri->access_fns = access_fns;
1438 }
1439
1440 /* Extracts the alias analysis information from the memory reference DR.  */
1441
1442 static void
1443 dr_analyze_alias (struct data_reference *dr)
1444 {
1445   tree ref = DR_REF (dr);
1446   tree base = get_base_address (ref), addr;
1447
1448   if (INDIRECT_REF_P (base)
1449       || TREE_CODE (base) == MEM_REF)
1450     {
1451       addr = TREE_OPERAND (base, 0);
1452       if (TREE_CODE (addr) == SSA_NAME)
1453         DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
1454     }
1455 }
1456
1457 /* Frees data reference DR.  */
1458
1459 void
1460 free_data_ref (data_reference_p dr)
1461 {
1462   DR_ACCESS_FNS (dr).release ();
1463   if (dr->alt_indices.base_object)
1464     dr->alt_indices.access_fns.release ();
1465   free (dr);
1466 }
1467
1468 /* Analyze memory reference MEMREF, which is accessed in STMT.
1469    The reference is a read if IS_READ is true, otherwise it is a write.
1470    IS_CONDITIONAL_IN_STMT indicates that the reference is conditional
1471    within STMT, i.e. that it might not occur even if STMT is executed
1472    and runs to completion.
1473
1474    Return the data_reference description of MEMREF.  NEST is the outermost
1475    loop in which the reference should be instantiated, LOOP is the loop
1476    in which the data reference should be analyzed.  */
1477
1478 struct data_reference *
1479 create_data_ref (edge nest, loop_p loop, tree memref, gimple *stmt,
1480                  bool is_read, bool is_conditional_in_stmt)
1481 {
1482   struct data_reference *dr;
1483
1484   if (dump_file && (dump_flags & TDF_DETAILS))
1485     {
1486       fprintf (dump_file, "Creating dr for ");
1487       print_generic_expr (dump_file, memref, TDF_SLIM);
1488       fprintf (dump_file, "\n");
1489     }
1490
1491   dr = XCNEW (struct data_reference);
1492   DR_STMT (dr) = stmt;
1493   DR_REF (dr) = memref;
1494   DR_IS_READ (dr) = is_read;
1495   DR_IS_CONDITIONAL_IN_STMT (dr) = is_conditional_in_stmt;
1496
1497   dr_analyze_innermost (&DR_INNERMOST (dr), memref,
1498                         nest != NULL ? loop : NULL, stmt);
1499   dr_analyze_indices (&dr->indices, DR_REF (dr), nest, loop);
1500   dr_analyze_alias (dr);
1501
1502   if (dump_file && (dump_flags & TDF_DETAILS))
1503     {
1504       unsigned i;
1505       fprintf (dump_file, "\tbase_address: ");
1506       print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
1507       fprintf (dump_file, "\n\toffset from base address: ");
1508       print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
1509       fprintf (dump_file, "\n\tconstant offset from base address: ");
1510       print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
1511       fprintf (dump_file, "\n\tstep: ");
1512       print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
1513       fprintf (dump_file, "\n\tbase alignment: %d", DR_BASE_ALIGNMENT (dr));
1514       fprintf (dump_file, "\n\tbase misalignment: %d",
1515                DR_BASE_MISALIGNMENT (dr));
1516       fprintf (dump_file, "\n\toffset alignment: %d",
1517                DR_OFFSET_ALIGNMENT (dr));
1518       fprintf (dump_file, "\n\tstep alignment: %d", DR_STEP_ALIGNMENT (dr));
1519       fprintf (dump_file, "\n\tbase_object: ");
1520       print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
1521       fprintf (dump_file, "\n");
1522       for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
1523         {
1524           fprintf (dump_file, "\tAccess function %d: ", i);
1525           print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
1526         }
1527     }
1528
1529   return dr;
1530 }
1531
1532 /*  A helper function computes order between two tree expressions T1 and T2.
1533     This is used in comparator functions sorting objects based on the order
1534     of tree expressions.  The function returns -1, 0, or 1.  */
1535
1536 int
1537 data_ref_compare_tree (tree t1, tree t2)
1538 {
1539   int i, cmp;
1540   enum tree_code code;
1541   char tclass;
1542
1543   if (t1 == t2)
1544     return 0;
1545   if (t1 == NULL)
1546     return -1;
1547   if (t2 == NULL)
1548     return 1;
1549
1550   STRIP_USELESS_TYPE_CONVERSION (t1);
1551   STRIP_USELESS_TYPE_CONVERSION (t2);
1552   if (t1 == t2)
1553     return 0;
1554
1555   if (TREE_CODE (t1) != TREE_CODE (t2)
1556       && ! (CONVERT_EXPR_P (t1) && CONVERT_EXPR_P (t2)))
1557     return TREE_CODE (t1) < TREE_CODE (t2) ? -1 : 1;
1558
1559   code = TREE_CODE (t1);
1560   switch (code)
1561     {
1562     case INTEGER_CST:
1563       return tree_int_cst_compare (t1, t2);
1564
1565     case STRING_CST:
1566       if (TREE_STRING_LENGTH (t1) != TREE_STRING_LENGTH (t2))
1567         return TREE_STRING_LENGTH (t1) < TREE_STRING_LENGTH (t2) ? -1 : 1;
1568       return memcmp (TREE_STRING_POINTER (t1), TREE_STRING_POINTER (t2),
1569                      TREE_STRING_LENGTH (t1));
1570
1571     case SSA_NAME:
1572       if (SSA_NAME_VERSION (t1) != SSA_NAME_VERSION (t2))
1573         return SSA_NAME_VERSION (t1) < SSA_NAME_VERSION (t2) ? -1 : 1;
1574       break;
1575
1576     default:
1577       if (POLY_INT_CST_P (t1))
1578         return compare_sizes_for_sort (wi::to_poly_widest (t1),
1579                                        wi::to_poly_widest (t2));
1580
1581       tclass = TREE_CODE_CLASS (code);
1582
1583       /* For decls, compare their UIDs.  */
1584       if (tclass == tcc_declaration)
1585         {
1586           if (DECL_UID (t1) != DECL_UID (t2))
1587             return DECL_UID (t1) < DECL_UID (t2) ? -1 : 1;
1588           break;
1589         }
1590       /* For expressions, compare their operands recursively.  */
1591       else if (IS_EXPR_CODE_CLASS (tclass))
1592         {
1593           for (i = TREE_OPERAND_LENGTH (t1) - 1; i >= 0; --i)
1594             {
1595               cmp = data_ref_compare_tree (TREE_OPERAND (t1, i),
1596                                            TREE_OPERAND (t2, i));
1597               if (cmp != 0)
1598                 return cmp;
1599             }
1600         }
1601       else
1602         gcc_unreachable ();
1603     }
1604
1605   return 0;
1606 }
1607
1608 /* Return TRUE it's possible to resolve data dependence DDR by runtime alias
1609    check.  */
1610
1611 opt_result
1612 runtime_alias_check_p (ddr_p ddr, class loop *loop, bool speed_p)
1613 {
1614   if (dump_enabled_p ())
1615     dump_printf (MSG_NOTE,
1616                  "consider run-time aliasing test between %T and %T\n",
1617                  DR_REF (DDR_A (ddr)), DR_REF (DDR_B (ddr)));
1618
1619   if (!speed_p)
1620     return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
1621                                    "runtime alias check not supported when"
1622                                    " optimizing for size.\n");
1623
1624   /* FORNOW: We don't support versioning with outer-loop in either
1625      vectorization or loop distribution.  */
1626   if (loop != NULL && loop->inner != NULL)
1627     return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
1628                                    "runtime alias check not supported for"
1629                                    " outer loop.\n");
1630
1631   return opt_result::success ();
1632 }
1633
1634 /* Operator == between two dr_with_seg_len objects.
1635
1636    This equality operator is used to make sure two data refs
1637    are the same one so that we will consider to combine the
1638    aliasing checks of those two pairs of data dependent data
1639    refs.  */
1640
1641 static bool
1642 operator == (const dr_with_seg_len& d1,
1643              const dr_with_seg_len& d2)
1644 {
1645   return (operand_equal_p (DR_BASE_ADDRESS (d1.dr),
1646                            DR_BASE_ADDRESS (d2.dr), 0)
1647           && data_ref_compare_tree (DR_OFFSET (d1.dr), DR_OFFSET (d2.dr)) == 0
1648           && data_ref_compare_tree (DR_INIT (d1.dr), DR_INIT (d2.dr)) == 0
1649           && data_ref_compare_tree (d1.seg_len, d2.seg_len) == 0
1650           && known_eq (d1.access_size, d2.access_size)
1651           && d1.align == d2.align);
1652 }
1653
1654 /* Comparison function for sorting objects of dr_with_seg_len_pair_t
1655    so that we can combine aliasing checks in one scan.  */
1656
1657 static int
1658 comp_dr_with_seg_len_pair (const void *pa_, const void *pb_)
1659 {
1660   const dr_with_seg_len_pair_t* pa = (const dr_with_seg_len_pair_t *) pa_;
1661   const dr_with_seg_len_pair_t* pb = (const dr_with_seg_len_pair_t *) pb_;
1662   const dr_with_seg_len &a1 = pa->first, &a2 = pa->second;
1663   const dr_with_seg_len &b1 = pb->first, &b2 = pb->second;
1664
1665   /* For DR pairs (a, b) and (c, d), we only consider to merge the alias checks
1666      if a and c have the same basic address snd step, and b and d have the same
1667      address and step.  Therefore, if any a&c or b&d don't have the same address
1668      and step, we don't care the order of those two pairs after sorting.  */
1669   int comp_res;
1670
1671   if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a1.dr),
1672                                          DR_BASE_ADDRESS (b1.dr))) != 0)
1673     return comp_res;
1674   if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a2.dr),
1675                                          DR_BASE_ADDRESS (b2.dr))) != 0)
1676     return comp_res;
1677   if ((comp_res = data_ref_compare_tree (DR_STEP (a1.dr),
1678                                          DR_STEP (b1.dr))) != 0)
1679     return comp_res;
1680   if ((comp_res = data_ref_compare_tree (DR_STEP (a2.dr),
1681                                          DR_STEP (b2.dr))) != 0)
1682     return comp_res;
1683   if ((comp_res = data_ref_compare_tree (DR_OFFSET (a1.dr),
1684                                          DR_OFFSET (b1.dr))) != 0)
1685     return comp_res;
1686   if ((comp_res = data_ref_compare_tree (DR_INIT (a1.dr),
1687                                          DR_INIT (b1.dr))) != 0)
1688     return comp_res;
1689   if ((comp_res = data_ref_compare_tree (DR_OFFSET (a2.dr),
1690                                          DR_OFFSET (b2.dr))) != 0)
1691     return comp_res;
1692   if ((comp_res = data_ref_compare_tree (DR_INIT (a2.dr),
1693                                          DR_INIT (b2.dr))) != 0)
1694     return comp_res;
1695
1696   return 0;
1697 }
1698
1699 /* Dump information about ALIAS_PAIR, indenting each line by INDENT.  */
1700
1701 static void
1702 dump_alias_pair (dr_with_seg_len_pair_t *alias_pair, const char *indent)
1703 {
1704   dump_printf (MSG_NOTE, "%sreference:      %T vs. %T\n", indent,
1705                DR_REF (alias_pair->first.dr),
1706                DR_REF (alias_pair->second.dr));
1707
1708   dump_printf (MSG_NOTE, "%ssegment length: %T", indent,
1709                alias_pair->first.seg_len);
1710   if (!operand_equal_p (alias_pair->first.seg_len,
1711                         alias_pair->second.seg_len, 0))
1712     dump_printf (MSG_NOTE, " vs. %T", alias_pair->second.seg_len);
1713
1714   dump_printf (MSG_NOTE, "\n%saccess size:    ", indent);
1715   dump_dec (MSG_NOTE, alias_pair->first.access_size);
1716   if (maybe_ne (alias_pair->first.access_size, alias_pair->second.access_size))
1717     {
1718       dump_printf (MSG_NOTE, " vs. ");
1719       dump_dec (MSG_NOTE, alias_pair->second.access_size);
1720     }
1721
1722   dump_printf (MSG_NOTE, "\n%salignment:      %d", indent,
1723                alias_pair->first.align);
1724   if (alias_pair->first.align != alias_pair->second.align)
1725     dump_printf (MSG_NOTE, " vs. %d", alias_pair->second.align);
1726
1727   dump_printf (MSG_NOTE, "\n%sflags:         ", indent);
1728   if (alias_pair->flags & DR_ALIAS_RAW)
1729     dump_printf (MSG_NOTE, " RAW");
1730   if (alias_pair->flags & DR_ALIAS_WAR)
1731     dump_printf (MSG_NOTE, " WAR");
1732   if (alias_pair->flags & DR_ALIAS_WAW)
1733     dump_printf (MSG_NOTE, " WAW");
1734   if (alias_pair->flags & DR_ALIAS_ARBITRARY)
1735     dump_printf (MSG_NOTE, " ARBITRARY");
1736   if (alias_pair->flags & DR_ALIAS_SWAPPED)
1737     dump_printf (MSG_NOTE, " SWAPPED");
1738   if (alias_pair->flags & DR_ALIAS_UNSWAPPED)
1739     dump_printf (MSG_NOTE, " UNSWAPPED");
1740   if (alias_pair->flags & DR_ALIAS_MIXED_STEPS)
1741     dump_printf (MSG_NOTE, " MIXED_STEPS");
1742   if (alias_pair->flags == 0)
1743     dump_printf (MSG_NOTE, " <none>");
1744   dump_printf (MSG_NOTE, "\n");
1745 }
1746
1747 /* Merge alias checks recorded in ALIAS_PAIRS and remove redundant ones.
1748    FACTOR is number of iterations that each data reference is accessed.
1749
1750    Basically, for each pair of dependent data refs store_ptr_0 & load_ptr_0,
1751    we create an expression:
1752
1753    ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
1754    || (load_ptr_0 + load_segment_length_0) <= store_ptr_0))
1755
1756    for aliasing checks.  However, in some cases we can decrease the number
1757    of checks by combining two checks into one.  For example, suppose we have
1758    another pair of data refs store_ptr_0 & load_ptr_1, and if the following
1759    condition is satisfied:
1760
1761    load_ptr_0 < load_ptr_1  &&
1762    load_ptr_1 - load_ptr_0 - load_segment_length_0 < store_segment_length_0
1763
1764    (this condition means, in each iteration of vectorized loop, the accessed
1765    memory of store_ptr_0 cannot be between the memory of load_ptr_0 and
1766    load_ptr_1.)
1767
1768    we then can use only the following expression to finish the alising checks
1769    between store_ptr_0 & load_ptr_0 and store_ptr_0 & load_ptr_1:
1770
1771    ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
1772    || (load_ptr_1 + load_segment_length_1 <= store_ptr_0))
1773
1774    Note that we only consider that load_ptr_0 and load_ptr_1 have the same
1775    basic address.  */
1776
1777 void
1778 prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs,
1779                                poly_uint64)
1780 {
1781   if (alias_pairs->is_empty ())
1782     return;
1783
1784   /* Canonicalize each pair so that the base components are ordered wrt
1785      data_ref_compare_tree.  This allows the loop below to merge more
1786      cases.  */
1787   unsigned int i;
1788   dr_with_seg_len_pair_t *alias_pair;
1789   FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
1790     {
1791       data_reference_p dr_a = alias_pair->first.dr;
1792       data_reference_p dr_b = alias_pair->second.dr;
1793       int comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (dr_a),
1794                                             DR_BASE_ADDRESS (dr_b));
1795       if (comp_res == 0)
1796         comp_res = data_ref_compare_tree (DR_OFFSET (dr_a), DR_OFFSET (dr_b));
1797       if (comp_res == 0)
1798         comp_res = data_ref_compare_tree (DR_INIT (dr_a), DR_INIT (dr_b));
1799       if (comp_res > 0)
1800         {
1801           std::swap (alias_pair->first, alias_pair->second);
1802           alias_pair->flags |= DR_ALIAS_SWAPPED;
1803         }
1804       else
1805         alias_pair->flags |= DR_ALIAS_UNSWAPPED;
1806     }
1807
1808   /* Sort the collected data ref pairs so that we can scan them once to
1809      combine all possible aliasing checks.  */
1810   alias_pairs->qsort (comp_dr_with_seg_len_pair);
1811
1812   /* Scan the sorted dr pairs and check if we can combine alias checks
1813      of two neighboring dr pairs.  */
1814   unsigned int last = 0;
1815   for (i = 1; i < alias_pairs->length (); ++i)
1816     {
1817       /* Deal with two ddrs (dr_a1, dr_b1) and (dr_a2, dr_b2).  */
1818       dr_with_seg_len_pair_t *alias_pair1 = &(*alias_pairs)[last];
1819       dr_with_seg_len_pair_t *alias_pair2 = &(*alias_pairs)[i];
1820
1821       dr_with_seg_len *dr_a1 = &alias_pair1->first;
1822       dr_with_seg_len *dr_b1 = &alias_pair1->second;
1823       dr_with_seg_len *dr_a2 = &alias_pair2->first;
1824       dr_with_seg_len *dr_b2 = &alias_pair2->second;
1825
1826       /* Remove duplicate data ref pairs.  */
1827       if (*dr_a1 == *dr_a2 && *dr_b1 == *dr_b2)
1828         {
1829           if (dump_enabled_p ())
1830             dump_printf (MSG_NOTE, "found equal ranges %T, %T and %T, %T\n",
1831                          DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
1832                          DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
1833           alias_pair1->flags |= alias_pair2->flags;
1834           continue;
1835         }
1836
1837       /* Assume that we won't be able to merge the pairs, then correct
1838          if we do.  */
1839       last += 1;
1840       if (last != i)
1841         (*alias_pairs)[last] = (*alias_pairs)[i];
1842
1843       if (*dr_a1 == *dr_a2 || *dr_b1 == *dr_b2)
1844         {
1845           /* We consider the case that DR_B1 and DR_B2 are same memrefs,
1846              and DR_A1 and DR_A2 are two consecutive memrefs.  */
1847           if (*dr_a1 == *dr_a2)
1848             {
1849               std::swap (dr_a1, dr_b1);
1850               std::swap (dr_a2, dr_b2);
1851             }
1852
1853           poly_int64 init_a1, init_a2;
1854           /* Only consider cases in which the distance between the initial
1855              DR_A1 and the initial DR_A2 is known at compile time.  */
1856           if (!operand_equal_p (DR_BASE_ADDRESS (dr_a1->dr),
1857                                 DR_BASE_ADDRESS (dr_a2->dr), 0)
1858               || !operand_equal_p (DR_OFFSET (dr_a1->dr),
1859                                    DR_OFFSET (dr_a2->dr), 0)
1860               || !poly_int_tree_p (DR_INIT (dr_a1->dr), &init_a1)
1861               || !poly_int_tree_p (DR_INIT (dr_a2->dr), &init_a2))
1862             continue;
1863
1864           /* Don't combine if we can't tell which one comes first.  */
1865           if (!ordered_p (init_a1, init_a2))
1866             continue;
1867
1868           /* Work out what the segment length would be if we did combine
1869              DR_A1 and DR_A2:
1870
1871              - If DR_A1 and DR_A2 have equal lengths, that length is
1872                also the combined length.
1873
1874              - If DR_A1 and DR_A2 both have negative "lengths", the combined
1875                length is the lower bound on those lengths.
1876
1877              - If DR_A1 and DR_A2 both have positive lengths, the combined
1878                length is the upper bound on those lengths.
1879
1880              Other cases are unlikely to give a useful combination.
1881
1882              The lengths both have sizetype, so the sign is taken from
1883              the step instead.  */
1884           poly_uint64 new_seg_len = 0;
1885           bool new_seg_len_p = !operand_equal_p (dr_a1->seg_len,
1886                                                  dr_a2->seg_len, 0);
1887           if (new_seg_len_p)
1888             {
1889               poly_uint64 seg_len_a1, seg_len_a2;
1890               if (!poly_int_tree_p (dr_a1->seg_len, &seg_len_a1)
1891                   || !poly_int_tree_p (dr_a2->seg_len, &seg_len_a2))
1892                 continue;
1893
1894               tree indicator_a = dr_direction_indicator (dr_a1->dr);
1895               if (TREE_CODE (indicator_a) != INTEGER_CST)
1896                 continue;
1897
1898               tree indicator_b = dr_direction_indicator (dr_a2->dr);
1899               if (TREE_CODE (indicator_b) != INTEGER_CST)
1900                 continue;
1901
1902               int sign_a = tree_int_cst_sgn (indicator_a);
1903               int sign_b = tree_int_cst_sgn (indicator_b);
1904
1905               if (sign_a <= 0 && sign_b <= 0)
1906                 new_seg_len = lower_bound (seg_len_a1, seg_len_a2);
1907               else if (sign_a >= 0 && sign_b >= 0)
1908                 new_seg_len = upper_bound (seg_len_a1, seg_len_a2);
1909               else
1910                 continue;
1911             }
1912           /* At this point we're committed to merging the refs.  */
1913
1914           /* Make sure dr_a1 starts left of dr_a2.  */
1915           if (maybe_gt (init_a1, init_a2))
1916             {
1917               std::swap (*dr_a1, *dr_a2);
1918               std::swap (init_a1, init_a2);
1919             }
1920
1921           /* The DR_Bs are equal, so only the DR_As can introduce
1922              mixed steps.  */
1923           if (!operand_equal_p (DR_STEP (dr_a1->dr), DR_STEP (dr_a2->dr), 0))
1924             alias_pair1->flags |= DR_ALIAS_MIXED_STEPS;
1925
1926           if (new_seg_len_p)
1927             {
1928               dr_a1->seg_len = build_int_cst (TREE_TYPE (dr_a1->seg_len),
1929                                               new_seg_len);
1930               dr_a1->align = MIN (dr_a1->align, known_alignment (new_seg_len));
1931             }
1932
1933           /* This is always positive due to the swap above.  */
1934           poly_uint64 diff = init_a2 - init_a1;
1935
1936           /* The new check will start at DR_A1.  Make sure that its access
1937              size encompasses the initial DR_A2.  */
1938           if (maybe_lt (dr_a1->access_size, diff + dr_a2->access_size))
1939             {
1940               dr_a1->access_size = upper_bound (dr_a1->access_size,
1941                                                 diff + dr_a2->access_size);
1942               unsigned int new_align = known_alignment (dr_a1->access_size);
1943               dr_a1->align = MIN (dr_a1->align, new_align);
1944             }
1945           if (dump_enabled_p ())
1946             dump_printf (MSG_NOTE, "merging ranges for %T, %T and %T, %T\n",
1947                          DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
1948                          DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
1949           alias_pair1->flags |= alias_pair2->flags;
1950           last -= 1;
1951         }
1952     }
1953   alias_pairs->truncate (last + 1);
1954
1955   /* Try to restore the original dr_with_seg_len order within each
1956      dr_with_seg_len_pair_t.  If we ended up combining swapped and
1957      unswapped pairs into the same check, we have to invalidate any
1958      RAW, WAR and WAW information for it.  */
1959   if (dump_enabled_p ())
1960     dump_printf (MSG_NOTE, "merged alias checks:\n");
1961   FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
1962     {
1963       unsigned int swap_mask = (DR_ALIAS_SWAPPED | DR_ALIAS_UNSWAPPED);
1964       unsigned int swapped = (alias_pair->flags & swap_mask);
1965       if (swapped == DR_ALIAS_SWAPPED)
1966         std::swap (alias_pair->first, alias_pair->second);
1967       else if (swapped != DR_ALIAS_UNSWAPPED)
1968         alias_pair->flags |= DR_ALIAS_ARBITRARY;
1969       alias_pair->flags &= ~swap_mask;
1970       if (dump_enabled_p ())
1971         dump_alias_pair (alias_pair, "  ");
1972     }
1973 }
1974
1975 /* A subroutine of create_intersect_range_checks, with a subset of the
1976    same arguments.  Try to use IFN_CHECK_RAW_PTRS and IFN_CHECK_WAR_PTRS
1977    to optimize cases in which the references form a simple RAW, WAR or
1978    WAR dependence.  */
1979
1980 static bool
1981 create_ifn_alias_checks (tree *cond_expr,
1982                          const dr_with_seg_len_pair_t &alias_pair)
1983 {
1984   const dr_with_seg_len& dr_a = alias_pair.first;
1985   const dr_with_seg_len& dr_b = alias_pair.second;
1986
1987   /* Check for cases in which:
1988
1989      (a) we have a known RAW, WAR or WAR dependence
1990      (b) the accesses are well-ordered in both the original and new code
1991          (see the comment above the DR_ALIAS_* flags for details); and
1992      (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR.  */
1993   if (alias_pair.flags & ~(DR_ALIAS_RAW | DR_ALIAS_WAR | DR_ALIAS_WAW))
1994     return false;
1995
1996   /* Make sure that both DRs access the same pattern of bytes,
1997      with a constant length and step.  */
1998   poly_uint64 seg_len;
1999   if (!operand_equal_p (dr_a.seg_len, dr_b.seg_len, 0)
2000       || !poly_int_tree_p (dr_a.seg_len, &seg_len)
2001       || maybe_ne (dr_a.access_size, dr_b.access_size)
2002       || !operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0)
2003       || !tree_fits_uhwi_p (DR_STEP (dr_a.dr)))
2004     return false;
2005
2006   unsigned HOST_WIDE_INT bytes = tree_to_uhwi (DR_STEP (dr_a.dr));
2007   tree addr_a = DR_BASE_ADDRESS (dr_a.dr);
2008   tree addr_b = DR_BASE_ADDRESS (dr_b.dr);
2009
2010   /* See whether the target suports what we want to do.  WAW checks are
2011      equivalent to WAR checks here.  */
2012   internal_fn ifn = (alias_pair.flags & DR_ALIAS_RAW
2013                      ? IFN_CHECK_RAW_PTRS
2014                      : IFN_CHECK_WAR_PTRS);
2015   unsigned int align = MIN (dr_a.align, dr_b.align);
2016   poly_uint64 full_length = seg_len + bytes;
2017   if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a),
2018                                            full_length, align))
2019     {
2020       full_length = seg_len + dr_a.access_size;
2021       if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a),
2022                                                full_length, align))
2023         return false;
2024     }
2025
2026   /* Commit to using this form of test.  */
2027   addr_a = fold_build_pointer_plus (addr_a, DR_OFFSET (dr_a.dr));
2028   addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr));
2029
2030   addr_b = fold_build_pointer_plus (addr_b, DR_OFFSET (dr_b.dr));
2031   addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr));
2032
2033   *cond_expr = build_call_expr_internal_loc (UNKNOWN_LOCATION,
2034                                              ifn, boolean_type_node,
2035                                              4, addr_a, addr_b,
2036                                              size_int (full_length),
2037                                              size_int (align));
2038
2039   if (dump_enabled_p ())
2040     {
2041       if (ifn == IFN_CHECK_RAW_PTRS)
2042         dump_printf (MSG_NOTE, "using an IFN_CHECK_RAW_PTRS test\n");
2043       else
2044         dump_printf (MSG_NOTE, "using an IFN_CHECK_WAR_PTRS test\n");
2045     }
2046   return true;
2047 }
2048
2049 /* Try to generate a runtime condition that is true if ALIAS_PAIR is
2050    free of aliases, using a condition based on index values instead
2051    of a condition based on addresses.  Return true on success,
2052    storing the condition in *COND_EXPR.
2053
2054    This can only be done if the two data references in ALIAS_PAIR access
2055    the same array object and the index is the only difference.  For example,
2056    if the two data references are DR_A and DR_B:
2057
2058                        DR_A                           DR_B
2059       data-ref         arr[i]                         arr[j]
2060       base_object      arr                            arr
2061       index            {i_0, +, 1}_loop               {j_0, +, 1}_loop
2062
2063    The addresses and their index are like:
2064
2065         |<- ADDR_A    ->|          |<- ADDR_B    ->|
2066      ------------------------------------------------------->
2067         |   |   |   |   |          |   |   |   |   |
2068      ------------------------------------------------------->
2069         i_0 ...         i_0+4      j_0 ...         j_0+4
2070
2071    We can create expression based on index rather than address:
2072
2073      (unsigned) (i_0 - j_0 + 3) <= 6
2074
2075    i.e. the indices are less than 4 apart.
2076
2077    Note evolution step of index needs to be considered in comparison.  */
2078
2079 static bool
2080 create_intersect_range_checks_index (class loop *loop, tree *cond_expr,
2081                                      const dr_with_seg_len_pair_t &alias_pair)
2082 {
2083   const dr_with_seg_len &dr_a = alias_pair.first;
2084   const dr_with_seg_len &dr_b = alias_pair.second;
2085   if ((alias_pair.flags & DR_ALIAS_MIXED_STEPS)
2086       || integer_zerop (DR_STEP (dr_a.dr))
2087       || integer_zerop (DR_STEP (dr_b.dr))
2088       || DR_NUM_DIMENSIONS (dr_a.dr) != DR_NUM_DIMENSIONS (dr_b.dr))
2089     return false;
2090
2091   poly_uint64 seg_len1, seg_len2;
2092   if (!poly_int_tree_p (dr_a.seg_len, &seg_len1)
2093       || !poly_int_tree_p (dr_b.seg_len, &seg_len2))
2094     return false;
2095
2096   if (!tree_fits_shwi_p (DR_STEP (dr_a.dr)))
2097     return false;
2098
2099   if (!operand_equal_p (DR_BASE_OBJECT (dr_a.dr), DR_BASE_OBJECT (dr_b.dr), 0))
2100     return false;
2101
2102   if (!operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0))
2103     return false;
2104
2105   gcc_assert (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST);
2106
2107   bool neg_step = tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0;
2108   unsigned HOST_WIDE_INT abs_step = tree_to_shwi (DR_STEP (dr_a.dr));
2109   if (neg_step)
2110     {
2111       abs_step = -abs_step;
2112       seg_len1 = (-wi::to_poly_wide (dr_a.seg_len)).force_uhwi ();
2113       seg_len2 = (-wi::to_poly_wide (dr_b.seg_len)).force_uhwi ();
2114     }
2115
2116   /* Infer the number of iterations with which the memory segment is accessed
2117      by DR.  In other words, alias is checked if memory segment accessed by
2118      DR_A in some iterations intersect with memory segment accessed by DR_B
2119      in the same amount iterations.
2120      Note segnment length is a linear function of number of iterations with
2121      DR_STEP as the coefficient.  */
2122   poly_uint64 niter_len1, niter_len2;
2123   if (!can_div_trunc_p (seg_len1 + abs_step - 1, abs_step, &niter_len1)
2124       || !can_div_trunc_p (seg_len2 + abs_step - 1, abs_step, &niter_len2))
2125     return false;
2126
2127   /* Divide each access size by the byte step, rounding up.  */
2128   poly_uint64 niter_access1, niter_access2;
2129   if (!can_div_trunc_p (dr_a.access_size + abs_step - 1,
2130                         abs_step, &niter_access1)
2131       || !can_div_trunc_p (dr_b.access_size + abs_step - 1,
2132                            abs_step, &niter_access2))
2133     return false;
2134
2135   bool waw_or_war_p = (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW)) == 0;
2136
2137   int found = -1;
2138   for (unsigned int i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
2139     {
2140       tree access1 = DR_ACCESS_FN (dr_a.dr, i);
2141       tree access2 = DR_ACCESS_FN (dr_b.dr, i);
2142       /* Two indices must be the same if they are not scev, or not scev wrto
2143          current loop being vecorized.  */
2144       if (TREE_CODE (access1) != POLYNOMIAL_CHREC
2145           || TREE_CODE (access2) != POLYNOMIAL_CHREC
2146           || CHREC_VARIABLE (access1) != (unsigned)loop->num
2147           || CHREC_VARIABLE (access2) != (unsigned)loop->num)
2148         {
2149           if (operand_equal_p (access1, access2, 0))
2150             continue;
2151
2152           return false;
2153         }
2154       if (found >= 0)
2155         return false;
2156       found = i;
2157     }
2158
2159   /* Ought not to happen in practice, since if all accesses are equal then the
2160      alias should be decidable at compile time.  */
2161   if (found < 0)
2162     return false;
2163
2164   /* The two indices must have the same step.  */
2165   tree access1 = DR_ACCESS_FN (dr_a.dr, found);
2166   tree access2 = DR_ACCESS_FN (dr_b.dr, found);
2167   if (!operand_equal_p (CHREC_RIGHT (access1), CHREC_RIGHT (access2), 0))
2168     return false;
2169
2170   tree idx_step = CHREC_RIGHT (access1);
2171   /* Index must have const step, otherwise DR_STEP won't be constant.  */
2172   gcc_assert (TREE_CODE (idx_step) == INTEGER_CST);
2173   /* Index must evaluate in the same direction as DR.  */
2174   gcc_assert (!neg_step || tree_int_cst_sign_bit (idx_step) == 1);
2175
2176   tree min1 = CHREC_LEFT (access1);
2177   tree min2 = CHREC_LEFT (access2);
2178   if (!types_compatible_p (TREE_TYPE (min1), TREE_TYPE (min2)))
2179     return false;
2180
2181   /* Ideally, alias can be checked against loop's control IV, but we
2182      need to prove linear mapping between control IV and reference
2183      index.  Although that should be true, we check against (array)
2184      index of data reference.  Like segment length, index length is
2185      linear function of the number of iterations with index_step as
2186      the coefficient, i.e, niter_len * idx_step.  */
2187   offset_int abs_idx_step = offset_int::from (wi::to_wide (idx_step),
2188                                               SIGNED);
2189   if (neg_step)
2190     abs_idx_step = -abs_idx_step;
2191   poly_offset_int idx_len1 = abs_idx_step * niter_len1;
2192   poly_offset_int idx_len2 = abs_idx_step * niter_len2;
2193   poly_offset_int idx_access1 = abs_idx_step * niter_access1;
2194   poly_offset_int idx_access2 = abs_idx_step * niter_access2;
2195
2196   gcc_assert (known_ge (idx_len1, 0)
2197               && known_ge (idx_len2, 0)
2198               && known_ge (idx_access1, 0)
2199               && known_ge (idx_access2, 0));
2200
2201   /* Each access has the following pattern, with lengths measured
2202      in units of INDEX:
2203
2204           <-- idx_len -->
2205           <--- A: -ve step --->
2206           +-----+-------+-----+-------+-----+
2207           | n-1 | ..... |  0  | ..... | n-1 |
2208           +-----+-------+-----+-------+-----+
2209                         <--- B: +ve step --->
2210                         <-- idx_len -->
2211                         |
2212                        min
2213
2214      where "n" is the number of scalar iterations covered by the segment
2215      and where each access spans idx_access units.
2216
2217      A is the range of bytes accessed when the step is negative,
2218      B is the range when the step is positive.
2219
2220      When checking for general overlap, we need to test whether
2221      the range:
2222
2223        [min1 + low_offset1, min1 + high_offset1 + idx_access1 - 1]
2224
2225      overlaps:
2226
2227        [min2 + low_offset2, min2 + high_offset2 + idx_access2 - 1]
2228
2229      where:
2230
2231         low_offsetN = +ve step ? 0 : -idx_lenN;
2232        high_offsetN = +ve step ? idx_lenN : 0;
2233
2234      This is equivalent to testing whether:
2235
2236        min1 + low_offset1 <= min2 + high_offset2 + idx_access2 - 1
2237        && min2 + low_offset2 <= min1 + high_offset1 + idx_access1 - 1
2238
2239      Converting this into a single test, there is an overlap if:
2240
2241        0 <= min2 - min1 + bias <= limit
2242
2243      where  bias = high_offset2 + idx_access2 - 1 - low_offset1
2244            limit = (high_offset1 - low_offset1 + idx_access1 - 1)
2245                  + (high_offset2 - low_offset2 + idx_access2 - 1)
2246       i.e. limit = idx_len1 + idx_access1 - 1 + idx_len2 + idx_access2 - 1
2247
2248      Combining the tests requires limit to be computable in an unsigned
2249      form of the index type; if it isn't, we fall back to the usual
2250      pointer-based checks.
2251
2252      We can do better if DR_B is a write and if DR_A and DR_B are
2253      well-ordered in both the original and the new code (see the
2254      comment above the DR_ALIAS_* flags for details).  In this case
2255      we know that for each i in [0, n-1], the write performed by
2256      access i of DR_B occurs after access numbers j<=i of DR_A in
2257      both the original and the new code.  Any write or anti
2258      dependencies wrt those DR_A accesses are therefore maintained.
2259
2260      We just need to make sure that each individual write in DR_B does not
2261      overlap any higher-indexed access in DR_A; such DR_A accesses happen
2262      after the DR_B access in the original code but happen before it in
2263      the new code.
2264
2265      We know the steps for both accesses are equal, so by induction, we
2266      just need to test whether the first write of DR_B overlaps a later
2267      access of DR_A.  In other words, we need to move min1 along by
2268      one iteration:
2269
2270        min1' = min1 + idx_step
2271
2272      and use the ranges:
2273
2274        [min1' + low_offset1', min1' + high_offset1' + idx_access1 - 1]
2275
2276      and:
2277
2278        [min2, min2 + idx_access2 - 1]
2279
2280      where:
2281
2282         low_offset1' = +ve step ? 0 : -(idx_len1 - |idx_step|)
2283        high_offset1' = +ve_step ? idx_len1 - |idx_step| : 0.  */
2284   if (waw_or_war_p)
2285     idx_len1 -= abs_idx_step;
2286
2287   poly_offset_int limit = idx_len1 + idx_access1 - 1 + idx_access2 - 1;
2288   if (!waw_or_war_p)
2289     limit += idx_len2;
2290
2291   tree utype = unsigned_type_for (TREE_TYPE (min1));
2292   if (!wi::fits_to_tree_p (limit, utype))
2293     return false;
2294
2295   poly_offset_int low_offset1 = neg_step ? -idx_len1 : 0;
2296   poly_offset_int high_offset2 = neg_step || waw_or_war_p ? 0 : idx_len2;
2297   poly_offset_int bias = high_offset2 + idx_access2 - 1 - low_offset1;
2298   /* Equivalent to adding IDX_STEP to MIN1.  */
2299   if (waw_or_war_p)
2300     bias -= wi::to_offset (idx_step);
2301
2302   tree subject = fold_build2 (MINUS_EXPR, utype,
2303                               fold_convert (utype, min2),
2304                               fold_convert (utype, min1));
2305   subject = fold_build2 (PLUS_EXPR, utype, subject,
2306                          wide_int_to_tree (utype, bias));
2307   tree part_cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject,
2308                                      wide_int_to_tree (utype, limit));
2309   if (*cond_expr)
2310     *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2311                               *cond_expr, part_cond_expr);
2312   else
2313     *cond_expr = part_cond_expr;
2314   if (dump_enabled_p ())
2315     {
2316       if (waw_or_war_p)
2317         dump_printf (MSG_NOTE, "using an index-based WAR/WAW test\n");
2318       else
2319         dump_printf (MSG_NOTE, "using an index-based overlap test\n");
2320     }
2321   return true;
2322 }
2323
2324 /* A subroutine of create_intersect_range_checks, with a subset of the
2325    same arguments.  Try to optimize cases in which the second access
2326    is a write and in which some overlap is valid.  */
2327
2328 static bool
2329 create_waw_or_war_checks (tree *cond_expr,
2330                           const dr_with_seg_len_pair_t &alias_pair)
2331 {
2332   const dr_with_seg_len& dr_a = alias_pair.first;
2333   const dr_with_seg_len& dr_b = alias_pair.second;
2334
2335   /* Check for cases in which:
2336
2337      (a) DR_B is always a write;
2338      (b) the accesses are well-ordered in both the original and new code
2339          (see the comment above the DR_ALIAS_* flags for details); and
2340      (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR.  */
2341   if (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW))
2342     return false;
2343
2344   /* Check for equal (but possibly variable) steps.  */
2345   tree step = DR_STEP (dr_a.dr);
2346   if (!operand_equal_p (step, DR_STEP (dr_b.dr)))
2347     return false;
2348
2349   /* Make sure that we can operate on sizetype without loss of precision.  */
2350   tree addr_type = TREE_TYPE (DR_BASE_ADDRESS (dr_a.dr));
2351   if (TYPE_PRECISION (addr_type) != TYPE_PRECISION (sizetype))
2352     return false;
2353
2354   /* All addresses involved are known to have a common alignment ALIGN.
2355      We can therefore subtract ALIGN from an exclusive endpoint to get
2356      an inclusive endpoint.  In the best (and common) case, ALIGN is the
2357      same as the access sizes of both DRs, and so subtracting ALIGN
2358      cancels out the addition of an access size.  */
2359   unsigned int align = MIN (dr_a.align, dr_b.align);
2360   poly_uint64 last_chunk_a = dr_a.access_size - align;
2361   poly_uint64 last_chunk_b = dr_b.access_size - align;
2362
2363   /* Get a boolean expression that is true when the step is negative.  */
2364   tree indicator = dr_direction_indicator (dr_a.dr);
2365   tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
2366                                fold_convert (ssizetype, indicator),
2367                                ssize_int (0));
2368
2369   /* Get lengths in sizetype.  */
2370   tree seg_len_a
2371     = fold_convert (sizetype, rewrite_to_non_trapping_overflow (dr_a.seg_len));
2372   step = fold_convert (sizetype, rewrite_to_non_trapping_overflow (step));
2373
2374   /* Each access has the following pattern:
2375
2376           <- |seg_len| ->
2377           <--- A: -ve step --->
2378           +-----+-------+-----+-------+-----+
2379           | n-1 | ..... |  0  | ..... | n-1 |
2380           +-----+-------+-----+-------+-----+
2381                         <--- B: +ve step --->
2382                         <- |seg_len| ->
2383                         |
2384                    base address
2385
2386      where "n" is the number of scalar iterations covered by the segment.
2387
2388      A is the range of bytes accessed when the step is negative,
2389      B is the range when the step is positive.
2390
2391      We know that DR_B is a write.  We also know (from checking that
2392      DR_A and DR_B are well-ordered) that for each i in [0, n-1],
2393      the write performed by access i of DR_B occurs after access numbers
2394      j<=i of DR_A in both the original and the new code.  Any write or
2395      anti dependencies wrt those DR_A accesses are therefore maintained.
2396
2397      We just need to make sure that each individual write in DR_B does not
2398      overlap any higher-indexed access in DR_A; such DR_A accesses happen
2399      after the DR_B access in the original code but happen before it in
2400      the new code.
2401
2402      We know the steps for both accesses are equal, so by induction, we
2403      just need to test whether the first write of DR_B overlaps a later
2404      access of DR_A.  In other words, we need to move addr_a along by
2405      one iteration:
2406
2407        addr_a' = addr_a + step
2408
2409      and check whether:
2410
2411        [addr_b, addr_b + last_chunk_b]
2412
2413      overlaps:
2414
2415        [addr_a' + low_offset_a, addr_a' + high_offset_a + last_chunk_a]
2416
2417      where [low_offset_a, high_offset_a] spans accesses [1, n-1].  I.e.:
2418
2419         low_offset_a = +ve step ? 0 : seg_len_a - step
2420        high_offset_a = +ve step ? seg_len_a - step : 0
2421
2422      This is equivalent to testing whether:
2423
2424        addr_a' + low_offset_a <= addr_b + last_chunk_b
2425        && addr_b <= addr_a' + high_offset_a + last_chunk_a
2426
2427      Converting this into a single test, there is an overlap if:
2428
2429        0 <= addr_b + last_chunk_b - addr_a' - low_offset_a <= limit
2430
2431      where limit = high_offset_a - low_offset_a + last_chunk_a + last_chunk_b
2432
2433      If DR_A is performed, limit + |step| - last_chunk_b is known to be
2434      less than the size of the object underlying DR_A.  We also know
2435      that last_chunk_b <= |step|; this is checked elsewhere if it isn't
2436      guaranteed at compile time.  There can therefore be no overflow if
2437      "limit" is calculated in an unsigned type with pointer precision.  */
2438   tree addr_a = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_a.dr),
2439                                          DR_OFFSET (dr_a.dr));
2440   addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr));
2441
2442   tree addr_b = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_b.dr),
2443                                          DR_OFFSET (dr_b.dr));
2444   addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr));
2445
2446   /* Advance ADDR_A by one iteration and adjust the length to compensate.  */
2447   addr_a = fold_build_pointer_plus (addr_a, step);
2448   tree seg_len_a_minus_step = fold_build2 (MINUS_EXPR, sizetype,
2449                                            seg_len_a, step);
2450   if (!CONSTANT_CLASS_P (seg_len_a_minus_step))
2451     seg_len_a_minus_step = build1 (SAVE_EXPR, sizetype, seg_len_a_minus_step);
2452
2453   tree low_offset_a = fold_build3 (COND_EXPR, sizetype, neg_step,
2454                                    seg_len_a_minus_step, size_zero_node);
2455   if (!CONSTANT_CLASS_P (low_offset_a))
2456     low_offset_a = build1 (SAVE_EXPR, sizetype, low_offset_a);
2457
2458   /* We could use COND_EXPR <neg_step, size_zero_node, seg_len_a_minus_step>,
2459      but it's usually more efficient to reuse the LOW_OFFSET_A result.  */
2460   tree high_offset_a = fold_build2 (MINUS_EXPR, sizetype, seg_len_a_minus_step,
2461                                     low_offset_a);
2462
2463   /* The amount added to addr_b - addr_a'.  */
2464   tree bias = fold_build2 (MINUS_EXPR, sizetype,
2465                            size_int (last_chunk_b), low_offset_a);
2466
2467   tree limit = fold_build2 (MINUS_EXPR, sizetype, high_offset_a, low_offset_a);
2468   limit = fold_build2 (PLUS_EXPR, sizetype, limit,
2469                        size_int (last_chunk_a + last_chunk_b));
2470
2471   tree subject = fold_build2 (POINTER_DIFF_EXPR, ssizetype, addr_b, addr_a);
2472   subject = fold_build2 (PLUS_EXPR, sizetype,
2473                          fold_convert (sizetype, subject), bias);
2474
2475   *cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject, limit);
2476   if (dump_enabled_p ())
2477     dump_printf (MSG_NOTE, "using an address-based WAR/WAW test\n");
2478   return true;
2479 }
2480
2481 /* If ALIGN is nonzero, set up *SEQ_MIN_OUT and *SEQ_MAX_OUT so that for
2482    every address ADDR accessed by D:
2483
2484      *SEQ_MIN_OUT <= ADDR (== ADDR & -ALIGN) <= *SEQ_MAX_OUT
2485
2486    In this case, every element accessed by D is aligned to at least
2487    ALIGN bytes.
2488
2489    If ALIGN is zero then instead set *SEG_MAX_OUT so that:
2490
2491      *SEQ_MIN_OUT <= ADDR < *SEQ_MAX_OUT.  */
2492
2493 static void
2494 get_segment_min_max (const dr_with_seg_len &d, tree *seg_min_out,
2495                      tree *seg_max_out, HOST_WIDE_INT align)
2496 {
2497   /* Each access has the following pattern:
2498
2499           <- |seg_len| ->
2500           <--- A: -ve step --->
2501           +-----+-------+-----+-------+-----+
2502           | n-1 | ,.... |  0  | ..... | n-1 |
2503           +-----+-------+-----+-------+-----+
2504                         <--- B: +ve step --->
2505                         <- |seg_len| ->
2506                         |
2507                    base address
2508
2509      where "n" is the number of scalar iterations covered by the segment.
2510      (This should be VF for a particular pair if we know that both steps
2511      are the same, otherwise it will be the full number of scalar loop
2512      iterations.)
2513
2514      A is the range of bytes accessed when the step is negative,
2515      B is the range when the step is positive.
2516
2517      If the access size is "access_size" bytes, the lowest addressed byte is:
2518
2519          base + (step < 0 ? seg_len : 0)   [LB]
2520
2521      and the highest addressed byte is always below:
2522
2523          base + (step < 0 ? 0 : seg_len) + access_size   [UB]
2524
2525      Thus:
2526
2527          LB <= ADDR < UB
2528
2529      If ALIGN is nonzero, all three values are aligned to at least ALIGN
2530      bytes, so:
2531
2532          LB <= ADDR <= UB - ALIGN
2533
2534      where "- ALIGN" folds naturally with the "+ access_size" and often
2535      cancels it out.
2536
2537      We don't try to simplify LB and UB beyond this (e.g. by using
2538      MIN and MAX based on whether seg_len rather than the stride is
2539      negative) because it is possible for the absolute size of the
2540      segment to overflow the range of a ssize_t.
2541
2542      Keeping the pointer_plus outside of the cond_expr should allow
2543      the cond_exprs to be shared with other alias checks.  */
2544   tree indicator = dr_direction_indicator (d.dr);
2545   tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
2546                                fold_convert (ssizetype, indicator),
2547                                ssize_int (0));
2548   tree addr_base = fold_build_pointer_plus (DR_BASE_ADDRESS (d.dr),
2549                                             DR_OFFSET (d.dr));
2550   addr_base = fold_build_pointer_plus (addr_base, DR_INIT (d.dr));
2551   tree seg_len
2552     = fold_convert (sizetype, rewrite_to_non_trapping_overflow (d.seg_len));
2553
2554   tree min_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
2555                                 seg_len, size_zero_node);
2556   tree max_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
2557                                 size_zero_node, seg_len);
2558   max_reach = fold_build2 (PLUS_EXPR, sizetype, max_reach,
2559                            size_int (d.access_size - align));
2560
2561   *seg_min_out = fold_build_pointer_plus (addr_base, min_reach);
2562   *seg_max_out = fold_build_pointer_plus (addr_base, max_reach);
2563 }
2564
2565 /* Generate a runtime condition that is true if ALIAS_PAIR is free of aliases,
2566    storing the condition in *COND_EXPR.  The fallback is to generate a
2567    a test that the two accesses do not overlap:
2568
2569      end_a <= start_b || end_b <= start_a.  */
2570
2571 static void
2572 create_intersect_range_checks (class loop *loop, tree *cond_expr,
2573                                const dr_with_seg_len_pair_t &alias_pair)
2574 {
2575   const dr_with_seg_len& dr_a = alias_pair.first;
2576   const dr_with_seg_len& dr_b = alias_pair.second;
2577   *cond_expr = NULL_TREE;
2578   if (create_intersect_range_checks_index (loop, cond_expr, alias_pair))
2579     return;
2580
2581   if (create_ifn_alias_checks (cond_expr, alias_pair))
2582     return;
2583
2584   if (create_waw_or_war_checks (cond_expr, alias_pair))
2585     return;
2586
2587   unsigned HOST_WIDE_INT min_align;
2588   tree_code cmp_code;
2589   /* We don't have to check DR_ALIAS_MIXED_STEPS here, since both versions
2590      are equivalent.  This is just an optimization heuristic.  */
2591   if (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST
2592       && TREE_CODE (DR_STEP (dr_b.dr)) == INTEGER_CST)
2593     {
2594       /* In this case adding access_size to seg_len is likely to give
2595          a simple X * step, where X is either the number of scalar
2596          iterations or the vectorization factor.  We're better off
2597          keeping that, rather than subtracting an alignment from it.
2598
2599          In this case the maximum values are exclusive and so there is
2600          no alias if the maximum of one segment equals the minimum
2601          of another.  */
2602       min_align = 0;
2603       cmp_code = LE_EXPR;
2604     }
2605   else
2606     {
2607       /* Calculate the minimum alignment shared by all four pointers,
2608          then arrange for this alignment to be subtracted from the
2609          exclusive maximum values to get inclusive maximum values.
2610          This "- min_align" is cumulative with a "+ access_size"
2611          in the calculation of the maximum values.  In the best
2612          (and common) case, the two cancel each other out, leaving
2613          us with an inclusive bound based only on seg_len.  In the
2614          worst case we're simply adding a smaller number than before.
2615
2616          Because the maximum values are inclusive, there is an alias
2617          if the maximum value of one segment is equal to the minimum
2618          value of the other.  */
2619       min_align = MIN (dr_a.align, dr_b.align);
2620       cmp_code = LT_EXPR;
2621     }
2622
2623   tree seg_a_min, seg_a_max, seg_b_min, seg_b_max;
2624   get_segment_min_max (dr_a, &seg_a_min, &seg_a_max, min_align);
2625   get_segment_min_max (dr_b, &seg_b_min, &seg_b_max, min_align);
2626
2627   *cond_expr
2628     = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
2629         fold_build2 (cmp_code, boolean_type_node, seg_a_max, seg_b_min),
2630         fold_build2 (cmp_code, boolean_type_node, seg_b_max, seg_a_min));
2631   if (dump_enabled_p ())
2632     dump_printf (MSG_NOTE, "using an address-based overlap test\n");
2633 }
2634
2635 /* Create a conditional expression that represents the run-time checks for
2636    overlapping of address ranges represented by a list of data references
2637    pairs passed in ALIAS_PAIRS.  Data references are in LOOP.  The returned
2638    COND_EXPR is the conditional expression to be used in the if statement
2639    that controls which version of the loop gets executed at runtime.  */
2640
2641 void
2642 create_runtime_alias_checks (class loop *loop,
2643                              const vec<dr_with_seg_len_pair_t> *alias_pairs,
2644                              tree * cond_expr)
2645 {
2646   tree part_cond_expr;
2647
2648   fold_defer_overflow_warnings ();
2649   for (const dr_with_seg_len_pair_t &alias_pair : alias_pairs)
2650     {
2651       gcc_assert (alias_pair.flags);
2652       if (dump_enabled_p ())
2653         dump_printf (MSG_NOTE,
2654                      "create runtime check for data references %T and %T\n",
2655                      DR_REF (alias_pair.first.dr),
2656                      DR_REF (alias_pair.second.dr));
2657
2658       /* Create condition expression for each pair data references.  */
2659       create_intersect_range_checks (loop, &part_cond_expr, alias_pair);
2660       if (*cond_expr)
2661         *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2662                                   *cond_expr, part_cond_expr);
2663       else
2664         *cond_expr = part_cond_expr;
2665     }
2666   fold_undefer_and_ignore_overflow_warnings ();
2667 }
2668
2669 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
2670    expressions.  */
2671 static bool
2672 dr_equal_offsets_p1 (tree offset1, tree offset2)
2673 {
2674   bool res;
2675
2676   STRIP_NOPS (offset1);
2677   STRIP_NOPS (offset2);
2678
2679   if (offset1 == offset2)
2680     return true;
2681
2682   if (TREE_CODE (offset1) != TREE_CODE (offset2)
2683       || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
2684     return false;
2685
2686   res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
2687                              TREE_OPERAND (offset2, 0));
2688
2689   if (!res || !BINARY_CLASS_P (offset1))
2690     return res;
2691
2692   res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
2693                              TREE_OPERAND (offset2, 1));
2694
2695   return res;
2696 }
2697
2698 /* Check if DRA and DRB have equal offsets.  */
2699 bool
2700 dr_equal_offsets_p (struct data_reference *dra,
2701                     struct data_reference *drb)
2702 {
2703   tree offset1, offset2;
2704
2705   offset1 = DR_OFFSET (dra);
2706   offset2 = DR_OFFSET (drb);
2707
2708   return dr_equal_offsets_p1 (offset1, offset2);
2709 }
2710
2711 /* Returns true if FNA == FNB.  */
2712
2713 static bool
2714 affine_function_equal_p (affine_fn fna, affine_fn fnb)
2715 {
2716   unsigned i, n = fna.length ();
2717
2718   if (n != fnb.length ())
2719     return false;
2720
2721   for (i = 0; i < n; i++)
2722     if (!operand_equal_p (fna[i], fnb[i], 0))
2723       return false;
2724
2725   return true;
2726 }
2727
2728 /* If all the functions in CF are the same, returns one of them,
2729    otherwise returns NULL.  */
2730
2731 static affine_fn
2732 common_affine_function (conflict_function *cf)
2733 {
2734   unsigned i;
2735   affine_fn comm;
2736
2737   if (!CF_NONTRIVIAL_P (cf))
2738     return affine_fn ();
2739
2740   comm = cf->fns[0];
2741
2742   for (i = 1; i < cf->n; i++)
2743     if (!affine_function_equal_p (comm, cf->fns[i]))
2744       return affine_fn ();
2745
2746   return comm;
2747 }
2748
2749 /* Returns the base of the affine function FN.  */
2750
2751 static tree
2752 affine_function_base (affine_fn fn)
2753 {
2754   return fn[0];
2755 }
2756
2757 /* Returns true if FN is a constant.  */
2758
2759 static bool
2760 affine_function_constant_p (affine_fn fn)
2761 {
2762   unsigned i;
2763   tree coef;
2764
2765   for (i = 1; fn.iterate (i, &coef); i++)
2766     if (!integer_zerop (coef))
2767       return false;
2768
2769   return true;
2770 }
2771
2772 /* Returns true if FN is the zero constant function.  */
2773
2774 static bool
2775 affine_function_zero_p (affine_fn fn)
2776 {
2777   return (integer_zerop (affine_function_base (fn))
2778           && affine_function_constant_p (fn));
2779 }
2780
2781 /* Returns a signed integer type with the largest precision from TA
2782    and TB.  */
2783
2784 static tree
2785 signed_type_for_types (tree ta, tree tb)
2786 {
2787   if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
2788     return signed_type_for (ta);
2789   else
2790     return signed_type_for (tb);
2791 }
2792
2793 /* Applies operation OP on affine functions FNA and FNB, and returns the
2794    result.  */
2795
2796 static affine_fn
2797 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
2798 {
2799   unsigned i, n, m;
2800   affine_fn ret;
2801   tree coef;
2802
2803   if (fnb.length () > fna.length ())
2804     {
2805       n = fna.length ();
2806       m = fnb.length ();
2807     }
2808   else
2809     {
2810       n = fnb.length ();
2811       m = fna.length ();
2812     }
2813
2814   ret.create (m);
2815   for (i = 0; i < n; i++)
2816     {
2817       tree type = signed_type_for_types (TREE_TYPE (fna[i]),
2818                                          TREE_TYPE (fnb[i]));
2819       ret.quick_push (fold_build2 (op, type, fna[i], fnb[i]));
2820     }
2821
2822   for (; fna.iterate (i, &coef); i++)
2823     ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2824                                  coef, integer_zero_node));
2825   for (; fnb.iterate (i, &coef); i++)
2826     ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2827                                  integer_zero_node, coef));
2828
2829   return ret;
2830 }
2831
2832 /* Returns the sum of affine functions FNA and FNB.  */
2833
2834 static affine_fn
2835 affine_fn_plus (affine_fn fna, affine_fn fnb)
2836 {
2837   return affine_fn_op (PLUS_EXPR, fna, fnb);
2838 }
2839
2840 /* Returns the difference of affine functions FNA and FNB.  */
2841
2842 static affine_fn
2843 affine_fn_minus (affine_fn fna, affine_fn fnb)
2844 {
2845   return affine_fn_op (MINUS_EXPR, fna, fnb);
2846 }
2847
2848 /* Frees affine function FN.  */
2849
2850 static void
2851 affine_fn_free (affine_fn fn)
2852 {
2853   fn.release ();
2854 }
2855
2856 /* Determine for each subscript in the data dependence relation DDR
2857    the distance.  */
2858
2859 static void
2860 compute_subscript_distance (struct data_dependence_relation *ddr)
2861 {
2862   conflict_function *cf_a, *cf_b;
2863   affine_fn fn_a, fn_b, diff;
2864
2865   if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
2866     {
2867       unsigned int i;
2868
2869       for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2870         {
2871           struct subscript *subscript;
2872
2873           subscript = DDR_SUBSCRIPT (ddr, i);
2874           cf_a = SUB_CONFLICTS_IN_A (subscript);
2875           cf_b = SUB_CONFLICTS_IN_B (subscript);
2876
2877           fn_a = common_affine_function (cf_a);
2878           fn_b = common_affine_function (cf_b);
2879           if (!fn_a.exists () || !fn_b.exists ())
2880             {
2881               SUB_DISTANCE (subscript) = chrec_dont_know;
2882               return;
2883             }
2884           diff = affine_fn_minus (fn_a, fn_b);
2885
2886           if (affine_function_constant_p (diff))
2887             SUB_DISTANCE (subscript) = affine_function_base (diff);
2888           else
2889             SUB_DISTANCE (subscript) = chrec_dont_know;
2890
2891           affine_fn_free (diff);
2892         }
2893     }
2894 }
2895
2896 /* Returns the conflict function for "unknown".  */
2897
2898 static conflict_function *
2899 conflict_fn_not_known (void)
2900 {
2901   conflict_function *fn = XCNEW (conflict_function);
2902   fn->n = NOT_KNOWN;
2903
2904   return fn;
2905 }
2906
2907 /* Returns the conflict function for "independent".  */
2908
2909 static conflict_function *
2910 conflict_fn_no_dependence (void)
2911 {
2912   conflict_function *fn = XCNEW (conflict_function);
2913   fn->n = NO_DEPENDENCE;
2914
2915   return fn;
2916 }
2917
2918 /* Returns true if the address of OBJ is invariant in LOOP.  */
2919
2920 static bool
2921 object_address_invariant_in_loop_p (const class loop *loop, const_tree obj)
2922 {
2923   while (handled_component_p (obj))
2924     {
2925       if (TREE_CODE (obj) == ARRAY_REF)
2926         {
2927           for (int i = 1; i < 4; ++i)
2928             if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, i),
2929                                                         loop->num))
2930               return false;
2931         }
2932       else if (TREE_CODE (obj) == COMPONENT_REF)
2933         {
2934           if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
2935                                                       loop->num))
2936             return false;
2937         }
2938       obj = TREE_OPERAND (obj, 0);
2939     }
2940
2941   if (!INDIRECT_REF_P (obj)
2942       && TREE_CODE (obj) != MEM_REF)
2943     return true;
2944
2945   return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
2946                                                   loop->num);
2947 }
2948
2949 /* Returns false if we can prove that data references A and B do not alias,
2950    true otherwise.  If LOOP_NEST is false no cross-iteration aliases are
2951    considered.  */
2952
2953 bool
2954 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
2955                 class loop *loop_nest)
2956 {
2957   tree addr_a = DR_BASE_OBJECT (a);
2958   tree addr_b = DR_BASE_OBJECT (b);
2959
2960   /* If we are not processing a loop nest but scalar code we
2961      do not need to care about possible cross-iteration dependences
2962      and thus can process the full original reference.  Do so,
2963      similar to how loop invariant motion applies extra offset-based
2964      disambiguation.  */
2965   if (!loop_nest)
2966     {
2967       aff_tree off1, off2;
2968       poly_widest_int size1, size2;
2969       get_inner_reference_aff (DR_REF (a), &off1, &size1);
2970       get_inner_reference_aff (DR_REF (b), &off2, &size2);
2971       aff_combination_scale (&off1, -1);
2972       aff_combination_add (&off2, &off1);
2973       if (aff_comb_cannot_overlap_p (&off2, size1, size2))
2974         return false;
2975     }
2976
2977   if ((TREE_CODE (addr_a) == MEM_REF || TREE_CODE (addr_a) == TARGET_MEM_REF)
2978       && (TREE_CODE (addr_b) == MEM_REF || TREE_CODE (addr_b) == TARGET_MEM_REF)
2979       /* For cross-iteration dependences the cliques must be valid for the
2980          whole loop, not just individual iterations.  */
2981       && (!loop_nest
2982           || MR_DEPENDENCE_CLIQUE (addr_a) == 1
2983           || MR_DEPENDENCE_CLIQUE (addr_a) == loop_nest->owned_clique)
2984       && MR_DEPENDENCE_CLIQUE (addr_a) == MR_DEPENDENCE_CLIQUE (addr_b)
2985       && MR_DEPENDENCE_BASE (addr_a) != MR_DEPENDENCE_BASE (addr_b))
2986     return false;
2987
2988   /* If we had an evolution in a pointer-based MEM_REF BASE_OBJECT we
2989      do not know the size of the base-object.  So we cannot do any
2990      offset/overlap based analysis but have to rely on points-to
2991      information only.  */
2992   if (TREE_CODE (addr_a) == MEM_REF
2993       && (DR_UNCONSTRAINED_BASE (a)
2994           || TREE_CODE (TREE_OPERAND (addr_a, 0)) == SSA_NAME))
2995     {
2996       /* For true dependences we can apply TBAA.  */
2997       if (flag_strict_aliasing
2998           && DR_IS_WRITE (a) && DR_IS_READ (b)
2999           && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
3000                                      get_alias_set (DR_REF (b))))
3001         return false;
3002       if (TREE_CODE (addr_b) == MEM_REF)
3003         return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
3004                                        TREE_OPERAND (addr_b, 0));
3005       else
3006         return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
3007                                        build_fold_addr_expr (addr_b));
3008     }
3009   else if (TREE_CODE (addr_b) == MEM_REF
3010            && (DR_UNCONSTRAINED_BASE (b)
3011                || TREE_CODE (TREE_OPERAND (addr_b, 0)) == SSA_NAME))
3012     {
3013       /* For true dependences we can apply TBAA.  */
3014       if (flag_strict_aliasing
3015           && DR_IS_WRITE (a) && DR_IS_READ (b)
3016           && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
3017                                      get_alias_set (DR_REF (b))))
3018         return false;
3019       if (TREE_CODE (addr_a) == MEM_REF)
3020         return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
3021                                        TREE_OPERAND (addr_b, 0));
3022       else
3023         return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
3024                                        TREE_OPERAND (addr_b, 0));
3025     }
3026
3027   /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
3028      that is being subsetted in the loop nest.  */
3029   if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
3030     return refs_output_dependent_p (addr_a, addr_b);
3031   else if (DR_IS_READ (a) && DR_IS_WRITE (b))
3032     return refs_anti_dependent_p (addr_a, addr_b);
3033   return refs_may_alias_p (addr_a, addr_b);
3034 }
3035
3036 /* REF_A and REF_B both satisfy access_fn_component_p.  Return true
3037    if it is meaningful to compare their associated access functions
3038    when checking for dependencies.  */
3039
3040 static bool
3041 access_fn_components_comparable_p (tree ref_a, tree ref_b)
3042 {
3043   /* Allow pairs of component refs from the following sets:
3044
3045        { REALPART_EXPR, IMAGPART_EXPR }
3046        { COMPONENT_REF }
3047        { ARRAY_REF }.  */
3048   tree_code code_a = TREE_CODE (ref_a);
3049   tree_code code_b = TREE_CODE (ref_b);
3050   if (code_a == IMAGPART_EXPR)
3051     code_a = REALPART_EXPR;
3052   if (code_b == IMAGPART_EXPR)
3053     code_b = REALPART_EXPR;
3054   if (code_a != code_b)
3055     return false;
3056
3057   if (TREE_CODE (ref_a) == COMPONENT_REF)
3058     /* ??? We cannot simply use the type of operand #0 of the refs here as
3059        the Fortran compiler smuggles type punning into COMPONENT_REFs.
3060        Use the DECL_CONTEXT of the FIELD_DECLs instead.  */
3061     return (DECL_CONTEXT (TREE_OPERAND (ref_a, 1))
3062             == DECL_CONTEXT (TREE_OPERAND (ref_b, 1)));
3063
3064   return types_compatible_p (TREE_TYPE (TREE_OPERAND (ref_a, 0)),
3065                              TREE_TYPE (TREE_OPERAND (ref_b, 0)));
3066 }
3067
3068 /* Initialize a data dependence relation RES in LOOP_NEST.  USE_ALT_INDICES
3069    is true when the main indices of A and B were not comparable so we try again
3070    with alternate indices computed on an indirect reference.  */
3071
3072 struct data_dependence_relation *
3073 initialize_data_dependence_relation (struct data_dependence_relation *res,
3074                                      vec<loop_p> loop_nest,
3075                                      bool use_alt_indices)
3076 {
3077   struct data_reference *a = DDR_A (res);
3078   struct data_reference *b = DDR_B (res);
3079   unsigned int i;
3080
3081   struct indices *indices_a = &a->indices;
3082   struct indices *indices_b = &b->indices;
3083   if (use_alt_indices)
3084     {
3085       if (TREE_CODE (DR_REF (a)) != MEM_REF)
3086         indices_a = &a->alt_indices;
3087       if (TREE_CODE (DR_REF (b)) != MEM_REF)
3088         indices_b = &b->alt_indices;
3089     }
3090   unsigned int num_dimensions_a = indices_a->access_fns.length ();
3091   unsigned int num_dimensions_b = indices_b->access_fns.length ();
3092   if (num_dimensions_a == 0 || num_dimensions_b == 0)
3093     {
3094       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3095       return res;
3096     }
3097
3098   /* For unconstrained bases, the root (highest-indexed) subscript
3099      describes a variation in the base of the original DR_REF rather
3100      than a component access.  We have no type that accurately describes
3101      the new DR_BASE_OBJECT (whose TREE_TYPE describes the type *after*
3102      applying this subscript) so limit the search to the last real
3103      component access.
3104
3105      E.g. for:
3106
3107         void
3108         f (int a[][8], int b[][8])
3109         {
3110           for (int i = 0; i < 8; ++i)
3111             a[i * 2][0] = b[i][0];
3112         }
3113
3114      the a and b accesses have a single ARRAY_REF component reference [0]
3115      but have two subscripts.  */
3116   if (indices_a->unconstrained_base)
3117     num_dimensions_a -= 1;
3118   if (indices_b->unconstrained_base)
3119     num_dimensions_b -= 1;
3120
3121   /* These structures describe sequences of component references in
3122      DR_REF (A) and DR_REF (B).  Each component reference is tied to a
3123      specific access function.  */
3124   struct {
3125     /* The sequence starts at DR_ACCESS_FN (A, START_A) of A and
3126        DR_ACCESS_FN (B, START_B) of B (inclusive) and extends to higher
3127        indices.  In C notation, these are the indices of the rightmost
3128        component references; e.g. for a sequence .b.c.d, the start
3129        index is for .d.  */
3130     unsigned int start_a;
3131     unsigned int start_b;
3132
3133     /* The sequence contains LENGTH consecutive access functions from
3134        each DR.  */
3135     unsigned int length;
3136
3137     /* The enclosing objects for the A and B sequences respectively,
3138        i.e. the objects to which DR_ACCESS_FN (A, START_A + LENGTH - 1)
3139        and DR_ACCESS_FN (B, START_B + LENGTH - 1) are applied.  */
3140     tree object_a;
3141     tree object_b;
3142   } full_seq = {}, struct_seq = {};
3143
3144   /* Before each iteration of the loop:
3145
3146      - REF_A is what you get after applying DR_ACCESS_FN (A, INDEX_A) and
3147      - REF_B is what you get after applying DR_ACCESS_FN (B, INDEX_B).  */
3148   unsigned int index_a = 0;
3149   unsigned int index_b = 0;
3150   tree ref_a = DR_REF (a);
3151   tree ref_b = DR_REF (b);
3152
3153   /* Now walk the component references from the final DR_REFs back up to
3154      the enclosing base objects.  Each component reference corresponds
3155      to one access function in the DR, with access function 0 being for
3156      the final DR_REF and the highest-indexed access function being the
3157      one that is applied to the base of the DR.
3158
3159      Look for a sequence of component references whose access functions
3160      are comparable (see access_fn_components_comparable_p).  If more
3161      than one such sequence exists, pick the one nearest the base
3162      (which is the leftmost sequence in C notation).  Store this sequence
3163      in FULL_SEQ.
3164
3165      For example, if we have:
3166
3167         struct foo { struct bar s; ... } (*a)[10], (*b)[10];
3168
3169         A: a[0][i].s.c.d
3170         B: __real b[0][i].s.e[i].f
3171
3172      (where d is the same type as the real component of f) then the access
3173      functions would be:
3174
3175                          0   1   2   3
3176         A:              .d  .c  .s [i]
3177
3178                  0   1   2   3   4   5
3179         B:  __real  .f [i]  .e  .s [i]
3180
3181      The A0/B2 column isn't comparable, since .d is a COMPONENT_REF
3182      and [i] is an ARRAY_REF.  However, the A1/B3 column contains two
3183      COMPONENT_REF accesses for struct bar, so is comparable.  Likewise
3184      the A2/B4 column contains two COMPONENT_REF accesses for struct foo,
3185      so is comparable.  The A3/B5 column contains two ARRAY_REFs that
3186      index foo[10] arrays, so is again comparable.  The sequence is
3187      therefore:
3188
3189         A: [1, 3]  (i.e. [i].s.c)
3190         B: [3, 5]  (i.e. [i].s.e)
3191
3192      Also look for sequences of component references whose access
3193      functions are comparable and whose enclosing objects have the same
3194      RECORD_TYPE.  Store this sequence in STRUCT_SEQ.  In the above
3195      example, STRUCT_SEQ would be:
3196
3197         A: [1, 2]  (i.e. s.c)
3198         B: [3, 4]  (i.e. s.e)  */
3199   while (index_a < num_dimensions_a && index_b < num_dimensions_b)
3200     {
3201       /* The alternate indices form always has a single dimension
3202          with unconstrained base.  */
3203       gcc_assert (!use_alt_indices);
3204
3205       /* REF_A and REF_B must be one of the component access types
3206          allowed by dr_analyze_indices.  */
3207       gcc_checking_assert (access_fn_component_p (ref_a));
3208       gcc_checking_assert (access_fn_component_p (ref_b));
3209
3210       /* Get the immediately-enclosing objects for REF_A and REF_B,
3211          i.e. the references *before* applying DR_ACCESS_FN (A, INDEX_A)
3212          and DR_ACCESS_FN (B, INDEX_B).  */
3213       tree object_a = TREE_OPERAND (ref_a, 0);
3214       tree object_b = TREE_OPERAND (ref_b, 0);
3215
3216       tree type_a = TREE_TYPE (object_a);
3217       tree type_b = TREE_TYPE (object_b);
3218       if (access_fn_components_comparable_p (ref_a, ref_b))
3219         {
3220           /* This pair of component accesses is comparable for dependence
3221              analysis, so we can include DR_ACCESS_FN (A, INDEX_A) and
3222              DR_ACCESS_FN (B, INDEX_B) in the sequence.  */
3223           if (full_seq.start_a + full_seq.length != index_a
3224               || full_seq.start_b + full_seq.length != index_b)
3225             {
3226               /* The accesses don't extend the current sequence,
3227                  so start a new one here.  */
3228               full_seq.start_a = index_a;
3229               full_seq.start_b = index_b;
3230               full_seq.length = 0;
3231             }
3232
3233           /* Add this pair of references to the sequence.  */
3234           full_seq.length += 1;
3235           full_seq.object_a = object_a;
3236           full_seq.object_b = object_b;
3237
3238           /* If the enclosing objects are structures (and thus have the
3239              same RECORD_TYPE), record the new sequence in STRUCT_SEQ.  */
3240           if (TREE_CODE (type_a) == RECORD_TYPE)
3241             struct_seq = full_seq;
3242
3243           /* Move to the next containing reference for both A and B.  */
3244           ref_a = object_a;
3245           ref_b = object_b;
3246           index_a += 1;
3247           index_b += 1;
3248           continue;
3249         }
3250
3251       /* Try to approach equal type sizes.  */
3252       if (!COMPLETE_TYPE_P (type_a)
3253           || !COMPLETE_TYPE_P (type_b)
3254           || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_a))
3255           || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_b)))
3256         break;
3257
3258       unsigned HOST_WIDE_INT size_a = tree_to_uhwi (TYPE_SIZE_UNIT (type_a));
3259       unsigned HOST_WIDE_INT size_b = tree_to_uhwi (TYPE_SIZE_UNIT (type_b));
3260       if (size_a <= size_b)
3261         {
3262           index_a += 1;
3263           ref_a = object_a;
3264         }
3265       if (size_b <= size_a)
3266         {
3267           index_b += 1;
3268           ref_b = object_b;
3269         }
3270     }
3271
3272   /* See whether FULL_SEQ ends at the base and whether the two bases
3273      are equal.  We do not care about TBAA or alignment info so we can
3274      use OEP_ADDRESS_OF to avoid false negatives.  */
3275   tree base_a = indices_a->base_object;
3276   tree base_b = indices_b->base_object;
3277   bool same_base_p = (full_seq.start_a + full_seq.length == num_dimensions_a
3278                       && full_seq.start_b + full_seq.length == num_dimensions_b
3279                       && (indices_a->unconstrained_base
3280                           == indices_b->unconstrained_base)
3281                       && operand_equal_p (base_a, base_b, OEP_ADDRESS_OF)
3282                       && (types_compatible_p (TREE_TYPE (base_a),
3283                                               TREE_TYPE (base_b))
3284                           || (!base_supports_access_fn_components_p (base_a)
3285                               && !base_supports_access_fn_components_p (base_b)
3286                               && operand_equal_p
3287                                    (TYPE_SIZE (TREE_TYPE (base_a)),
3288                                     TYPE_SIZE (TREE_TYPE (base_b)), 0)))
3289                       && (!loop_nest.exists ()
3290                           || (object_address_invariant_in_loop_p
3291                               (loop_nest[0], base_a))));
3292
3293   /* If the bases are the same, we can include the base variation too.
3294      E.g. the b accesses in:
3295
3296        for (int i = 0; i < n; ++i)
3297          b[i + 4][0] = b[i][0];
3298
3299      have a definite dependence distance of 4, while for:
3300
3301        for (int i = 0; i < n; ++i)
3302          a[i + 4][0] = b[i][0];
3303
3304      the dependence distance depends on the gap between a and b.
3305
3306      If the bases are different then we can only rely on the sequence
3307      rooted at a structure access, since arrays are allowed to overlap
3308      arbitrarily and change shape arbitrarily.  E.g. we treat this as
3309      valid code:
3310
3311        int a[256];
3312        ...
3313        ((int (*)[4][3]) &a[1])[i][0] += ((int (*)[4][3]) &a[2])[i][0];
3314
3315      where two lvalues with the same int[4][3] type overlap, and where
3316      both lvalues are distinct from the object's declared type.  */
3317   if (same_base_p)
3318     {
3319       if (indices_a->unconstrained_base)
3320         full_seq.length += 1;
3321     }
3322   else
3323     full_seq = struct_seq;
3324
3325   /* Punt if we didn't find a suitable sequence.  */
3326   if (full_seq.length == 0)
3327     {
3328       if (use_alt_indices
3329           || (TREE_CODE (DR_REF (a)) == MEM_REF
3330               && TREE_CODE (DR_REF (b)) == MEM_REF)
3331           || may_be_nonaddressable_p (DR_REF (a))
3332           || may_be_nonaddressable_p (DR_REF (b)))
3333         {
3334           /* Fully exhausted possibilities.  */
3335           DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3336           return res;
3337         }
3338
3339       /* Try evaluating both DRs as dereferences of pointers.  */
3340       if (!a->alt_indices.base_object
3341           && TREE_CODE (DR_REF (a)) != MEM_REF)
3342         {
3343           tree alt_ref = build2 (MEM_REF, TREE_TYPE (DR_REF (a)),
3344                                  build1 (ADDR_EXPR, ptr_type_node, DR_REF (a)),
3345                                  build_int_cst
3346                                    (reference_alias_ptr_type (DR_REF (a)), 0));
3347           dr_analyze_indices (&a->alt_indices, alt_ref,
3348                               loop_preheader_edge (loop_nest[0]),
3349                               loop_containing_stmt (DR_STMT (a)));
3350         }
3351       if (!b->alt_indices.base_object
3352           && TREE_CODE (DR_REF (b)) != MEM_REF)
3353         {
3354           tree alt_ref = build2 (MEM_REF, TREE_TYPE (DR_REF (b)),
3355                                  build1 (ADDR_EXPR, ptr_type_node, DR_REF (b)),
3356                                  build_int_cst
3357                                    (reference_alias_ptr_type (DR_REF (b)), 0));
3358           dr_analyze_indices (&b->alt_indices, alt_ref,
3359                               loop_preheader_edge (loop_nest[0]),
3360                               loop_containing_stmt (DR_STMT (b)));
3361         }
3362       return initialize_data_dependence_relation (res, loop_nest, true);
3363     }
3364
3365   if (!same_base_p)
3366     {
3367       /* Partial overlap is possible for different bases when strict aliasing
3368          is not in effect.  It's also possible if either base involves a union
3369          access; e.g. for:
3370
3371            struct s1 { int a[2]; };
3372            struct s2 { struct s1 b; int c; };
3373            struct s3 { int d; struct s1 e; };
3374            union u { struct s2 f; struct s3 g; } *p, *q;
3375
3376          the s1 at "p->f.b" (base "p->f") partially overlaps the s1 at
3377          "p->g.e" (base "p->g") and might partially overlap the s1 at
3378          "q->g.e" (base "q->g").  */
3379       if (!flag_strict_aliasing
3380           || ref_contains_union_access_p (full_seq.object_a)
3381           || ref_contains_union_access_p (full_seq.object_b))
3382         {
3383           DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3384           return res;
3385         }
3386
3387       DDR_COULD_BE_INDEPENDENT_P (res) = true;
3388       if (!loop_nest.exists ()
3389           || (object_address_invariant_in_loop_p (loop_nest[0],
3390                                                   full_seq.object_a)
3391               && object_address_invariant_in_loop_p (loop_nest[0],
3392                                                      full_seq.object_b)))
3393         {
3394           DDR_OBJECT_A (res) = full_seq.object_a;
3395           DDR_OBJECT_B (res) = full_seq.object_b;
3396         }
3397     }
3398
3399   DDR_AFFINE_P (res) = true;
3400   DDR_ARE_DEPENDENT (res) = NULL_TREE;
3401   DDR_SUBSCRIPTS (res).create (full_seq.length);
3402   DDR_LOOP_NEST (res) = loop_nest;
3403   DDR_SELF_REFERENCE (res) = false;
3404
3405   for (i = 0; i < full_seq.length; ++i)
3406     {
3407       struct subscript *subscript;
3408
3409       subscript = XNEW (struct subscript);
3410       SUB_ACCESS_FN (subscript, 0) = indices_a->access_fns[full_seq.start_a + i];
3411       SUB_ACCESS_FN (subscript, 1) = indices_b->access_fns[full_seq.start_b + i];
3412       SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
3413       SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
3414       SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
3415       SUB_DISTANCE (subscript) = chrec_dont_know;
3416       DDR_SUBSCRIPTS (res).safe_push (subscript);
3417     }
3418
3419   return res;
3420 }
3421
3422 /* Initialize a data dependence relation between data accesses A and
3423    B.  NB_LOOPS is the number of loops surrounding the references: the
3424    size of the classic distance/direction vectors.  */
3425
3426 struct data_dependence_relation *
3427 initialize_data_dependence_relation (struct data_reference *a,
3428                                      struct data_reference *b,
3429                                      vec<loop_p> loop_nest)
3430 {
3431   data_dependence_relation *res = XCNEW (struct data_dependence_relation);
3432   DDR_A (res) = a;
3433   DDR_B (res) = b;
3434   DDR_LOOP_NEST (res).create (0);
3435   DDR_SUBSCRIPTS (res).create (0);
3436   DDR_DIR_VECTS (res).create (0);
3437   DDR_DIST_VECTS (res).create (0);
3438
3439   if (a == NULL || b == NULL)
3440     {
3441       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3442       return res;
3443     }
3444
3445   /* If the data references do not alias, then they are independent.  */
3446   if (!dr_may_alias_p (a, b, loop_nest.exists () ? loop_nest[0] : NULL))
3447     {
3448       DDR_ARE_DEPENDENT (res) = chrec_known;
3449       return res;
3450     }
3451
3452   return initialize_data_dependence_relation (res, loop_nest, false);
3453 }
3454
3455
3456 /* Frees memory used by the conflict function F.  */
3457
3458 static void
3459 free_conflict_function (conflict_function *f)
3460 {
3461   unsigned i;
3462
3463   if (CF_NONTRIVIAL_P (f))
3464     {
3465       for (i = 0; i < f->n; i++)
3466         affine_fn_free (f->fns[i]);
3467     }
3468   free (f);
3469 }
3470
3471 /* Frees memory used by SUBSCRIPTS.  */
3472
3473 static void
3474 free_subscripts (vec<subscript_p> subscripts)
3475 {
3476   for (subscript_p s : subscripts)
3477     {
3478       free_conflict_function (s->conflicting_iterations_in_a);
3479       free_conflict_function (s->conflicting_iterations_in_b);
3480       free (s);
3481     }
3482   subscripts.release ();
3483 }
3484
3485 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
3486    description.  */
3487
3488 static inline void
3489 finalize_ddr_dependent (struct data_dependence_relation *ddr,
3490                         tree chrec)
3491 {
3492   DDR_ARE_DEPENDENT (ddr) = chrec;
3493   free_subscripts (DDR_SUBSCRIPTS (ddr));
3494   DDR_SUBSCRIPTS (ddr).create (0);
3495 }
3496
3497 /* The dependence relation DDR cannot be represented by a distance
3498    vector.  */
3499
3500 static inline void
3501 non_affine_dependence_relation (struct data_dependence_relation *ddr)
3502 {
3503   if (dump_file && (dump_flags & TDF_DETAILS))
3504     fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
3505
3506   DDR_AFFINE_P (ddr) = false;
3507 }
3508
3509 \f
3510
3511 /* This section contains the classic Banerjee tests.  */
3512
3513 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
3514    variables, i.e., if the ZIV (Zero Index Variable) test is true.  */
3515
3516 static inline bool
3517 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
3518 {
3519   return (evolution_function_is_constant_p (chrec_a)
3520           && evolution_function_is_constant_p (chrec_b));
3521 }
3522
3523 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
3524    variable, i.e., if the SIV (Single Index Variable) test is true.  */
3525
3526 static bool
3527 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
3528 {
3529   if ((evolution_function_is_constant_p (chrec_a)
3530        && evolution_function_is_univariate_p (chrec_b))
3531       || (evolution_function_is_constant_p (chrec_b)
3532           && evolution_function_is_univariate_p (chrec_a)))
3533     return true;
3534
3535   if (evolution_function_is_univariate_p (chrec_a)
3536       && evolution_function_is_univariate_p (chrec_b))
3537     {
3538       switch (TREE_CODE (chrec_a))
3539         {
3540         case POLYNOMIAL_CHREC:
3541           switch (TREE_CODE (chrec_b))
3542             {
3543             case POLYNOMIAL_CHREC:
3544               if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
3545                 return false;
3546               /* FALLTHRU */
3547
3548             default:
3549               return true;
3550             }
3551
3552         default:
3553           return true;
3554         }
3555     }
3556
3557   return false;
3558 }
3559
3560 /* Creates a conflict function with N dimensions.  The affine functions
3561    in each dimension follow.  */
3562
3563 static conflict_function *
3564 conflict_fn (unsigned n, ...)
3565 {
3566   unsigned i;
3567   conflict_function *ret = XCNEW (conflict_function);
3568   va_list ap;
3569
3570   gcc_assert (n > 0 && n <= MAX_DIM);
3571   va_start (ap, n);
3572
3573   ret->n = n;
3574   for (i = 0; i < n; i++)
3575     ret->fns[i] = va_arg (ap, affine_fn);
3576   va_end (ap);
3577
3578   return ret;
3579 }
3580
3581 /* Returns constant affine function with value CST.  */
3582
3583 static affine_fn
3584 affine_fn_cst (tree cst)
3585 {
3586   affine_fn fn;
3587   fn.create (1);
3588   fn.quick_push (cst);
3589   return fn;
3590 }
3591
3592 /* Returns affine function with single variable, CST + COEF * x_DIM.  */
3593
3594 static affine_fn
3595 affine_fn_univar (tree cst, unsigned dim, tree coef)
3596 {
3597   affine_fn fn;
3598   fn.create (dim + 1);
3599   unsigned i;
3600
3601   gcc_assert (dim > 0);
3602   fn.quick_push (cst);
3603   for (i = 1; i < dim; i++)
3604     fn.quick_push (integer_zero_node);
3605   fn.quick_push (coef);
3606   return fn;
3607 }
3608
3609 /* Analyze a ZIV (Zero Index Variable) subscript.  *OVERLAPS_A and
3610    *OVERLAPS_B are initialized to the functions that describe the
3611    relation between the elements accessed twice by CHREC_A and
3612    CHREC_B.  For k >= 0, the following property is verified:
3613
3614    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
3615
3616 static void
3617 analyze_ziv_subscript (tree chrec_a,
3618                        tree chrec_b,
3619                        conflict_function **overlaps_a,
3620                        conflict_function **overlaps_b,
3621                        tree *last_conflicts)
3622 {
3623   tree type, difference;
3624   dependence_stats.num_ziv++;
3625
3626   if (dump_file && (dump_flags & TDF_DETAILS))
3627     fprintf (dump_file, "(analyze_ziv_subscript \n");
3628
3629   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
3630   chrec_a = chrec_convert (type, chrec_a, NULL);
3631   chrec_b = chrec_convert (type, chrec_b, NULL);
3632   difference = chrec_fold_minus (type, chrec_a, chrec_b);
3633
3634   switch (TREE_CODE (difference))
3635     {
3636     case INTEGER_CST:
3637       if (integer_zerop (difference))
3638         {
3639           /* The difference is equal to zero: the accessed index
3640              overlaps for each iteration in the loop.  */
3641           *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3642           *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3643           *last_conflicts = chrec_dont_know;
3644           dependence_stats.num_ziv_dependent++;
3645         }
3646       else
3647         {
3648           /* The accesses do not overlap.  */
3649           *overlaps_a = conflict_fn_no_dependence ();
3650           *overlaps_b = conflict_fn_no_dependence ();
3651           *last_conflicts = integer_zero_node;
3652           dependence_stats.num_ziv_independent++;
3653         }
3654       break;
3655
3656     default:
3657       /* We're not sure whether the indexes overlap.  For the moment,
3658          conservatively answer "don't know".  */
3659       if (dump_file && (dump_flags & TDF_DETAILS))
3660         fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
3661
3662       *overlaps_a = conflict_fn_not_known ();
3663       *overlaps_b = conflict_fn_not_known ();
3664       *last_conflicts = chrec_dont_know;
3665       dependence_stats.num_ziv_unimplemented++;
3666       break;
3667     }
3668
3669   if (dump_file && (dump_flags & TDF_DETAILS))
3670     fprintf (dump_file, ")\n");
3671 }
3672
3673 /* Similar to max_stmt_executions_int, but returns the bound as a tree,
3674    and only if it fits to the int type.  If this is not the case, or the
3675    bound  on the number of iterations of LOOP could not be derived, returns
3676    chrec_dont_know.  */
3677
3678 static tree
3679 max_stmt_executions_tree (class loop *loop)
3680 {
3681   widest_int nit;
3682
3683   if (!max_stmt_executions (loop, &nit))
3684     return chrec_dont_know;
3685
3686   if (!wi::fits_to_tree_p (nit, unsigned_type_node))
3687     return chrec_dont_know;
3688
3689   return wide_int_to_tree (unsigned_type_node, nit);
3690 }
3691
3692 /* Determine whether the CHREC is always positive/negative.  If the expression
3693    cannot be statically analyzed, return false, otherwise set the answer into
3694    VALUE.  */
3695
3696 static bool
3697 chrec_is_positive (tree chrec, bool *value)
3698 {
3699   bool value0, value1, value2;
3700   tree end_value, nb_iter;
3701
3702   switch (TREE_CODE (chrec))
3703     {
3704     case POLYNOMIAL_CHREC:
3705       if (!chrec_is_positive (CHREC_LEFT (chrec), &value0)
3706           || !chrec_is_positive (CHREC_RIGHT (chrec), &value1))
3707         return false;
3708
3709       /* FIXME -- overflows.  */
3710       if (value0 == value1)
3711         {
3712           *value = value0;
3713           return true;
3714         }
3715
3716       /* Otherwise the chrec is under the form: "{-197, +, 2}_1",
3717          and the proof consists in showing that the sign never
3718          changes during the execution of the loop, from 0 to
3719          loop->nb_iterations.  */
3720       if (!evolution_function_is_affine_p (chrec))
3721         return false;
3722
3723       nb_iter = number_of_latch_executions (get_chrec_loop (chrec));
3724       if (chrec_contains_undetermined (nb_iter))
3725         return false;
3726
3727 #if 0
3728       /* TODO -- If the test is after the exit, we may decrease the number of
3729          iterations by one.  */
3730       if (after_exit)
3731         nb_iter = chrec_fold_minus (type, nb_iter, build_int_cst (type, 1));
3732 #endif
3733
3734       end_value = chrec_apply (CHREC_VARIABLE (chrec), chrec, nb_iter);
3735
3736       if (!chrec_is_positive (end_value, &value2))
3737         return false;
3738
3739       *value = value0;
3740       return value0 == value1;
3741
3742     case INTEGER_CST:
3743       switch (tree_int_cst_sgn (chrec))
3744         {
3745         case -1:
3746           *value = false;
3747           break;
3748         case 1:
3749           *value = true;
3750           break;
3751         default:
3752           return false;
3753         }
3754       return true;
3755
3756     default:
3757       return false;
3758     }
3759 }
3760
3761
3762 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
3763    constant, and CHREC_B is an affine function.  *OVERLAPS_A and
3764    *OVERLAPS_B are initialized to the functions that describe the
3765    relation between the elements accessed twice by CHREC_A and
3766    CHREC_B.  For k >= 0, the following property is verified:
3767
3768    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
3769
3770 static void
3771 analyze_siv_subscript_cst_affine (tree chrec_a,
3772                                   tree chrec_b,
3773                                   conflict_function **overlaps_a,
3774                                   conflict_function **overlaps_b,
3775                                   tree *last_conflicts)
3776 {
3777   bool value0, value1, value2;
3778   tree type, difference, tmp;
3779
3780   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
3781   chrec_a = chrec_convert (type, chrec_a, NULL);
3782   chrec_b = chrec_convert (type, chrec_b, NULL);
3783   difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
3784
3785   /* Special case overlap in the first iteration.  */
3786   if (integer_zerop (difference))
3787     {
3788       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3789       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3790       *last_conflicts = integer_one_node;
3791       return;
3792     }
3793
3794   if (!chrec_is_positive (initial_condition (difference), &value0))
3795     {
3796       if (dump_file && (dump_flags & TDF_DETAILS))
3797         fprintf (dump_file, "siv test failed: chrec is not positive.\n");
3798
3799       dependence_stats.num_siv_unimplemented++;
3800       *overlaps_a = conflict_fn_not_known ();
3801       *overlaps_b = conflict_fn_not_known ();
3802       *last_conflicts = chrec_dont_know;
3803       return;
3804     }
3805   else
3806     {
3807       if (value0 == false)
3808         {
3809           if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
3810               || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
3811             {
3812               if (dump_file && (dump_flags & TDF_DETAILS))
3813                 fprintf (dump_file, "siv test failed: chrec not positive.\n");
3814
3815               *overlaps_a = conflict_fn_not_known ();
3816               *overlaps_b = conflict_fn_not_known ();
3817               *last_conflicts = chrec_dont_know;
3818               dependence_stats.num_siv_unimplemented++;
3819               return;
3820             }
3821           else
3822             {
3823               if (value1 == true)
3824                 {
3825                   /* Example:
3826                      chrec_a = 12
3827                      chrec_b = {10, +, 1}
3828                   */
3829
3830                   if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
3831                     {
3832                       HOST_WIDE_INT numiter;
3833                       class loop *loop = get_chrec_loop (chrec_b);
3834
3835                       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3836                       tmp = fold_build2 (EXACT_DIV_EXPR, type,
3837                                          fold_build1 (ABS_EXPR, type, difference),
3838                                          CHREC_RIGHT (chrec_b));
3839                       *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
3840                       *last_conflicts = integer_one_node;
3841
3842
3843                       /* Perform weak-zero siv test to see if overlap is
3844                          outside the loop bounds.  */
3845                       numiter = max_stmt_executions_int (loop);
3846
3847                       if (numiter >= 0
3848                           && compare_tree_int (tmp, numiter) > 0)
3849                         {
3850                           free_conflict_function (*overlaps_a);
3851                           free_conflict_function (*overlaps_b);
3852                           *overlaps_a = conflict_fn_no_dependence ();
3853                           *overlaps_b = conflict_fn_no_dependence ();
3854                           *last_conflicts = integer_zero_node;
3855                           dependence_stats.num_siv_independent++;
3856                           return;
3857                         }
3858                       dependence_stats.num_siv_dependent++;
3859                       return;
3860                     }
3861
3862                   /* When the step does not divide the difference, there are
3863                      no overlaps.  */
3864                   else
3865                     {
3866                       *overlaps_a = conflict_fn_no_dependence ();
3867                       *overlaps_b = conflict_fn_no_dependence ();
3868                       *last_conflicts = integer_zero_node;
3869                       dependence_stats.num_siv_independent++;
3870                       return;
3871                     }
3872                 }
3873
3874               else
3875                 {
3876                   /* Example:
3877                      chrec_a = 12
3878                      chrec_b = {10, +, -1}
3879
3880                      In this case, chrec_a will not overlap with chrec_b.  */
3881                   *overlaps_a = conflict_fn_no_dependence ();
3882                   *overlaps_b = conflict_fn_no_dependence ();
3883                   *last_conflicts = integer_zero_node;
3884                   dependence_stats.num_siv_independent++;
3885                   return;
3886                 }
3887             }
3888         }
3889       else
3890         {
3891           if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
3892               || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
3893             {
3894               if (dump_file && (dump_flags & TDF_DETAILS))
3895                 fprintf (dump_file, "siv test failed: chrec not positive.\n");
3896
3897               *overlaps_a = conflict_fn_not_known ();
3898               *overlaps_b = conflict_fn_not_known ();
3899               *last_conflicts = chrec_dont_know;
3900               dependence_stats.num_siv_unimplemented++;
3901               return;
3902             }
3903           else
3904             {
3905               if (value2 == false)
3906                 {
3907                   /* Example:
3908                      chrec_a = 3
3909                      chrec_b = {10, +, -1}
3910                   */
3911                   if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
3912                     {
3913                       HOST_WIDE_INT numiter;
3914                       class loop *loop = get_chrec_loop (chrec_b);
3915
3916                       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3917                       tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
3918                                          CHREC_RIGHT (chrec_b));
3919                       *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
3920                       *last_conflicts = integer_one_node;
3921
3922                       /* Perform weak-zero siv test to see if overlap is
3923                          outside the loop bounds.  */
3924                       numiter = max_stmt_executions_int (loop);
3925
3926                       if (numiter >= 0
3927                           && compare_tree_int (tmp, numiter) > 0)
3928                         {
3929                           free_conflict_function (*overlaps_a);
3930                           free_conflict_function (*overlaps_b);
3931                           *overlaps_a = conflict_fn_no_dependence ();
3932                           *overlaps_b = conflict_fn_no_dependence ();
3933                           *last_conflicts = integer_zero_node;
3934                           dependence_stats.num_siv_independent++;
3935                           return;
3936                         }
3937                       dependence_stats.num_siv_dependent++;
3938                       return;
3939                     }
3940
3941                   /* When the step does not divide the difference, there
3942                      are no overlaps.  */
3943                   else
3944                     {
3945                       *overlaps_a = conflict_fn_no_dependence ();
3946                       *overlaps_b = conflict_fn_no_dependence ();
3947                       *last_conflicts = integer_zero_node;
3948                       dependence_stats.num_siv_independent++;
3949                       return;
3950                     }
3951                 }
3952               else
3953                 {
3954                   /* Example:
3955                      chrec_a = 3
3956                      chrec_b = {4, +, 1}
3957
3958                      In this case, chrec_a will not overlap with chrec_b.  */
3959                   *overlaps_a = conflict_fn_no_dependence ();
3960                   *overlaps_b = conflict_fn_no_dependence ();
3961                   *last_conflicts = integer_zero_node;
3962                   dependence_stats.num_siv_independent++;
3963                   return;
3964                 }
3965             }
3966         }
3967     }
3968 }
3969
3970 /* Helper recursive function for initializing the matrix A.  Returns
3971    the initial value of CHREC.  */
3972
3973 static tree
3974 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
3975 {
3976   gcc_assert (chrec);
3977
3978   switch (TREE_CODE (chrec))
3979     {
3980     case POLYNOMIAL_CHREC:
3981       HOST_WIDE_INT chrec_right;
3982       if (!cst_and_fits_in_hwi (CHREC_RIGHT (chrec)))
3983         return chrec_dont_know;
3984       chrec_right = int_cst_value (CHREC_RIGHT (chrec));
3985       /* We want to be able to negate without overflow.  */
3986       if (chrec_right == HOST_WIDE_INT_MIN)
3987         return chrec_dont_know;
3988       A[index][0] = mult * chrec_right;
3989       return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
3990
3991     case PLUS_EXPR:
3992     case MULT_EXPR:
3993     case MINUS_EXPR:
3994       {
3995         tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
3996         tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
3997
3998         return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
3999       }
4000
4001     CASE_CONVERT:
4002       {
4003         tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
4004         return chrec_convert (chrec_type (chrec), op, NULL);
4005       }
4006
4007     case BIT_NOT_EXPR:
4008       {
4009         /* Handle ~X as -1 - X.  */
4010         tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
4011         return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
4012                               build_int_cst (TREE_TYPE (chrec), -1), op);
4013       }
4014
4015     case INTEGER_CST:
4016       return chrec;
4017
4018     default:
4019       gcc_unreachable ();
4020       return NULL_TREE;
4021     }
4022 }
4023
4024 #define FLOOR_DIV(x,y) ((x) / (y))
4025
4026 /* Solves the special case of the Diophantine equation:
4027    | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
4028
4029    Computes the descriptions OVERLAPS_A and OVERLAPS_B.  NITER is the
4030    number of iterations that loops X and Y run.  The overlaps will be
4031    constructed as evolutions in dimension DIM.  */
4032
4033 static void
4034 compute_overlap_steps_for_affine_univar (HOST_WIDE_INT niter,
4035                                          HOST_WIDE_INT step_a,
4036                                          HOST_WIDE_INT step_b,
4037                                          affine_fn *overlaps_a,
4038                                          affine_fn *overlaps_b,
4039                                          tree *last_conflicts, int dim)
4040 {
4041   if (((step_a > 0 && step_b > 0)
4042        || (step_a < 0 && step_b < 0)))
4043     {
4044       HOST_WIDE_INT step_overlaps_a, step_overlaps_b;
4045       HOST_WIDE_INT gcd_steps_a_b, last_conflict, tau2;
4046
4047       gcd_steps_a_b = gcd (step_a, step_b);
4048       step_overlaps_a = step_b / gcd_steps_a_b;
4049       step_overlaps_b = step_a / gcd_steps_a_b;
4050
4051       if (niter > 0)
4052         {
4053           tau2 = FLOOR_DIV (niter, step_overlaps_a);
4054           tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
4055           last_conflict = tau2;
4056           *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
4057         }
4058       else
4059         *last_conflicts = chrec_dont_know;
4060
4061       *overlaps_a = affine_fn_univar (integer_zero_node, dim,
4062                                       build_int_cst (NULL_TREE,
4063                                                      step_overlaps_a));
4064       *overlaps_b = affine_fn_univar (integer_zero_node, dim,
4065                                       build_int_cst (NULL_TREE,
4066                                                      step_overlaps_b));
4067     }
4068
4069   else
4070     {
4071       *overlaps_a = affine_fn_cst (integer_zero_node);
4072       *overlaps_b = affine_fn_cst (integer_zero_node);
4073       *last_conflicts = integer_zero_node;
4074     }
4075 }
4076
4077 /* Solves the special case of a Diophantine equation where CHREC_A is
4078    an affine bivariate function, and CHREC_B is an affine univariate
4079    function.  For example,
4080
4081    | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
4082
4083    has the following overlapping functions:
4084
4085    | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
4086    | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
4087    | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
4088
4089    FORNOW: This is a specialized implementation for a case occurring in
4090    a common benchmark.  Implement the general algorithm.  */
4091
4092 static void
4093 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
4094                                       conflict_function **overlaps_a,
4095                                       conflict_function **overlaps_b,
4096                                       tree *last_conflicts)
4097 {
4098   bool xz_p, yz_p, xyz_p;
4099   HOST_WIDE_INT step_x, step_y, step_z;
4100   HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
4101   affine_fn overlaps_a_xz, overlaps_b_xz;
4102   affine_fn overlaps_a_yz, overlaps_b_yz;
4103   affine_fn overlaps_a_xyz, overlaps_b_xyz;
4104   affine_fn ova1, ova2, ovb;
4105   tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
4106
4107   step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
4108   step_y = int_cst_value (CHREC_RIGHT (chrec_a));
4109   step_z = int_cst_value (CHREC_RIGHT (chrec_b));
4110
4111   niter_x = max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)));
4112   niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a));
4113   niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b));
4114
4115   if (niter_x < 0 || niter_y < 0 || niter_z < 0)
4116     {
4117       if (dump_file && (dump_flags & TDF_DETAILS))
4118         fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
4119
4120       *overlaps_a = conflict_fn_not_known ();
4121       *overlaps_b = conflict_fn_not_known ();
4122       *last_conflicts = chrec_dont_know;
4123       return;
4124     }
4125
4126   niter = MIN (niter_x, niter_z);
4127   compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
4128                                            &overlaps_a_xz,
4129                                            &overlaps_b_xz,
4130                                            &last_conflicts_xz, 1);
4131   niter = MIN (niter_y, niter_z);
4132   compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
4133                                            &overlaps_a_yz,
4134                                            &overlaps_b_yz,
4135                                            &last_conflicts_yz, 2);
4136   niter = MIN (niter_x, niter_z);
4137   niter = MIN (niter_y, niter);
4138   compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
4139                                            &overlaps_a_xyz,
4140                                            &overlaps_b_xyz,
4141                                            &last_conflicts_xyz, 3);
4142
4143   xz_p = !integer_zerop (last_conflicts_xz);
4144   yz_p = !integer_zerop (last_conflicts_yz);
4145   xyz_p = !integer_zerop (last_conflicts_xyz);
4146
4147   if (xz_p || yz_p || xyz_p)
4148     {
4149       ova1 = affine_fn_cst (integer_zero_node);
4150       ova2 = affine_fn_cst (integer_zero_node);
4151       ovb = affine_fn_cst (integer_zero_node);
4152       if (xz_p)
4153         {
4154           affine_fn t0 = ova1;
4155           affine_fn t2 = ovb;
4156
4157           ova1 = affine_fn_plus (ova1, overlaps_a_xz);
4158           ovb = affine_fn_plus (ovb, overlaps_b_xz);
4159           affine_fn_free (t0);
4160           affine_fn_free (t2);
4161           *last_conflicts = last_conflicts_xz;
4162         }
4163       if (yz_p)
4164         {
4165           affine_fn t0 = ova2;
4166           affine_fn t2 = ovb;
4167
4168           ova2 = affine_fn_plus (ova2, overlaps_a_yz);
4169           ovb = affine_fn_plus (ovb, overlaps_b_yz);
4170           affine_fn_free (t0);
4171           affine_fn_free (t2);
4172           *last_conflicts = last_conflicts_yz;
4173         }
4174       if (xyz_p)
4175         {
4176           affine_fn t0 = ova1;
4177           affine_fn t2 = ova2;
4178           affine_fn t4 = ovb;
4179
4180           ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
4181           ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
4182           ovb = affine_fn_plus (ovb, overlaps_b_xyz);
4183           affine_fn_free (t0);
4184           affine_fn_free (t2);
4185           affine_fn_free (t4);
4186           *last_conflicts = last_conflicts_xyz;
4187         }
4188       *overlaps_a = conflict_fn (2, ova1, ova2);
4189       *overlaps_b = conflict_fn (1, ovb);
4190     }
4191   else
4192     {
4193       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4194       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4195       *last_conflicts = integer_zero_node;
4196     }
4197
4198   affine_fn_free (overlaps_a_xz);
4199   affine_fn_free (overlaps_b_xz);
4200   affine_fn_free (overlaps_a_yz);
4201   affine_fn_free (overlaps_b_yz);
4202   affine_fn_free (overlaps_a_xyz);
4203   affine_fn_free (overlaps_b_xyz);
4204 }
4205
4206 /* Copy the elements of vector VEC1 with length SIZE to VEC2.  */
4207
4208 static void
4209 lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
4210                     int size)
4211 {
4212   memcpy (vec2, vec1, size * sizeof (*vec1));
4213 }
4214
4215 /* Copy the elements of M x N matrix MAT1 to MAT2.  */
4216
4217 static void
4218 lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
4219                     int m, int n)
4220 {
4221   int i;
4222
4223   for (i = 0; i < m; i++)
4224     lambda_vector_copy (mat1[i], mat2[i], n);
4225 }
4226
4227 /* Store the N x N identity matrix in MAT.  */
4228
4229 static void
4230 lambda_matrix_id (lambda_matrix mat, int size)
4231 {
4232   int i, j;
4233
4234   for (i = 0; i < size; i++)
4235     for (j = 0; j < size; j++)
4236       mat[i][j] = (i == j) ? 1 : 0;
4237 }
4238
4239 /* Return the index of the first nonzero element of vector VEC1 between
4240    START and N.  We must have START <= N.
4241    Returns N if VEC1 is the zero vector.  */
4242
4243 static int
4244 lambda_vector_first_nz (lambda_vector vec1, int n, int start)
4245 {
4246   int j = start;
4247   while (j < n && vec1[j] == 0)
4248     j++;
4249   return j;
4250 }
4251
4252 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
4253    R2 = R2 + CONST1 * R1.  */
4254
4255 static bool
4256 lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2,
4257                        lambda_int const1)
4258 {
4259   int i;
4260
4261   if (const1 == 0)
4262     return true;
4263
4264   for (i = 0; i < n; i++)
4265     {
4266       bool ovf;
4267       lambda_int tem = mul_hwi (mat[r1][i], const1, &ovf);
4268       if (ovf)
4269         return false;
4270       lambda_int tem2 = add_hwi (mat[r2][i], tem, &ovf);
4271       if (ovf || tem2 == HOST_WIDE_INT_MIN)
4272         return false;
4273       mat[r2][i] = tem2;
4274     }
4275
4276   return true;
4277 }
4278
4279 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
4280    and store the result in VEC2.  */
4281
4282 static void
4283 lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
4284                           int size, lambda_int const1)
4285 {
4286   int i;
4287
4288   if (const1 == 0)
4289     lambda_vector_clear (vec2, size);
4290   else
4291     for (i = 0; i < size; i++)
4292       vec2[i] = const1 * vec1[i];
4293 }
4294
4295 /* Negate vector VEC1 with length SIZE and store it in VEC2.  */
4296
4297 static void
4298 lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
4299                       int size)
4300 {
4301   lambda_vector_mult_const (vec1, vec2, size, -1);
4302 }
4303
4304 /* Negate row R1 of matrix MAT which has N columns.  */
4305
4306 static void
4307 lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
4308 {
4309   lambda_vector_negate (mat[r1], mat[r1], n);
4310 }
4311
4312 /* Return true if two vectors are equal.  */
4313
4314 static bool
4315 lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
4316 {
4317   int i;
4318   for (i = 0; i < size; i++)
4319     if (vec1[i] != vec2[i])
4320       return false;
4321   return true;
4322 }
4323
4324 /* Given an M x N integer matrix A, this function determines an M x
4325    M unimodular matrix U, and an M x N echelon matrix S such that
4326    "U.A = S".  This decomposition is also known as "right Hermite".
4327
4328    Ref: Algorithm 2.1 page 33 in "Loop Transformations for
4329    Restructuring Compilers" Utpal Banerjee.  */
4330
4331 static bool
4332 lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
4333                              lambda_matrix S, lambda_matrix U)
4334 {
4335   int i, j, i0 = 0;
4336
4337   lambda_matrix_copy (A, S, m, n);
4338   lambda_matrix_id (U, m);
4339
4340   for (j = 0; j < n; j++)
4341     {
4342       if (lambda_vector_first_nz (S[j], m, i0) < m)
4343         {
4344           ++i0;
4345           for (i = m - 1; i >= i0; i--)
4346             {
4347               while (S[i][j] != 0)
4348                 {
4349                   lambda_int factor, a, b;
4350
4351                   a = S[i-1][j];
4352                   b = S[i][j];
4353                   gcc_assert (a != HOST_WIDE_INT_MIN);
4354                   factor = a / b;
4355
4356                   if (!lambda_matrix_row_add (S, n, i, i-1, -factor))
4357                     return false;
4358                   std::swap (S[i], S[i-1]);
4359
4360                   if (!lambda_matrix_row_add (U, m, i, i-1, -factor))
4361                     return false;
4362                   std::swap (U[i], U[i-1]);
4363                 }
4364             }
4365         }
4366     }
4367
4368   return true;
4369 }
4370
4371 /* Determines the overlapping elements due to accesses CHREC_A and
4372    CHREC_B, that are affine functions.  This function cannot handle
4373    symbolic evolution functions, ie. when initial conditions are
4374    parameters, because it uses lambda matrices of integers.  */
4375
4376 static void
4377 analyze_subscript_affine_affine (tree chrec_a,
4378                                  tree chrec_b,
4379                                  conflict_function **overlaps_a,
4380                                  conflict_function **overlaps_b,
4381                                  tree *last_conflicts)
4382 {
4383   unsigned nb_vars_a, nb_vars_b, dim;
4384   lambda_int gamma, gcd_alpha_beta;
4385   lambda_matrix A, U, S;
4386   struct obstack scratch_obstack;
4387
4388   if (eq_evolutions_p (chrec_a, chrec_b))
4389     {
4390       /* The accessed index overlaps for each iteration in the
4391          loop.  */
4392       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4393       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4394       *last_conflicts = chrec_dont_know;
4395       return;
4396     }
4397   if (dump_file && (dump_flags & TDF_DETAILS))
4398     fprintf (dump_file, "(analyze_subscript_affine_affine \n");
4399
4400   /* For determining the initial intersection, we have to solve a
4401      Diophantine equation.  This is the most time consuming part.
4402
4403      For answering to the question: "Is there a dependence?" we have
4404      to prove that there exists a solution to the Diophantine
4405      equation, and that the solution is in the iteration domain,
4406      i.e. the solution is positive or zero, and that the solution
4407      happens before the upper bound loop.nb_iterations.  Otherwise
4408      there is no dependence.  This function outputs a description of
4409      the iterations that hold the intersections.  */
4410
4411   nb_vars_a = nb_vars_in_chrec (chrec_a);
4412   nb_vars_b = nb_vars_in_chrec (chrec_b);
4413
4414   gcc_obstack_init (&scratch_obstack);
4415
4416   dim = nb_vars_a + nb_vars_b;
4417   U = lambda_matrix_new (dim, dim, &scratch_obstack);
4418   A = lambda_matrix_new (dim, 1, &scratch_obstack);
4419   S = lambda_matrix_new (dim, 1, &scratch_obstack);
4420
4421   tree init_a = initialize_matrix_A (A, chrec_a, 0, 1);
4422   tree init_b = initialize_matrix_A (A, chrec_b, nb_vars_a, -1);
4423   if (init_a == chrec_dont_know
4424       || init_b == chrec_dont_know)
4425     {
4426       if (dump_file && (dump_flags & TDF_DETAILS))
4427         fprintf (dump_file, "affine-affine test failed: "
4428                  "representation issue.\n");
4429       *overlaps_a = conflict_fn_not_known ();
4430       *overlaps_b = conflict_fn_not_known ();
4431       *last_conflicts = chrec_dont_know;
4432       goto end_analyze_subs_aa;
4433     }
4434   gamma = int_cst_value (init_b) - int_cst_value (init_a);
4435
4436   /* Don't do all the hard work of solving the Diophantine equation
4437      when we already know the solution: for example,
4438      | {3, +, 1}_1
4439      | {3, +, 4}_2
4440      | gamma = 3 - 3 = 0.
4441      Then the first overlap occurs during the first iterations:
4442      | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
4443   */
4444   if (gamma == 0)
4445     {
4446       if (nb_vars_a == 1 && nb_vars_b == 1)
4447         {
4448           HOST_WIDE_INT step_a, step_b;
4449           HOST_WIDE_INT niter, niter_a, niter_b;
4450           affine_fn ova, ovb;
4451
4452           niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a));
4453           niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b));
4454           niter = MIN (niter_a, niter_b);
4455           step_a = int_cst_value (CHREC_RIGHT (chrec_a));
4456           step_b = int_cst_value (CHREC_RIGHT (chrec_b));
4457
4458           compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
4459                                                    &ova, &ovb,
4460                                                    last_conflicts, 1);
4461           *overlaps_a = conflict_fn (1, ova);
4462           *overlaps_b = conflict_fn (1, ovb);
4463         }
4464
4465       else if (nb_vars_a == 2 && nb_vars_b == 1)
4466         compute_overlap_steps_for_affine_1_2
4467           (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
4468
4469       else if (nb_vars_a == 1 && nb_vars_b == 2)
4470         compute_overlap_steps_for_affine_1_2
4471           (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
4472
4473       else
4474         {
4475           if (dump_file && (dump_flags & TDF_DETAILS))
4476             fprintf (dump_file, "affine-affine test failed: too many variables.\n");
4477           *overlaps_a = conflict_fn_not_known ();
4478           *overlaps_b = conflict_fn_not_known ();
4479           *last_conflicts = chrec_dont_know;
4480         }
4481       goto end_analyze_subs_aa;
4482     }
4483
4484   /* U.A = S */
4485   if (!lambda_matrix_right_hermite (A, dim, 1, S, U))
4486     {
4487       *overlaps_a = conflict_fn_not_known ();
4488       *overlaps_b = conflict_fn_not_known ();
4489       *last_conflicts = chrec_dont_know;
4490       goto end_analyze_subs_aa;
4491     }
4492
4493   if (S[0][0] < 0)
4494     {
4495       S[0][0] *= -1;
4496       lambda_matrix_row_negate (U, dim, 0);
4497     }
4498   gcd_alpha_beta = S[0][0];
4499
4500   /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
4501      but that is a quite strange case.  Instead of ICEing, answer
4502      don't know.  */
4503   if (gcd_alpha_beta == 0)
4504     {
4505       *overlaps_a = conflict_fn_not_known ();
4506       *overlaps_b = conflict_fn_not_known ();
4507       *last_conflicts = chrec_dont_know;
4508       goto end_analyze_subs_aa;
4509     }
4510
4511   /* The classic "gcd-test".  */
4512   if (!int_divides_p (gcd_alpha_beta, gamma))
4513     {
4514       /* The "gcd-test" has determined that there is no integer
4515          solution, i.e. there is no dependence.  */
4516       *overlaps_a = conflict_fn_no_dependence ();
4517       *overlaps_b = conflict_fn_no_dependence ();
4518       *last_conflicts = integer_zero_node;
4519     }
4520
4521   /* Both access functions are univariate.  This includes SIV and MIV cases.  */
4522   else if (nb_vars_a == 1 && nb_vars_b == 1)
4523     {
4524       /* Both functions should have the same evolution sign.  */
4525       if (((A[0][0] > 0 && -A[1][0] > 0)
4526            || (A[0][0] < 0 && -A[1][0] < 0)))
4527         {
4528           /* The solutions are given by:
4529              |
4530              | [GAMMA/GCD_ALPHA_BETA  t].[u11 u12]  = [x0]
4531              |                           [u21 u22]    [y0]
4532
4533              For a given integer t.  Using the following variables,
4534
4535              | i0 = u11 * gamma / gcd_alpha_beta
4536              | j0 = u12 * gamma / gcd_alpha_beta
4537              | i1 = u21
4538              | j1 = u22
4539
4540              the solutions are:
4541
4542              | x0 = i0 + i1 * t,
4543              | y0 = j0 + j1 * t.  */
4544           HOST_WIDE_INT i0, j0, i1, j1;
4545
4546           i0 = U[0][0] * gamma / gcd_alpha_beta;
4547           j0 = U[0][1] * gamma / gcd_alpha_beta;
4548           i1 = U[1][0];
4549           j1 = U[1][1];
4550
4551           if ((i1 == 0 && i0 < 0)
4552               || (j1 == 0 && j0 < 0))
4553             {
4554               /* There is no solution.
4555                  FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
4556                  falls in here, but for the moment we don't look at the
4557                  upper bound of the iteration domain.  */
4558               *overlaps_a = conflict_fn_no_dependence ();
4559               *overlaps_b = conflict_fn_no_dependence ();
4560               *last_conflicts = integer_zero_node;
4561               goto end_analyze_subs_aa;
4562             }
4563
4564           if (i1 > 0 && j1 > 0)
4565             {
4566               HOST_WIDE_INT niter_a
4567                 = max_stmt_executions_int (get_chrec_loop (chrec_a));
4568               HOST_WIDE_INT niter_b
4569                 = max_stmt_executions_int (get_chrec_loop (chrec_b));
4570               HOST_WIDE_INT niter = MIN (niter_a, niter_b);
4571
4572               /* (X0, Y0) is a solution of the Diophantine equation:
4573                  "chrec_a (X0) = chrec_b (Y0)".  */
4574               HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
4575                                         CEIL (-j0, j1));
4576               HOST_WIDE_INT x0 = i1 * tau1 + i0;
4577               HOST_WIDE_INT y0 = j1 * tau1 + j0;
4578
4579               /* (X1, Y1) is the smallest positive solution of the eq
4580                  "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
4581                  first conflict occurs.  */
4582               HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
4583               HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
4584               HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
4585
4586               if (niter > 0)
4587                 {
4588                   /* If the overlap occurs outside of the bounds of the
4589                      loop, there is no dependence.  */
4590                   if (x1 >= niter_a || y1 >= niter_b)
4591                     {
4592                       *overlaps_a = conflict_fn_no_dependence ();
4593                       *overlaps_b = conflict_fn_no_dependence ();
4594                       *last_conflicts = integer_zero_node;
4595                       goto end_analyze_subs_aa;
4596                     }
4597
4598                   /* max stmt executions can get quite large, avoid
4599                      overflows by using wide ints here.  */
4600                   widest_int tau2
4601                     = wi::smin (wi::sdiv_floor (wi::sub (niter_a, i0), i1),
4602                                 wi::sdiv_floor (wi::sub (niter_b, j0), j1));
4603                   widest_int last_conflict = wi::sub (tau2, (x1 - i0)/i1);
4604                   if (wi::min_precision (last_conflict, SIGNED)
4605                       <= TYPE_PRECISION (integer_type_node))
4606                     *last_conflicts
4607                        = build_int_cst (integer_type_node,
4608                                         last_conflict.to_shwi ());
4609                   else
4610                     *last_conflicts = chrec_dont_know;
4611                 }
4612               else
4613                 *last_conflicts = chrec_dont_know;
4614
4615               *overlaps_a
4616                 = conflict_fn (1,
4617                                affine_fn_univar (build_int_cst (NULL_TREE, x1),
4618                                                  1,
4619                                                  build_int_cst (NULL_TREE, i1)));
4620               *overlaps_b
4621                 = conflict_fn (1,
4622                                affine_fn_univar (build_int_cst (NULL_TREE, y1),
4623                                                  1,
4624                                                  build_int_cst (NULL_TREE, j1)));
4625             }
4626           else
4627             {
4628               /* FIXME: For the moment, the upper bound of the
4629                  iteration domain for i and j is not checked.  */
4630               if (dump_file && (dump_flags & TDF_DETAILS))
4631                 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
4632               *overlaps_a = conflict_fn_not_known ();
4633               *overlaps_b = conflict_fn_not_known ();
4634               *last_conflicts = chrec_dont_know;
4635             }
4636         }
4637       else
4638         {
4639           if (dump_file && (dump_flags & TDF_DETAILS))
4640             fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
4641           *overlaps_a = conflict_fn_not_known ();
4642           *overlaps_b = conflict_fn_not_known ();
4643           *last_conflicts = chrec_dont_know;
4644         }
4645     }
4646   else
4647     {
4648       if (dump_file && (dump_flags & TDF_DETAILS))
4649         fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
4650       *overlaps_a = conflict_fn_not_known ();
4651       *overlaps_b = conflict_fn_not_known ();
4652       *last_conflicts = chrec_dont_know;
4653     }
4654
4655 end_analyze_subs_aa:
4656   obstack_free (&scratch_obstack, NULL);
4657   if (dump_file && (dump_flags & TDF_DETAILS))
4658     {
4659       fprintf (dump_file, "  (overlaps_a = ");
4660       dump_conflict_function (dump_file, *overlaps_a);
4661       fprintf (dump_file, ")\n  (overlaps_b = ");
4662       dump_conflict_function (dump_file, *overlaps_b);
4663       fprintf (dump_file, "))\n");
4664     }
4665 }
4666
4667 /* Returns true when analyze_subscript_affine_affine can be used for
4668    determining the dependence relation between chrec_a and chrec_b,
4669    that contain symbols.  This function modifies chrec_a and chrec_b
4670    such that the analysis result is the same, and such that they don't
4671    contain symbols, and then can safely be passed to the analyzer.
4672
4673    Example: The analysis of the following tuples of evolutions produce
4674    the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
4675    vs. {0, +, 1}_1
4676
4677    {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
4678    {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
4679 */
4680
4681 static bool
4682 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
4683 {
4684   tree diff, type, left_a, left_b, right_b;
4685
4686   if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
4687       || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
4688     /* FIXME: For the moment not handled.  Might be refined later.  */
4689     return false;
4690
4691   type = chrec_type (*chrec_a);
4692   left_a = CHREC_LEFT (*chrec_a);
4693   left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
4694   diff = chrec_fold_minus (type, left_a, left_b);
4695
4696   if (!evolution_function_is_constant_p (diff))
4697     return false;
4698
4699   if (dump_file && (dump_flags & TDF_DETAILS))
4700     fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
4701
4702   *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
4703                                      diff, CHREC_RIGHT (*chrec_a));
4704   right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
4705   *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
4706                                      build_int_cst (type, 0),
4707                                      right_b);
4708   return true;
4709 }
4710
4711 /* Analyze a SIV (Single Index Variable) subscript.  *OVERLAPS_A and
4712    *OVERLAPS_B are initialized to the functions that describe the
4713    relation between the elements accessed twice by CHREC_A and
4714    CHREC_B.  For k >= 0, the following property is verified:
4715
4716    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
4717
4718 static void
4719 analyze_siv_subscript (tree chrec_a,
4720                        tree chrec_b,
4721                        conflict_function **overlaps_a,
4722                        conflict_function **overlaps_b,
4723                        tree *last_conflicts,
4724                        int loop_nest_num)
4725 {
4726   dependence_stats.num_siv++;
4727
4728   if (dump_file && (dump_flags & TDF_DETAILS))
4729     fprintf (dump_file, "(analyze_siv_subscript \n");
4730
4731   if (evolution_function_is_constant_p (chrec_a)
4732       && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
4733     analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
4734                                       overlaps_a, overlaps_b, last_conflicts);
4735
4736   else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
4737            && evolution_function_is_constant_p (chrec_b))
4738     analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
4739                                       overlaps_b, overlaps_a, last_conflicts);
4740
4741   else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
4742            && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
4743     {
4744       if (!chrec_contains_symbols (chrec_a)
4745           && !chrec_contains_symbols (chrec_b))
4746         {
4747           analyze_subscript_affine_affine (chrec_a, chrec_b,
4748                                            overlaps_a, overlaps_b,
4749                                            last_conflicts);
4750
4751           if (CF_NOT_KNOWN_P (*overlaps_a)
4752               || CF_NOT_KNOWN_P (*overlaps_b))
4753             dependence_stats.num_siv_unimplemented++;
4754           else if (CF_NO_DEPENDENCE_P (*overlaps_a)
4755                    || CF_NO_DEPENDENCE_P (*overlaps_b))
4756             dependence_stats.num_siv_independent++;
4757           else
4758             dependence_stats.num_siv_dependent++;
4759         }
4760       else if (can_use_analyze_subscript_affine_affine (&chrec_a,
4761                                                         &chrec_b))
4762         {
4763           analyze_subscript_affine_affine (chrec_a, chrec_b,
4764                                            overlaps_a, overlaps_b,
4765                                            last_conflicts);
4766
4767           if (CF_NOT_KNOWN_P (*overlaps_a)
4768               || CF_NOT_KNOWN_P (*overlaps_b))
4769             dependence_stats.num_siv_unimplemented++;
4770           else if (CF_NO_DEPENDENCE_P (*overlaps_a)
4771                    || CF_NO_DEPENDENCE_P (*overlaps_b))
4772             dependence_stats.num_siv_independent++;
4773           else
4774             dependence_stats.num_siv_dependent++;
4775         }
4776       else
4777         goto siv_subscript_dontknow;
4778     }
4779
4780   else
4781     {
4782     siv_subscript_dontknow:;
4783       if (dump_file && (dump_flags & TDF_DETAILS))
4784         fprintf (dump_file, "  siv test failed: unimplemented");
4785       *overlaps_a = conflict_fn_not_known ();
4786       *overlaps_b = conflict_fn_not_known ();
4787       *last_conflicts = chrec_dont_know;
4788       dependence_stats.num_siv_unimplemented++;
4789     }
4790
4791   if (dump_file && (dump_flags & TDF_DETAILS))
4792     fprintf (dump_file, ")\n");
4793 }
4794
4795 /* Returns false if we can prove that the greatest common divisor of the steps
4796    of CHREC does not divide CST, false otherwise.  */
4797
4798 static bool
4799 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
4800 {
4801   HOST_WIDE_INT cd = 0, val;
4802   tree step;
4803
4804   if (!tree_fits_shwi_p (cst))
4805     return true;
4806   val = tree_to_shwi (cst);
4807
4808   while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
4809     {
4810       step = CHREC_RIGHT (chrec);
4811       if (!tree_fits_shwi_p (step))
4812         return true;
4813       cd = gcd (cd, tree_to_shwi (step));
4814       chrec = CHREC_LEFT (chrec);
4815     }
4816
4817   return val % cd == 0;
4818 }
4819
4820 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
4821    LOOP_NEST.  *OVERLAPS_A and *OVERLAPS_B are initialized to the
4822    functions that describe the relation between the elements accessed
4823    twice by CHREC_A and CHREC_B.  For k >= 0, the following property
4824    is verified:
4825
4826    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
4827
4828 static void
4829 analyze_miv_subscript (tree chrec_a,
4830                        tree chrec_b,
4831                        conflict_function **overlaps_a,
4832                        conflict_function **overlaps_b,
4833                        tree *last_conflicts,
4834                        class loop *loop_nest)
4835 {
4836   tree type, difference;
4837
4838   dependence_stats.num_miv++;
4839   if (dump_file && (dump_flags & TDF_DETAILS))
4840     fprintf (dump_file, "(analyze_miv_subscript \n");
4841
4842   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
4843   chrec_a = chrec_convert (type, chrec_a, NULL);
4844   chrec_b = chrec_convert (type, chrec_b, NULL);
4845   difference = chrec_fold_minus (type, chrec_a, chrec_b);
4846
4847   if (eq_evolutions_p (chrec_a, chrec_b))
4848     {
4849       /* Access functions are the same: all the elements are accessed
4850          in the same order.  */
4851       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4852       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4853       *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
4854       dependence_stats.num_miv_dependent++;
4855     }
4856
4857   else if (evolution_function_is_constant_p (difference)
4858            && evolution_function_is_affine_multivariate_p (chrec_a,
4859                                                            loop_nest->num)
4860            && !gcd_of_steps_may_divide_p (chrec_a, difference))
4861     {
4862       /* testsuite/.../ssa-chrec-33.c
4863          {{21, +, 2}_1, +, -2}_2  vs.  {{20, +, 2}_1, +, -2}_2
4864
4865          The difference is 1, and all the evolution steps are multiples
4866          of 2, consequently there are no overlapping elements.  */
4867       *overlaps_a = conflict_fn_no_dependence ();
4868       *overlaps_b = conflict_fn_no_dependence ();
4869       *last_conflicts = integer_zero_node;
4870       dependence_stats.num_miv_independent++;
4871     }
4872
4873   else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest->num)
4874            && !chrec_contains_symbols (chrec_a, loop_nest)
4875            && evolution_function_is_affine_in_loop (chrec_b, loop_nest->num)
4876            && !chrec_contains_symbols (chrec_b, loop_nest))
4877     {
4878       /* testsuite/.../ssa-chrec-35.c
4879          {0, +, 1}_2  vs.  {0, +, 1}_3
4880          the overlapping elements are respectively located at iterations:
4881          {0, +, 1}_x and {0, +, 1}_x,
4882          in other words, we have the equality:
4883          {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
4884
4885          Other examples:
4886          {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
4887          {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
4888
4889          {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
4890          {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
4891       */
4892       analyze_subscript_affine_affine (chrec_a, chrec_b,
4893                                        overlaps_a, overlaps_b, last_conflicts);
4894
4895       if (CF_NOT_KNOWN_P (*overlaps_a)
4896           || CF_NOT_KNOWN_P (*overlaps_b))
4897         dependence_stats.num_miv_unimplemented++;
4898       else if (CF_NO_DEPENDENCE_P (*overlaps_a)
4899                || CF_NO_DEPENDENCE_P (*overlaps_b))
4900         dependence_stats.num_miv_independent++;
4901       else
4902         dependence_stats.num_miv_dependent++;
4903     }
4904
4905   else
4906     {
4907       /* When the analysis is too difficult, answer "don't know".  */
4908       if (dump_file && (dump_flags & TDF_DETAILS))
4909         fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
4910
4911       *overlaps_a = conflict_fn_not_known ();
4912       *overlaps_b = conflict_fn_not_known ();
4913       *last_conflicts = chrec_dont_know;
4914       dependence_stats.num_miv_unimplemented++;
4915     }
4916
4917   if (dump_file && (dump_flags & TDF_DETAILS))
4918     fprintf (dump_file, ")\n");
4919 }
4920
4921 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
4922    with respect to LOOP_NEST.  OVERLAP_ITERATIONS_A and
4923    OVERLAP_ITERATIONS_B are initialized with two functions that
4924    describe the iterations that contain conflicting elements.
4925
4926    Remark: For an integer k >= 0, the following equality is true:
4927
4928    CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
4929 */
4930
4931 static void
4932 analyze_overlapping_iterations (tree chrec_a,
4933                                 tree chrec_b,
4934                                 conflict_function **overlap_iterations_a,
4935                                 conflict_function **overlap_iterations_b,
4936                                 tree *last_conflicts, class loop *loop_nest)
4937 {
4938   unsigned int lnn = loop_nest->num;
4939
4940   dependence_stats.num_subscript_tests++;
4941
4942   if (dump_file && (dump_flags & TDF_DETAILS))
4943     {
4944       fprintf (dump_file, "(analyze_overlapping_iterations \n");
4945       fprintf (dump_file, "  (chrec_a = ");
4946       print_generic_expr (dump_file, chrec_a);
4947       fprintf (dump_file, ")\n  (chrec_b = ");
4948       print_generic_expr (dump_file, chrec_b);
4949       fprintf (dump_file, ")\n");
4950     }
4951
4952   if (chrec_a == NULL_TREE
4953       || chrec_b == NULL_TREE
4954       || chrec_contains_undetermined (chrec_a)
4955       || chrec_contains_undetermined (chrec_b))
4956     {
4957       dependence_stats.num_subscript_undetermined++;
4958
4959       *overlap_iterations_a = conflict_fn_not_known ();
4960       *overlap_iterations_b = conflict_fn_not_known ();
4961     }
4962
4963   /* If they are the same chrec, and are affine, they overlap
4964      on every iteration.  */
4965   else if (eq_evolutions_p (chrec_a, chrec_b)
4966            && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
4967                || operand_equal_p (chrec_a, chrec_b, 0)))
4968     {
4969       dependence_stats.num_same_subscript_function++;
4970       *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4971       *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4972       *last_conflicts = chrec_dont_know;
4973     }
4974
4975   /* If they aren't the same, and aren't affine, we can't do anything
4976      yet.  */
4977   else if ((chrec_contains_symbols (chrec_a)
4978             || chrec_contains_symbols (chrec_b))
4979            && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
4980                || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
4981     {
4982       dependence_stats.num_subscript_undetermined++;
4983       *overlap_iterations_a = conflict_fn_not_known ();
4984       *overlap_iterations_b = conflict_fn_not_known ();
4985     }
4986
4987   else if (ziv_subscript_p (chrec_a, chrec_b))
4988     analyze_ziv_subscript (chrec_a, chrec_b,
4989                            overlap_iterations_a, overlap_iterations_b,
4990                            last_conflicts);
4991
4992   else if (siv_subscript_p (chrec_a, chrec_b))
4993     analyze_siv_subscript (chrec_a, chrec_b,
4994                            overlap_iterations_a, overlap_iterations_b,
4995                            last_conflicts, lnn);
4996
4997   else
4998     analyze_miv_subscript (chrec_a, chrec_b,
4999                            overlap_iterations_a, overlap_iterations_b,
5000                            last_conflicts, loop_nest);
5001
5002   if (dump_file && (dump_flags & TDF_DETAILS))
5003     {
5004       fprintf (dump_file, "  (overlap_iterations_a = ");
5005       dump_conflict_function (dump_file, *overlap_iterations_a);
5006       fprintf (dump_file, ")\n  (overlap_iterations_b = ");
5007       dump_conflict_function (dump_file, *overlap_iterations_b);
5008       fprintf (dump_file, "))\n");
5009     }
5010 }
5011
5012 /* Helper function for uniquely inserting distance vectors.  */
5013
5014 static void
5015 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
5016 {
5017   for (lambda_vector v : DDR_DIST_VECTS (ddr))
5018     if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
5019       return;
5020
5021   DDR_DIST_VECTS (ddr).safe_push (dist_v);
5022 }
5023
5024 /* Helper function for uniquely inserting direction vectors.  */
5025
5026 static void
5027 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
5028 {
5029   for (lambda_vector v : DDR_DIR_VECTS (ddr))
5030     if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
5031       return;
5032
5033   DDR_DIR_VECTS (ddr).safe_push (dir_v);
5034 }
5035
5036 /* Add a distance of 1 on all the loops outer than INDEX.  If we
5037    haven't yet determined a distance for this outer loop, push a new
5038    distance vector composed of the previous distance, and a distance
5039    of 1 for this outer loop.  Example:
5040
5041    | loop_1
5042    |   loop_2
5043    |     A[10]
5044    |   endloop_2
5045    | endloop_1
5046
5047    Saved vectors are of the form (dist_in_1, dist_in_2).  First, we
5048    save (0, 1), then we have to save (1, 0).  */
5049
5050 static void
5051 add_outer_distances (struct data_dependence_relation *ddr,
5052                      lambda_vector dist_v, int index)
5053 {
5054   /* For each outer loop where init_v is not set, the accesses are
5055      in dependence of distance 1 in the loop.  */
5056   while (--index >= 0)
5057     {
5058       lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5059       lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
5060       save_v[index] = 1;
5061       save_dist_v (ddr, save_v);
5062     }
5063 }
5064
5065 /* Return false when fail to represent the data dependence as a
5066    distance vector.  A_INDEX is the index of the first reference
5067    (0 for DDR_A, 1 for DDR_B) and B_INDEX is the index of the
5068    second reference.  INIT_B is set to true when a component has been
5069    added to the distance vector DIST_V.  INDEX_CARRY is then set to
5070    the index in DIST_V that carries the dependence.  */
5071
5072 static bool
5073 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
5074                              unsigned int a_index, unsigned int b_index,
5075                              lambda_vector dist_v, bool *init_b,
5076                              int *index_carry)
5077 {
5078   unsigned i;
5079   lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5080   class loop *loop = DDR_LOOP_NEST (ddr)[0];
5081
5082   for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
5083     {
5084       tree access_fn_a, access_fn_b;
5085       struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
5086
5087       if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
5088         {
5089           non_affine_dependence_relation (ddr);
5090           return false;
5091         }
5092
5093       access_fn_a = SUB_ACCESS_FN (subscript, a_index);
5094       access_fn_b = SUB_ACCESS_FN (subscript, b_index);
5095
5096       if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
5097           && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
5098         {
5099           HOST_WIDE_INT dist;
5100           int index;
5101           int var_a = CHREC_VARIABLE (access_fn_a);
5102           int var_b = CHREC_VARIABLE (access_fn_b);
5103
5104           if (var_a != var_b
5105               || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
5106             {
5107               non_affine_dependence_relation (ddr);
5108               return false;
5109             }
5110
5111           /* When data references are collected in a loop while data
5112              dependences are analyzed in loop nest nested in the loop, we
5113              would have more number of access functions than number of
5114              loops.  Skip access functions of loops not in the loop nest.
5115
5116              See PR89725 for more information.  */
5117           if (flow_loop_nested_p (get_loop (cfun, var_a), loop))
5118             continue;
5119
5120           dist = int_cst_value (SUB_DISTANCE (subscript));
5121           index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
5122           *index_carry = MIN (index, *index_carry);
5123
5124           /* This is the subscript coupling test.  If we have already
5125              recorded a distance for this loop (a distance coming from
5126              another subscript), it should be the same.  For example,
5127              in the following code, there is no dependence:
5128
5129              | loop i = 0, N, 1
5130              |   T[i+1][i] = ...
5131              |   ... = T[i][i]
5132              | endloop
5133           */
5134           if (init_v[index] != 0 && dist_v[index] != dist)
5135             {
5136               finalize_ddr_dependent (ddr, chrec_known);
5137               return false;
5138             }
5139
5140           dist_v[index] = dist;
5141           init_v[index] = 1;
5142           *init_b = true;
5143         }
5144       else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
5145         {
5146           /* This can be for example an affine vs. constant dependence
5147              (T[i] vs. T[3]) that is not an affine dependence and is
5148              not representable as a distance vector.  */
5149           non_affine_dependence_relation (ddr);
5150           return false;
5151         }
5152       else
5153         *init_b = true;
5154     }
5155
5156   return true;
5157 }
5158
5159 /* Return true when the DDR contains only invariant access functions wrto. loop
5160    number LNUM.  */
5161
5162 static bool
5163 invariant_access_functions (const struct data_dependence_relation *ddr,
5164                             int lnum)
5165 {
5166   for (subscript *sub : DDR_SUBSCRIPTS (ddr))
5167     if (!evolution_function_is_invariant_p (SUB_ACCESS_FN (sub, 0), lnum)
5168         || !evolution_function_is_invariant_p (SUB_ACCESS_FN (sub, 1), lnum))
5169       return false;
5170
5171   return true;
5172 }
5173
5174 /* Helper function for the case where DDR_A and DDR_B are the same
5175    multivariate access function with a constant step.  For an example
5176    see pr34635-1.c.  */
5177
5178 static void
5179 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
5180 {
5181   int x_1, x_2;
5182   tree c_1 = CHREC_LEFT (c_2);
5183   tree c_0 = CHREC_LEFT (c_1);
5184   lambda_vector dist_v;
5185   HOST_WIDE_INT v1, v2, cd;
5186
5187   /* Polynomials with more than 2 variables are not handled yet.  When
5188      the evolution steps are parameters, it is not possible to
5189      represent the dependence using classical distance vectors.  */
5190   if (TREE_CODE (c_0) != INTEGER_CST
5191       || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
5192       || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
5193     {
5194       DDR_AFFINE_P (ddr) = false;
5195       return;
5196     }
5197
5198   x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
5199   x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
5200
5201   /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2).  */
5202   dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5203   v1 = int_cst_value (CHREC_RIGHT (c_1));
5204   v2 = int_cst_value (CHREC_RIGHT (c_2));
5205   cd = gcd (v1, v2);
5206   v1 /= cd;
5207   v2 /= cd;
5208
5209   if (v2 < 0)
5210     {
5211       v2 = -v2;
5212       v1 = -v1;
5213     }
5214
5215   dist_v[x_1] = v2;
5216   dist_v[x_2] = -v1;
5217   save_dist_v (ddr, dist_v);
5218
5219   add_outer_distances (ddr, dist_v, x_1);
5220 }
5221
5222 /* Helper function for the case where DDR_A and DDR_B are the same
5223    access functions.  */
5224
5225 static void
5226 add_other_self_distances (struct data_dependence_relation *ddr)
5227 {
5228   lambda_vector dist_v;
5229   unsigned i;
5230   int index_carry = DDR_NB_LOOPS (ddr);
5231   subscript *sub;
5232   class loop *loop = DDR_LOOP_NEST (ddr)[0];
5233
5234   FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
5235     {
5236       tree access_fun = SUB_ACCESS_FN (sub, 0);
5237
5238       if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
5239         {
5240           if (!evolution_function_is_univariate_p (access_fun, loop->num))
5241             {
5242               if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
5243                 {
5244                   DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
5245                   return;
5246                 }
5247
5248               access_fun = SUB_ACCESS_FN (DDR_SUBSCRIPT (ddr, 0), 0);
5249
5250               if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
5251                 add_multivariate_self_dist (ddr, access_fun);
5252               else
5253                 /* The evolution step is not constant: it varies in
5254                    the outer loop, so this cannot be represented by a
5255                    distance vector.  For example in pr34635.c the
5256                    evolution is {0, +, {0, +, 4}_1}_2.  */
5257                 DDR_AFFINE_P (ddr) = false;
5258
5259               return;
5260             }
5261
5262           /* When data references are collected in a loop while data
5263              dependences are analyzed in loop nest nested in the loop, we
5264              would have more number of access functions than number of
5265              loops.  Skip access functions of loops not in the loop nest.
5266
5267              See PR89725 for more information.  */
5268           if (flow_loop_nested_p (get_loop (cfun, CHREC_VARIABLE (access_fun)),
5269                                   loop))
5270             continue;
5271
5272           index_carry = MIN (index_carry,
5273                              index_in_loop_nest (CHREC_VARIABLE (access_fun),
5274                                                  DDR_LOOP_NEST (ddr)));
5275         }
5276     }
5277
5278   dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5279   add_outer_distances (ddr, dist_v, index_carry);
5280 }
5281
5282 static void
5283 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
5284 {
5285   lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5286
5287   dist_v[0] = 1;
5288   save_dist_v (ddr, dist_v);
5289 }
5290
5291 /* Adds a unit distance vector to DDR when there is a 0 overlap.  This
5292    is the case for example when access functions are the same and
5293    equal to a constant, as in:
5294
5295    | loop_1
5296    |   A[3] = ...
5297    |   ... = A[3]
5298    | endloop_1
5299
5300    in which case the distance vectors are (0) and (1).  */
5301
5302 static void
5303 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
5304 {
5305   unsigned i, j;
5306
5307   for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
5308     {
5309       subscript_p sub = DDR_SUBSCRIPT (ddr, i);
5310       conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
5311       conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
5312
5313       for (j = 0; j < ca->n; j++)
5314         if (affine_function_zero_p (ca->fns[j]))
5315           {
5316             insert_innermost_unit_dist_vector (ddr);
5317             return;
5318           }
5319
5320       for (j = 0; j < cb->n; j++)
5321         if (affine_function_zero_p (cb->fns[j]))
5322           {
5323             insert_innermost_unit_dist_vector (ddr);
5324             return;
5325           }
5326     }
5327 }
5328
5329 /* Return true when the DDR contains two data references that have the
5330    same access functions.  */
5331
5332 static inline bool
5333 same_access_functions (const struct data_dependence_relation *ddr)
5334 {
5335   for (subscript *sub : DDR_SUBSCRIPTS (ddr))
5336     if (!eq_evolutions_p (SUB_ACCESS_FN (sub, 0),
5337                           SUB_ACCESS_FN (sub, 1)))
5338       return false;
5339
5340   return true;
5341 }
5342
5343 /* Compute the classic per loop distance vector.  DDR is the data
5344    dependence relation to build a vector from.  Return false when fail
5345    to represent the data dependence as a distance vector.  */
5346
5347 static bool
5348 build_classic_dist_vector (struct data_dependence_relation *ddr,
5349                            class loop *loop_nest)
5350 {
5351   bool init_b = false;
5352   int index_carry = DDR_NB_LOOPS (ddr);
5353   lambda_vector dist_v;
5354
5355   if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
5356     return false;
5357
5358   if (same_access_functions (ddr))
5359     {
5360       /* Save the 0 vector.  */
5361       dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5362       save_dist_v (ddr, dist_v);
5363
5364       if (invariant_access_functions (ddr, loop_nest->num))
5365         add_distance_for_zero_overlaps (ddr);
5366
5367       if (DDR_NB_LOOPS (ddr) > 1)
5368         add_other_self_distances (ddr);
5369
5370       return true;
5371     }
5372
5373   dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5374   if (!build_classic_dist_vector_1 (ddr, 0, 1, dist_v, &init_b, &index_carry))
5375     return false;
5376
5377   /* Save the distance vector if we initialized one.  */
5378   if (init_b)
5379     {
5380       /* Verify a basic constraint: classic distance vectors should
5381          always be lexicographically positive.
5382
5383          Data references are collected in the order of execution of
5384          the program, thus for the following loop
5385
5386          | for (i = 1; i < 100; i++)
5387          |   for (j = 1; j < 100; j++)
5388          |     {
5389          |       t = T[j+1][i-1];  // A
5390          |       T[j][i] = t + 2;  // B
5391          |     }
5392
5393          references are collected following the direction of the wind:
5394          A then B.  The data dependence tests are performed also
5395          following this order, such that we're looking at the distance
5396          separating the elements accessed by A from the elements later
5397          accessed by B.  But in this example, the distance returned by
5398          test_dep (A, B) is lexicographically negative (-1, 1), that
5399          means that the access A occurs later than B with respect to
5400          the outer loop, ie. we're actually looking upwind.  In this
5401          case we solve test_dep (B, A) looking downwind to the
5402          lexicographically positive solution, that returns the
5403          distance vector (1, -1).  */
5404       if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
5405         {
5406           lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5407           if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
5408             return false;
5409           compute_subscript_distance (ddr);
5410           if (!build_classic_dist_vector_1 (ddr, 1, 0, save_v, &init_b,
5411                                             &index_carry))
5412             return false;
5413           save_dist_v (ddr, save_v);
5414           DDR_REVERSED_P (ddr) = true;
5415
5416           /* In this case there is a dependence forward for all the
5417              outer loops:
5418
5419              | for (k = 1; k < 100; k++)
5420              |  for (i = 1; i < 100; i++)
5421              |   for (j = 1; j < 100; j++)
5422              |     {
5423              |       t = T[j+1][i-1];  // A
5424              |       T[j][i] = t + 2;  // B
5425              |     }
5426
5427              the vectors are:
5428              (0,  1, -1)
5429              (1,  1, -1)
5430              (1, -1,  1)
5431           */
5432           if (DDR_NB_LOOPS (ddr) > 1)
5433             {
5434               add_outer_distances (ddr, save_v, index_carry);
5435               add_outer_distances (ddr, dist_v, index_carry);
5436             }
5437         }
5438       else
5439         {
5440           lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5441           lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
5442
5443           if (DDR_NB_LOOPS (ddr) > 1)
5444             {
5445               lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5446
5447               if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
5448                 return false;
5449               compute_subscript_distance (ddr);
5450               if (!build_classic_dist_vector_1 (ddr, 1, 0, opposite_v, &init_b,
5451                                                 &index_carry))
5452                 return false;
5453
5454               save_dist_v (ddr, save_v);
5455               add_outer_distances (ddr, dist_v, index_carry);
5456               add_outer_distances (ddr, opposite_v, index_carry);
5457             }
5458           else
5459             save_dist_v (ddr, save_v);
5460         }
5461     }
5462   else
5463     {
5464       /* There is a distance of 1 on all the outer loops: Example:
5465          there is a dependence of distance 1 on loop_1 for the array A.
5466
5467          | loop_1
5468          |   A[5] = ...
5469          | endloop
5470       */
5471       add_outer_distances (ddr, dist_v,
5472                            lambda_vector_first_nz (dist_v,
5473                                                    DDR_NB_LOOPS (ddr), 0));
5474     }
5475
5476   if (dump_file && (dump_flags & TDF_DETAILS))
5477     {
5478       unsigned i;
5479
5480       fprintf (dump_file, "(build_classic_dist_vector\n");
5481       for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
5482         {
5483           fprintf (dump_file, "  dist_vector = (");
5484           print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
5485                                DDR_NB_LOOPS (ddr));
5486           fprintf (dump_file, "  )\n");
5487         }
5488       fprintf (dump_file, ")\n");
5489     }
5490
5491   return true;
5492 }
5493
5494 /* Return the direction for a given distance.
5495    FIXME: Computing dir this way is suboptimal, since dir can catch
5496    cases that dist is unable to represent.  */
5497
5498 static inline enum data_dependence_direction
5499 dir_from_dist (int dist)
5500 {
5501   if (dist > 0)
5502     return dir_positive;
5503   else if (dist < 0)
5504     return dir_negative;
5505   else
5506     return dir_equal;
5507 }
5508
5509 /* Compute the classic per loop direction vector.  DDR is the data
5510    dependence relation to build a vector from.  */
5511
5512 static void
5513 build_classic_dir_vector (struct data_dependence_relation *ddr)
5514 {
5515   unsigned i, j;
5516   lambda_vector dist_v;
5517
5518   FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
5519     {
5520       lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5521
5522       for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
5523         dir_v[j] = dir_from_dist (dist_v[j]);
5524
5525       save_dir_v (ddr, dir_v);
5526     }
5527 }
5528
5529 /* Helper function.  Returns true when there is a dependence between the
5530    data references.  A_INDEX is the index of the first reference (0 for
5531    DDR_A, 1 for DDR_B) and B_INDEX is the index of the second reference.  */
5532
5533 static bool
5534 subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
5535                                unsigned int a_index, unsigned int b_index,
5536                                class loop *loop_nest)
5537 {
5538   unsigned int i;
5539   tree last_conflicts;
5540   struct subscript *subscript;
5541   tree res = NULL_TREE;
5542
5543   for (i = 0; DDR_SUBSCRIPTS (ddr).iterate (i, &subscript); i++)
5544     {
5545       conflict_function *overlaps_a, *overlaps_b;
5546
5547       analyze_overlapping_iterations (SUB_ACCESS_FN (subscript, a_index),
5548                                       SUB_ACCESS_FN (subscript, b_index),
5549                                       &overlaps_a, &overlaps_b,
5550                                       &last_conflicts, loop_nest);
5551
5552       if (SUB_CONFLICTS_IN_A (subscript))
5553         free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
5554       if (SUB_CONFLICTS_IN_B (subscript))
5555         free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
5556
5557       SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
5558       SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
5559       SUB_LAST_CONFLICT (subscript) = last_conflicts;
5560
5561       /* If there is any undetermined conflict function we have to
5562          give a conservative answer in case we cannot prove that
5563          no dependence exists when analyzing another subscript.  */
5564       if (CF_NOT_KNOWN_P (overlaps_a)
5565           || CF_NOT_KNOWN_P (overlaps_b))
5566         {
5567           res = chrec_dont_know;
5568           continue;
5569         }
5570
5571       /* When there is a subscript with no dependence we can stop.  */
5572       else if (CF_NO_DEPENDENCE_P (overlaps_a)
5573                || CF_NO_DEPENDENCE_P (overlaps_b))
5574         {
5575           res = chrec_known;
5576           break;
5577         }
5578     }
5579
5580   if (res == NULL_TREE)
5581     return true;
5582
5583   if (res == chrec_known)
5584     dependence_stats.num_dependence_independent++;
5585   else
5586     dependence_stats.num_dependence_undetermined++;
5587   finalize_ddr_dependent (ddr, res);
5588   return false;
5589 }
5590
5591 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR.  */
5592
5593 static void
5594 subscript_dependence_tester (struct data_dependence_relation *ddr,
5595                              class loop *loop_nest)
5596 {
5597   if (subscript_dependence_tester_1 (ddr, 0, 1, loop_nest))
5598     dependence_stats.num_dependence_dependent++;
5599
5600   compute_subscript_distance (ddr);
5601   if (build_classic_dist_vector (ddr, loop_nest))
5602     build_classic_dir_vector (ddr);
5603 }
5604
5605 /* Returns true when all the access functions of A are affine or
5606    constant with respect to LOOP_NEST.  */
5607
5608 static bool
5609 access_functions_are_affine_or_constant_p (const struct data_reference *a,
5610                                            const class loop *loop_nest)
5611 {
5612   vec<tree> fns = DR_ACCESS_FNS (a);
5613   for (tree t : fns)
5614     if (!evolution_function_is_invariant_p (t, loop_nest->num)
5615         && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
5616       return false;
5617
5618   return true;
5619 }
5620
5621 /* This computes the affine dependence relation between A and B with
5622    respect to LOOP_NEST.  CHREC_KNOWN is used for representing the
5623    independence between two accesses, while CHREC_DONT_KNOW is used
5624    for representing the unknown relation.
5625
5626    Note that it is possible to stop the computation of the dependence
5627    relation the first time we detect a CHREC_KNOWN element for a given
5628    subscript.  */
5629
5630 void
5631 compute_affine_dependence (struct data_dependence_relation *ddr,
5632                            class loop *loop_nest)
5633 {
5634   struct data_reference *dra = DDR_A (ddr);
5635   struct data_reference *drb = DDR_B (ddr);
5636
5637   if (dump_file && (dump_flags & TDF_DETAILS))
5638     {
5639       fprintf (dump_file, "(compute_affine_dependence\n");
5640       fprintf (dump_file, "  ref_a: ");
5641       print_generic_expr (dump_file, DR_REF (dra));
5642       fprintf (dump_file, ", stmt_a: ");
5643       print_gimple_stmt (dump_file, DR_STMT (dra), 0, TDF_SLIM);
5644       fprintf (dump_file, "  ref_b: ");
5645       print_generic_expr (dump_file, DR_REF (drb));
5646       fprintf (dump_file, ", stmt_b: ");
5647       print_gimple_stmt (dump_file, DR_STMT (drb), 0, TDF_SLIM);
5648     }
5649
5650   /* Analyze only when the dependence relation is not yet known.  */
5651   if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
5652     {
5653       dependence_stats.num_dependence_tests++;
5654
5655       if (access_functions_are_affine_or_constant_p (dra, loop_nest)
5656           && access_functions_are_affine_or_constant_p (drb, loop_nest))
5657         subscript_dependence_tester (ddr, loop_nest);
5658
5659       /* As a last case, if the dependence cannot be determined, or if
5660          the dependence is considered too difficult to determine, answer
5661          "don't know".  */
5662       else
5663         {
5664           dependence_stats.num_dependence_undetermined++;
5665
5666           if (dump_file && (dump_flags & TDF_DETAILS))
5667             {
5668               fprintf (dump_file, "Data ref a:\n");
5669               dump_data_reference (dump_file, dra);
5670               fprintf (dump_file, "Data ref b:\n");
5671               dump_data_reference (dump_file, drb);
5672               fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
5673             }
5674           finalize_ddr_dependent (ddr, chrec_dont_know);
5675         }
5676     }
5677
5678   if (dump_file && (dump_flags & TDF_DETAILS))
5679     {
5680       if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
5681         fprintf (dump_file, ") -> no dependence\n");
5682       else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
5683         fprintf (dump_file, ") -> dependence analysis failed\n");
5684       else
5685         fprintf (dump_file, ")\n");
5686     }
5687 }
5688
5689 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
5690    the data references in DATAREFS, in the LOOP_NEST.  When
5691    COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
5692    relations.  Return true when successful, i.e. data references number
5693    is small enough to be handled.  */
5694
5695 bool
5696 compute_all_dependences (const vec<data_reference_p> &datarefs,
5697                          vec<ddr_p> *dependence_relations,
5698                          const vec<loop_p> &loop_nest,
5699                          bool compute_self_and_rr)
5700 {
5701   struct data_dependence_relation *ddr;
5702   struct data_reference *a, *b;
5703   unsigned int i, j;
5704
5705   if ((int) datarefs.length ()
5706       > param_loop_max_datarefs_for_datadeps)
5707     {
5708       struct data_dependence_relation *ddr;
5709
5710       /* Insert a single relation into dependence_relations:
5711          chrec_dont_know.  */
5712       ddr = initialize_data_dependence_relation (NULL, NULL, loop_nest);
5713       dependence_relations->safe_push (ddr);
5714       return false;
5715     }
5716
5717   FOR_EACH_VEC_ELT (datarefs, i, a)
5718     for (j = i + 1; datarefs.iterate (j, &b); j++)
5719       if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
5720         {
5721           ddr = initialize_data_dependence_relation (a, b, loop_nest);
5722           dependence_relations->safe_push (ddr);
5723           if (loop_nest.exists ())
5724             compute_affine_dependence (ddr, loop_nest[0]);
5725         }
5726
5727   if (compute_self_and_rr)
5728     FOR_EACH_VEC_ELT (datarefs, i, a)
5729       {
5730         ddr = initialize_data_dependence_relation (a, a, loop_nest);
5731         dependence_relations->safe_push (ddr);
5732         if (loop_nest.exists ())
5733           compute_affine_dependence (ddr, loop_nest[0]);
5734       }
5735
5736   return true;
5737 }
5738
5739 /* Describes a location of a memory reference.  */
5740
5741 struct data_ref_loc
5742 {
5743   /* The memory reference.  */
5744   tree ref;
5745
5746   /* True if the memory reference is read.  */
5747   bool is_read;
5748
5749   /* True if the data reference is conditional within the containing
5750      statement, i.e. if it might not occur even when the statement
5751      is executed and runs to completion.  */
5752   bool is_conditional_in_stmt;
5753 };
5754
5755
5756 /* Stores the locations of memory references in STMT to REFERENCES.  Returns
5757    true if STMT clobbers memory, false otherwise.  */
5758
5759 static bool
5760 get_references_in_stmt (gimple *stmt, vec<data_ref_loc, va_heap> *references)
5761 {
5762   bool clobbers_memory = false;
5763   data_ref_loc ref;
5764   tree op0, op1;
5765   enum gimple_code stmt_code = gimple_code (stmt);
5766
5767   /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
5768      As we cannot model data-references to not spelled out
5769      accesses give up if they may occur.  */
5770   if (stmt_code == GIMPLE_CALL
5771       && !(gimple_call_flags (stmt) & ECF_CONST))
5772     {
5773       /* Allow IFN_GOMP_SIMD_LANE in their own loops.  */
5774       if (gimple_call_internal_p (stmt))
5775         switch (gimple_call_internal_fn (stmt))
5776           {
5777           case IFN_GOMP_SIMD_LANE:
5778             {
5779               class loop *loop = gimple_bb (stmt)->loop_father;
5780               tree uid = gimple_call_arg (stmt, 0);
5781               gcc_assert (TREE_CODE (uid) == SSA_NAME);
5782               if (loop == NULL
5783                   || loop->simduid != SSA_NAME_VAR (uid))
5784                 clobbers_memory = true;
5785               break;
5786             }
5787           case IFN_MASK_LOAD:
5788           case IFN_MASK_STORE:
5789             break;
5790           default:
5791             clobbers_memory = true;
5792             break;
5793           }
5794       else
5795         clobbers_memory = true;
5796     }
5797   else if (stmt_code == GIMPLE_ASM
5798            && (gimple_asm_volatile_p (as_a <gasm *> (stmt))
5799                || gimple_vuse (stmt)))
5800     clobbers_memory = true;
5801
5802   if (!gimple_vuse (stmt))
5803     return clobbers_memory;
5804
5805   if (stmt_code == GIMPLE_ASSIGN)
5806     {
5807       tree base;
5808       op0 = gimple_assign_lhs (stmt);
5809       op1 = gimple_assign_rhs1 (stmt);
5810
5811       if (DECL_P (op1)
5812           || (REFERENCE_CLASS_P (op1)
5813               && (base = get_base_address (op1))
5814               && TREE_CODE (base) != SSA_NAME
5815               && !is_gimple_min_invariant (base)))
5816         {
5817           ref.ref = op1;
5818           ref.is_read = true;
5819           ref.is_conditional_in_stmt = false;
5820           references->safe_push (ref);
5821         }
5822     }
5823   else if (stmt_code == GIMPLE_CALL)
5824     {
5825       unsigned i, n;
5826       tree ptr, type;
5827       unsigned int align;
5828
5829       ref.is_read = false;
5830       if (gimple_call_internal_p (stmt))
5831         switch (gimple_call_internal_fn (stmt))
5832           {
5833           case IFN_MASK_LOAD:
5834             if (gimple_call_lhs (stmt) == NULL_TREE)
5835               break;
5836             ref.is_read = true;
5837             /* FALLTHRU */
5838           case IFN_MASK_STORE:
5839             ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)), 0);
5840             align = tree_to_shwi (gimple_call_arg (stmt, 1));
5841             if (ref.is_read)
5842               type = TREE_TYPE (gimple_call_lhs (stmt));
5843             else
5844               type = TREE_TYPE (gimple_call_arg (stmt, 3));
5845             if (TYPE_ALIGN (type) != align)
5846               type = build_aligned_type (type, align);
5847             ref.is_conditional_in_stmt = true;
5848             ref.ref = fold_build2 (MEM_REF, type, gimple_call_arg (stmt, 0),
5849                                    ptr);
5850             references->safe_push (ref);
5851             return false;
5852           default:
5853             break;
5854           }
5855
5856       op0 = gimple_call_lhs (stmt);
5857       n = gimple_call_num_args (stmt);
5858       for (i = 0; i < n; i++)
5859         {
5860           op1 = gimple_call_arg (stmt, i);
5861
5862           if (DECL_P (op1)
5863               || (REFERENCE_CLASS_P (op1) && get_base_address (op1)))
5864             {
5865               ref.ref = op1;
5866               ref.is_read = true;
5867               ref.is_conditional_in_stmt = false;
5868               references->safe_push (ref);
5869             }
5870         }
5871     }
5872   else
5873     return clobbers_memory;
5874
5875   if (op0
5876       && (DECL_P (op0)
5877           || (REFERENCE_CLASS_P (op0) && get_base_address (op0))))
5878     {
5879       ref.ref = op0;
5880       ref.is_read = false;
5881       ref.is_conditional_in_stmt = false;
5882       references->safe_push (ref);
5883     }
5884   return clobbers_memory;
5885 }
5886
5887
5888 /* Returns true if the loop-nest has any data reference.  */
5889
5890 bool
5891 loop_nest_has_data_refs (loop_p loop)
5892 {
5893   basic_block *bbs = get_loop_body (loop);
5894   auto_vec<data_ref_loc, 3> references;
5895
5896   for (unsigned i = 0; i < loop->num_nodes; i++)
5897     {
5898       basic_block bb = bbs[i];
5899       gimple_stmt_iterator bsi;
5900
5901       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5902         {
5903           gimple *stmt = gsi_stmt (bsi);
5904           get_references_in_stmt (stmt, &references);
5905           if (references.length ())
5906             {
5907               free (bbs);
5908               return true;
5909             }
5910         }
5911     }
5912   free (bbs);
5913   return false;
5914 }
5915
5916 /* Stores the data references in STMT to DATAREFS.  If there is an unanalyzable
5917    reference, returns false, otherwise returns true.  NEST is the outermost
5918    loop of the loop nest in which the references should be analyzed.  */
5919
5920 opt_result
5921 find_data_references_in_stmt (class loop *nest, gimple *stmt,
5922                               vec<data_reference_p> *datarefs)
5923 {
5924   auto_vec<data_ref_loc, 2> references;
5925   data_reference_p dr;
5926
5927   if (get_references_in_stmt (stmt, &references))
5928     return opt_result::failure_at (stmt, "statement clobbers memory: %G",
5929                                    stmt);
5930
5931   for (const data_ref_loc &ref : references)
5932     {
5933       dr = create_data_ref (nest ? loop_preheader_edge (nest) : NULL,
5934                             loop_containing_stmt (stmt), ref.ref,
5935                             stmt, ref.is_read, ref.is_conditional_in_stmt);
5936       gcc_assert (dr != NULL);
5937       datarefs->safe_push (dr);
5938     }
5939
5940   return opt_result::success ();
5941 }
5942
5943 /* Stores the data references in STMT to DATAREFS.  If there is an
5944    unanalyzable reference, returns false, otherwise returns true.
5945    NEST is the outermost loop of the loop nest in which the references
5946    should be instantiated, LOOP is the loop in which the references
5947    should be analyzed.  */
5948
5949 bool
5950 graphite_find_data_references_in_stmt (edge nest, loop_p loop, gimple *stmt,
5951                                        vec<data_reference_p> *datarefs)
5952 {
5953   auto_vec<data_ref_loc, 2> references;
5954   bool ret = true;
5955   data_reference_p dr;
5956
5957   if (get_references_in_stmt (stmt, &references))
5958     return false;
5959
5960   for (const data_ref_loc &ref : references)
5961     {
5962       dr = create_data_ref (nest, loop, ref.ref, stmt, ref.is_read,
5963                             ref.is_conditional_in_stmt);
5964       gcc_assert (dr != NULL);
5965       datarefs->safe_push (dr);
5966     }
5967
5968   return ret;
5969 }
5970
5971 /* Search the data references in LOOP, and record the information into
5972    DATAREFS.  Returns chrec_dont_know when failing to analyze a
5973    difficult case, returns NULL_TREE otherwise.  */
5974
5975 tree
5976 find_data_references_in_bb (class loop *loop, basic_block bb,
5977                             vec<data_reference_p> *datarefs)
5978 {
5979   gimple_stmt_iterator bsi;
5980
5981   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5982     {
5983       gimple *stmt = gsi_stmt (bsi);
5984
5985       if (!find_data_references_in_stmt (loop, stmt, datarefs))
5986         {
5987           struct data_reference *res;
5988           res = XCNEW (struct data_reference);
5989           datarefs->safe_push (res);
5990
5991           return chrec_dont_know;
5992         }
5993     }
5994
5995   return NULL_TREE;
5996 }
5997
5998 /* Search the data references in LOOP, and record the information into
5999    DATAREFS.  Returns chrec_dont_know when failing to analyze a
6000    difficult case, returns NULL_TREE otherwise.
6001
6002    TODO: This function should be made smarter so that it can handle address
6003    arithmetic as if they were array accesses, etc.  */
6004
6005 tree
6006 find_data_references_in_loop (class loop *loop,
6007                               vec<data_reference_p> *datarefs)
6008 {
6009   basic_block bb, *bbs;
6010   unsigned int i;
6011
6012   bbs = get_loop_body_in_dom_order (loop);
6013
6014   for (i = 0; i < loop->num_nodes; i++)
6015     {
6016       bb = bbs[i];
6017
6018       if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
6019         {
6020           free (bbs);
6021           return chrec_dont_know;
6022         }
6023     }
6024   free (bbs);
6025
6026   return NULL_TREE;
6027 }
6028
6029 /* Return the alignment in bytes that DRB is guaranteed to have at all
6030    times.  */
6031
6032 unsigned int
6033 dr_alignment (innermost_loop_behavior *drb)
6034 {
6035   /* Get the alignment of BASE_ADDRESS + INIT.  */
6036   unsigned int alignment = drb->base_alignment;
6037   unsigned int misalignment = (drb->base_misalignment
6038                                + TREE_INT_CST_LOW (drb->init));
6039   if (misalignment != 0)
6040     alignment = MIN (alignment, misalignment & -misalignment);
6041
6042   /* Cap it to the alignment of OFFSET.  */
6043   if (!integer_zerop (drb->offset))
6044     alignment = MIN (alignment, drb->offset_alignment);
6045
6046   /* Cap it to the alignment of STEP.  */
6047   if (!integer_zerop (drb->step))
6048     alignment = MIN (alignment, drb->step_alignment);
6049
6050   return alignment;
6051 }
6052
6053 /* If BASE is a pointer-typed SSA name, try to find the object that it
6054    is based on.  Return this object X on success and store the alignment
6055    in bytes of BASE - &X in *ALIGNMENT_OUT.  */
6056
6057 static tree
6058 get_base_for_alignment_1 (tree base, unsigned int *alignment_out)
6059 {
6060   if (TREE_CODE (base) != SSA_NAME || !POINTER_TYPE_P (TREE_TYPE (base)))
6061     return NULL_TREE;
6062
6063   gimple *def = SSA_NAME_DEF_STMT (base);
6064   base = analyze_scalar_evolution (loop_containing_stmt (def), base);
6065
6066   /* Peel chrecs and record the minimum alignment preserved by
6067      all steps.  */
6068   unsigned int alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
6069   while (TREE_CODE (base) == POLYNOMIAL_CHREC)
6070     {
6071       unsigned int step_alignment = highest_pow2_factor (CHREC_RIGHT (base));
6072       alignment = MIN (alignment, step_alignment);
6073       base = CHREC_LEFT (base);
6074     }
6075
6076   /* Punt if the expression is too complicated to handle.  */
6077   if (tree_contains_chrecs (base, NULL) || !POINTER_TYPE_P (TREE_TYPE (base)))
6078     return NULL_TREE;
6079
6080   /* The only useful cases are those for which a dereference folds to something
6081      other than an INDIRECT_REF.  */
6082   tree ref_type = TREE_TYPE (TREE_TYPE (base));
6083   tree ref = fold_indirect_ref_1 (UNKNOWN_LOCATION, ref_type, base);
6084   if (!ref)
6085     return NULL_TREE;
6086
6087   /* Analyze the base to which the steps we peeled were applied.  */
6088   poly_int64 bitsize, bitpos, bytepos;
6089   machine_mode mode;
6090   int unsignedp, reversep, volatilep;
6091   tree offset;
6092   base = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
6093                               &unsignedp, &reversep, &volatilep);
6094   if (!base || !multiple_p (bitpos, BITS_PER_UNIT, &bytepos))
6095     return NULL_TREE;
6096
6097   /* Restrict the alignment to that guaranteed by the offsets.  */
6098   unsigned int bytepos_alignment = known_alignment (bytepos);
6099   if (bytepos_alignment != 0)
6100     alignment = MIN (alignment, bytepos_alignment);
6101   if (offset)
6102     {
6103       unsigned int offset_alignment = highest_pow2_factor (offset);
6104       alignment = MIN (alignment, offset_alignment);
6105     }
6106
6107   *alignment_out = alignment;
6108   return base;
6109 }
6110
6111 /* Return the object whose alignment would need to be changed in order
6112    to increase the alignment of ADDR.  Store the maximum achievable
6113    alignment in *MAX_ALIGNMENT.  */
6114
6115 tree
6116 get_base_for_alignment (tree addr, unsigned int *max_alignment)
6117 {
6118   tree base = get_base_for_alignment_1 (addr, max_alignment);
6119   if (base)
6120     return base;
6121
6122   if (TREE_CODE (addr) == ADDR_EXPR)
6123     addr = TREE_OPERAND (addr, 0);
6124   *max_alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
6125   return addr;
6126 }
6127
6128 /* Recursive helper function.  */
6129
6130 static bool
6131 find_loop_nest_1 (class loop *loop, vec<loop_p> *loop_nest)
6132 {
6133   /* Inner loops of the nest should not contain siblings.  Example:
6134      when there are two consecutive loops,
6135
6136      | loop_0
6137      |   loop_1
6138      |     A[{0, +, 1}_1]
6139      |   endloop_1
6140      |   loop_2
6141      |     A[{0, +, 1}_2]
6142      |   endloop_2
6143      | endloop_0
6144
6145      the dependence relation cannot be captured by the distance
6146      abstraction.  */
6147   if (loop->next)
6148     return false;
6149
6150   loop_nest->safe_push (loop);
6151   if (loop->inner)
6152     return find_loop_nest_1 (loop->inner, loop_nest);
6153   return true;
6154 }
6155
6156 /* Return false when the LOOP is not well nested.  Otherwise return
6157    true and insert in LOOP_NEST the loops of the nest.  LOOP_NEST will
6158    contain the loops from the outermost to the innermost, as they will
6159    appear in the classic distance vector.  */
6160
6161 bool
6162 find_loop_nest (class loop *loop, vec<loop_p> *loop_nest)
6163 {
6164   loop_nest->safe_push (loop);
6165   if (loop->inner)
6166     return find_loop_nest_1 (loop->inner, loop_nest);
6167   return true;
6168 }
6169
6170 /* Returns true when the data dependences have been computed, false otherwise.
6171    Given a loop nest LOOP, the following vectors are returned:
6172    DATAREFS is initialized to all the array elements contained in this loop,
6173    DEPENDENCE_RELATIONS contains the relations between the data references.
6174    Compute read-read and self relations if
6175    COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE.  */
6176
6177 bool
6178 compute_data_dependences_for_loop (class loop *loop,
6179                                    bool compute_self_and_read_read_dependences,
6180                                    vec<loop_p> *loop_nest,
6181                                    vec<data_reference_p> *datarefs,
6182                                    vec<ddr_p> *dependence_relations)
6183 {
6184   bool res = true;
6185
6186   memset (&dependence_stats, 0, sizeof (dependence_stats));
6187
6188   /* If the loop nest is not well formed, or one of the data references
6189      is not computable, give up without spending time to compute other
6190      dependences.  */
6191   if (!loop
6192       || !find_loop_nest (loop, loop_nest)
6193       || find_data_references_in_loop (loop, datarefs) == chrec_dont_know
6194       || !compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
6195                                    compute_self_and_read_read_dependences))
6196     res = false;
6197
6198   if (dump_file && (dump_flags & TDF_STATS))
6199     {
6200       fprintf (dump_file, "Dependence tester statistics:\n");
6201
6202       fprintf (dump_file, "Number of dependence tests: %d\n",
6203                dependence_stats.num_dependence_tests);
6204       fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
6205                dependence_stats.num_dependence_dependent);
6206       fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
6207                dependence_stats.num_dependence_independent);
6208       fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
6209                dependence_stats.num_dependence_undetermined);
6210
6211       fprintf (dump_file, "Number of subscript tests: %d\n",
6212                dependence_stats.num_subscript_tests);
6213       fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
6214                dependence_stats.num_subscript_undetermined);
6215       fprintf (dump_file, "Number of same subscript function: %d\n",
6216                dependence_stats.num_same_subscript_function);
6217
6218       fprintf (dump_file, "Number of ziv tests: %d\n",
6219                dependence_stats.num_ziv);
6220       fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
6221                dependence_stats.num_ziv_dependent);
6222       fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
6223                dependence_stats.num_ziv_independent);
6224       fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
6225                dependence_stats.num_ziv_unimplemented);
6226
6227       fprintf (dump_file, "Number of siv tests: %d\n",
6228                dependence_stats.num_siv);
6229       fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
6230                dependence_stats.num_siv_dependent);
6231       fprintf (dump_file, "Number of siv tests returning independent: %d\n",
6232                dependence_stats.num_siv_independent);
6233       fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
6234                dependence_stats.num_siv_unimplemented);
6235
6236       fprintf (dump_file, "Number of miv tests: %d\n",
6237                dependence_stats.num_miv);
6238       fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
6239                dependence_stats.num_miv_dependent);
6240       fprintf (dump_file, "Number of miv tests returning independent: %d\n",
6241                dependence_stats.num_miv_independent);
6242       fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
6243                dependence_stats.num_miv_unimplemented);
6244     }
6245
6246   return res;
6247 }
6248
6249 /* Free the memory used by a data dependence relation DDR.  */
6250
6251 void
6252 free_dependence_relation (struct data_dependence_relation *ddr)
6253 {
6254   if (ddr == NULL)
6255     return;
6256
6257   if (DDR_SUBSCRIPTS (ddr).exists ())
6258     free_subscripts (DDR_SUBSCRIPTS (ddr));
6259   DDR_DIST_VECTS (ddr).release ();
6260   DDR_DIR_VECTS (ddr).release ();
6261
6262   free (ddr);
6263 }
6264
6265 /* Free the memory used by the data dependence relations from
6266    DEPENDENCE_RELATIONS.  */
6267
6268 void
6269 free_dependence_relations (vec<ddr_p>& dependence_relations)
6270 {
6271   for (data_dependence_relation *ddr : dependence_relations)
6272     if (ddr)
6273       free_dependence_relation (ddr);
6274
6275   dependence_relations.release ();
6276 }
6277
6278 /* Free the memory used by the data references from DATAREFS.  */
6279
6280 void
6281 free_data_refs (vec<data_reference_p>& datarefs)
6282 {
6283   for (data_reference *dr : datarefs)
6284     free_data_ref (dr);
6285   datarefs.release ();
6286 }
6287
6288 /* Common routine implementing both dr_direction_indicator and
6289    dr_zero_step_indicator.  Return USEFUL_MIN if the indicator is known
6290    to be >= USEFUL_MIN and -1 if the indicator is known to be negative.
6291    Return the step as the indicator otherwise.  */
6292
6293 static tree
6294 dr_step_indicator (struct data_reference *dr, int useful_min)
6295 {
6296   tree step = DR_STEP (dr);
6297   if (!step)
6298     return NULL_TREE;
6299   STRIP_NOPS (step);
6300   /* Look for cases where the step is scaled by a positive constant
6301      integer, which will often be the access size.  If the multiplication
6302      doesn't change the sign (due to overflow effects) then we can
6303      test the unscaled value instead.  */
6304   if (TREE_CODE (step) == MULT_EXPR
6305       && TREE_CODE (TREE_OPERAND (step, 1)) == INTEGER_CST
6306       && tree_int_cst_sgn (TREE_OPERAND (step, 1)) > 0)
6307     {
6308       tree factor = TREE_OPERAND (step, 1);
6309       step = TREE_OPERAND (step, 0);
6310
6311       /* Strip widening and truncating conversions as well as nops.  */
6312       if (CONVERT_EXPR_P (step)
6313           && INTEGRAL_TYPE_P (TREE_TYPE (TREE_OPERAND (step, 0))))
6314         step = TREE_OPERAND (step, 0);
6315       tree type = TREE_TYPE (step);
6316
6317       /* Get the range of step values that would not cause overflow.  */
6318       widest_int minv = (wi::to_widest (TYPE_MIN_VALUE (ssizetype))
6319                          / wi::to_widest (factor));
6320       widest_int maxv = (wi::to_widest (TYPE_MAX_VALUE (ssizetype))
6321                          / wi::to_widest (factor));
6322
6323       /* Get the range of values that the unconverted step actually has.  */
6324       wide_int step_min, step_max;
6325       value_range vr;
6326       if (TREE_CODE (step) != SSA_NAME
6327           || !get_range_query (cfun)->range_of_expr (vr, step)
6328           || vr.kind () != VR_RANGE)
6329         {
6330           step_min = wi::to_wide (TYPE_MIN_VALUE (type));
6331           step_max = wi::to_wide (TYPE_MAX_VALUE (type));
6332         }
6333       else
6334         {
6335           step_min = vr.lower_bound ();
6336           step_max = vr.upper_bound ();
6337         }
6338
6339       /* Check whether the unconverted step has an acceptable range.  */
6340       signop sgn = TYPE_SIGN (type);
6341       if (wi::les_p (minv, widest_int::from (step_min, sgn))
6342           && wi::ges_p (maxv, widest_int::from (step_max, sgn)))
6343         {
6344           if (wi::ge_p (step_min, useful_min, sgn))
6345             return ssize_int (useful_min);
6346           else if (wi::lt_p (step_max, 0, sgn))
6347             return ssize_int (-1);
6348           else
6349             return fold_convert (ssizetype, step);
6350         }
6351     }
6352   return DR_STEP (dr);
6353 }
6354
6355 /* Return a value that is negative iff DR has a negative step.  */
6356
6357 tree
6358 dr_direction_indicator (struct data_reference *dr)
6359 {
6360   return dr_step_indicator (dr, 0);
6361 }
6362
6363 /* Return a value that is zero iff DR has a zero step.  */
6364
6365 tree
6366 dr_zero_step_indicator (struct data_reference *dr)
6367 {
6368   return dr_step_indicator (dr, 1);
6369 }
6370
6371 /* Return true if DR is known to have a nonnegative (but possibly zero)
6372    step.  */
6373
6374 bool
6375 dr_known_forward_stride_p (struct data_reference *dr)
6376 {
6377   tree indicator = dr_direction_indicator (dr);
6378   tree neg_step_val = fold_binary (LT_EXPR, boolean_type_node,
6379                                    fold_convert (ssizetype, indicator),
6380                                    ssize_int (0));
6381   return neg_step_val && integer_zerop (neg_step_val);
6382 }