gcc/tree-data-ref.cc

   1 /* Data references and dependences detectors.
   2    Copyright (C) 2003-2023 Free Software Foundation, Inc.
   3    Contributed by Sebastian Pop <pop@cri.ensmp.fr>
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 /* This pass walks a given loop structure searching for array
  22    references.  The information about the array accesses is recorded
  23    in DATA_REFERENCE structures.
  24
  25    The basic test for determining the dependences is:
  26    given two access functions chrec1 and chrec2 to a same array, and
  27    x and y two vectors from the iteration domain, the same element of
  28    the array is accessed twice at iterations x and y if and only if:
  29    |             chrec1 (x) == chrec2 (y).
  30
  31    The goals of this analysis are:
  32
  33    - to determine the independence: the relation between two
  34      independent accesses is qualified with the chrec_known (this
  35      information allows a loop parallelization),
  36
  37    - when two data references access the same data, to qualify the
  38      dependence relation with classic dependence representations:
  39
  40        - distance vectors
  41        - direction vectors
  42        - loop carried level dependence
  43        - polyhedron dependence
  44      or with the chains of recurrences based representation,
  45
  46    - to define a knowledge base for storing the data dependence
  47      information,
  48
  49    - to define an interface to access this data.
  50
  51
  52    Definitions:
  53
  54    - subscript: given two array accesses a subscript is the tuple
  55    composed of the access functions for a given dimension.  Example:
  56    Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
  57    (f1, g1), (f2, g2), (f3, g3).
  58
  59    - Diophantine equation: an equation whose coefficients and
  60    solutions are integer constants, for example the equation
  61    |   3*x + 2*y = 1
  62    has an integer solution x = 1 and y = -1.
  63
  64    References:
  65
  66    - "Advanced Compilation for High Performance Computing" by Randy
  67    Allen and Ken Kennedy.
  68    http://citeseer.ist.psu.edu/goff91practical.html
  69
  70    - "Loop Transformations for Restructuring Compilers - The Foundations"
  71    by Utpal Banerjee.
  72
  73
  74 */
  75
  76 #include "config.h"
  77 #include "system.h"
  78 #include "coretypes.h"
  79 #include "backend.h"
  80 #include "rtl.h"
  81 #include "tree.h"
  82 #include "gimple.h"
  83 #include "gimple-pretty-print.h"
  84 #include "alias.h"
  85 #include "fold-const.h"
  86 #include "expr.h"
  87 #include "gimple-iterator.h"
  88 #include "tree-ssa-loop-niter.h"
  89 #include "tree-ssa-loop.h"
  90 #include "tree-ssa.h"
  91 #include "cfgloop.h"
  92 #include "tree-data-ref.h"
  93 #include "tree-scalar-evolution.h"
  94 #include "dumpfile.h"
  95 #include "tree-affine.h"
  96 #include "builtins.h"
  97 #include "tree-eh.h"
  98 #include "ssa.h"
  99 #include "internal-fn.h"
 100 #include "vr-values.h"
 101 #include "range-op.h"
 102 #include "tree-ssa-loop-ivopts.h"
 103
 104 static struct datadep_stats
 105 {
 106   int num_dependence_tests;
 107   int num_dependence_dependent;
 108   int num_dependence_independent;
 109   int num_dependence_undetermined;
 110
 111   int num_subscript_tests;
 112   int num_subscript_undetermined;
 113   int num_same_subscript_function;
 114
 115   int num_ziv;
 116   int num_ziv_independent;
 117   int num_ziv_dependent;
 118   int num_ziv_unimplemented;
 119
 120   int num_siv;
 121   int num_siv_independent;
 122   int num_siv_dependent;
 123   int num_siv_unimplemented;
 124
 125   int num_miv;
 126   int num_miv_independent;
 127   int num_miv_dependent;
 128   int num_miv_unimplemented;
 129 } dependence_stats;
 130
 131 static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
 132                                            unsigned int, unsigned int,
 133                                            class loop *);
 134 /* Returns true iff A divides B.  */
 135
 136 static inline bool
 137 tree_fold_divides_p (const_tree a, const_tree b)
 138 {
 139   gcc_assert (TREE_CODE (a) == INTEGER_CST);
 140   gcc_assert (TREE_CODE (b) == INTEGER_CST);
 141   return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
 142 }
 143
 144 /* Returns true iff A divides B.  */
 145
 146 static inline bool
 147 int_divides_p (lambda_int a, lambda_int b)
 148 {
 149   return ((b % a) == 0);
 150 }
 151
 152 /* Return true if reference REF contains a union access.  */
 153
 154 static bool
 155 ref_contains_union_access_p (tree ref)
 156 {
 157   while (handled_component_p (ref))
 158     {
 159       ref = TREE_OPERAND (ref, 0);
 160       if (TREE_CODE (TREE_TYPE (ref)) == UNION_TYPE
 161           || TREE_CODE (TREE_TYPE (ref)) == QUAL_UNION_TYPE)
 162         return true;
 163     }
 164   return false;
 165 }
 166
 167 \f
 168
 169 /* Dump into FILE all the data references from DATAREFS.  */
 170
 171 static void
 172 dump_data_references (FILE *file, vec<data_reference_p> datarefs)
 173 {
 174   for (data_reference *dr : datarefs)
 175     dump_data_reference (file, dr);
 176 }
 177
 178 /* Unified dump into FILE all the data references from DATAREFS.  */
 179
 180 DEBUG_FUNCTION void
 181 debug (vec<data_reference_p> &ref)
 182 {
 183   dump_data_references (stderr, ref);
 184 }
 185
 186 DEBUG_FUNCTION void
 187 debug (vec<data_reference_p> *ptr)
 188 {
 189   if (ptr)
 190     debug (*ptr);
 191   else
 192     fprintf (stderr, "<nil>\n");
 193 }
 194
 195
 196 /* Dump into STDERR all the data references from DATAREFS.  */
 197
 198 DEBUG_FUNCTION void
 199 debug_data_references (vec<data_reference_p> datarefs)
 200 {
 201   dump_data_references (stderr, datarefs);
 202 }
 203
 204 /* Print to STDERR the data_reference DR.  */
 205
 206 DEBUG_FUNCTION void
 207 debug_data_reference (struct data_reference *dr)
 208 {
 209   dump_data_reference (stderr, dr);
 210 }
 211
 212 /* Dump function for a DATA_REFERENCE structure.  */
 213
 214 void
 215 dump_data_reference (FILE *outf,
 216                      struct data_reference *dr)
 217 {
 218   unsigned int i;
 219
 220   fprintf (outf, "#(Data Ref: \n");
 221   fprintf (outf, "#  bb: %d \n", gimple_bb (DR_STMT (dr))->index);
 222   fprintf (outf, "#  stmt: ");
 223   print_gimple_stmt (outf, DR_STMT (dr), 0);
 224   fprintf (outf, "#  ref: ");
 225   print_generic_stmt (outf, DR_REF (dr));
 226   fprintf (outf, "#  base_object: ");
 227   print_generic_stmt (outf, DR_BASE_OBJECT (dr));
 228
 229   for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
 230     {
 231       fprintf (outf, "#  Access function %d: ", i);
 232       print_generic_stmt (outf, DR_ACCESS_FN (dr, i));
 233     }
 234   fprintf (outf, "#)\n");
 235 }
 236
 237 /* Unified dump function for a DATA_REFERENCE structure.  */
 238
 239 DEBUG_FUNCTION void
 240 debug (data_reference &ref)
 241 {
 242   dump_data_reference (stderr, &ref);
 243 }
 244
 245 DEBUG_FUNCTION void
 246 debug (data_reference *ptr)
 247 {
 248   if (ptr)
 249     debug (*ptr);
 250   else
 251     fprintf (stderr, "<nil>\n");
 252 }
 253
 254
 255 /* Dumps the affine function described by FN to the file OUTF.  */
 256
 257 DEBUG_FUNCTION void
 258 dump_affine_function (FILE *outf, affine_fn fn)
 259 {
 260   unsigned i;
 261   tree coef;
 262
 263   print_generic_expr (outf, fn[0], TDF_SLIM);
 264   for (i = 1; fn.iterate (i, &coef); i++)
 265     {
 266       fprintf (outf, " + ");
 267       print_generic_expr (outf, coef, TDF_SLIM);
 268       fprintf (outf, " * x_%u", i);
 269     }
 270 }
 271
 272 /* Dumps the conflict function CF to the file OUTF.  */
 273
 274 DEBUG_FUNCTION void
 275 dump_conflict_function (FILE *outf, conflict_function *cf)
 276 {
 277   unsigned i;
 278
 279   if (cf->n == NO_DEPENDENCE)
 280     fprintf (outf, "no dependence");
 281   else if (cf->n == NOT_KNOWN)
 282     fprintf (outf, "not known");
 283   else
 284     {
 285       for (i = 0; i < cf->n; i++)
 286         {
 287           if (i != 0)
 288             fprintf (outf, " ");
 289           fprintf (outf, "[");
 290           dump_affine_function (outf, cf->fns[i]);
 291           fprintf (outf, "]");
 292         }
 293     }
 294 }
 295
 296 /* Dump function for a SUBSCRIPT structure.  */
 297
 298 DEBUG_FUNCTION void
 299 dump_subscript (FILE *outf, struct subscript *subscript)
 300 {
 301   conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
 302
 303   fprintf (outf, "\n (subscript \n");
 304   fprintf (outf, "  iterations_that_access_an_element_twice_in_A: ");
 305   dump_conflict_function (outf, cf);
 306   if (CF_NONTRIVIAL_P (cf))
 307     {
 308       tree last_iteration = SUB_LAST_CONFLICT (subscript);
 309       fprintf (outf, "\n  last_conflict: ");
 310       print_generic_expr (outf, last_iteration);
 311     }
 312
 313   cf = SUB_CONFLICTS_IN_B (subscript);
 314   fprintf (outf, "\n  iterations_that_access_an_element_twice_in_B: ");
 315   dump_conflict_function (outf, cf);
 316   if (CF_NONTRIVIAL_P (cf))
 317     {
 318       tree last_iteration = SUB_LAST_CONFLICT (subscript);
 319       fprintf (outf, "\n  last_conflict: ");
 320       print_generic_expr (outf, last_iteration);
 321     }
 322
 323   fprintf (outf, "\n  (Subscript distance: ");
 324   print_generic_expr (outf, SUB_DISTANCE (subscript));
 325   fprintf (outf, " ))\n");
 326 }
 327
 328 /* Print the classic direction vector DIRV to OUTF.  */
 329
 330 DEBUG_FUNCTION void
 331 print_direction_vector (FILE *outf,
 332                         lambda_vector dirv,
 333                         int length)
 334 {
 335   int eq;
 336
 337   for (eq = 0; eq < length; eq++)
 338     {
 339       enum data_dependence_direction dir = ((enum data_dependence_direction)
 340                                             dirv[eq]);
 341
 342       switch (dir)
 343         {
 344         case dir_positive:
 345           fprintf (outf, "    +");
 346           break;
 347         case dir_negative:
 348           fprintf (outf, "    -");
 349           break;
 350         case dir_equal:
 351           fprintf (outf, "    =");
 352           break;
 353         case dir_positive_or_equal:
 354           fprintf (outf, "   +=");
 355           break;
 356         case dir_positive_or_negative:
 357           fprintf (outf, "   +-");
 358           break;
 359         case dir_negative_or_equal:
 360           fprintf (outf, "   -=");
 361           break;
 362         case dir_star:
 363           fprintf (outf, "    *");
 364           break;
 365         default:
 366           fprintf (outf, "indep");
 367           break;
 368         }
 369     }
 370   fprintf (outf, "\n");
 371 }
 372
 373 /* Print a vector of direction vectors.  */
 374
 375 DEBUG_FUNCTION void
 376 print_dir_vectors (FILE *outf, vec<lambda_vector> dir_vects,
 377                    int length)
 378 {
 379   for (lambda_vector v : dir_vects)
 380     print_direction_vector (outf, v, length);
 381 }
 382
 383 /* Print out a vector VEC of length N to OUTFILE.  */
 384
 385 DEBUG_FUNCTION void
 386 print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
 387 {
 388   int i;
 389
 390   for (i = 0; i < n; i++)
 391     fprintf (outfile, HOST_WIDE_INT_PRINT_DEC " ", vector[i]);
 392   fprintf (outfile, "\n");
 393 }
 394
 395 /* Print a vector of distance vectors.  */
 396
 397 DEBUG_FUNCTION void
 398 print_dist_vectors (FILE *outf, vec<lambda_vector> dist_vects,
 399                     int length)
 400 {
 401   for (lambda_vector v : dist_vects)
 402     print_lambda_vector (outf, v, length);
 403 }
 404
 405 /* Dump function for a DATA_DEPENDENCE_RELATION structure.  */
 406
 407 DEBUG_FUNCTION void
 408 dump_data_dependence_relation (FILE *outf, const data_dependence_relation *ddr)
 409 {
 410   struct data_reference *dra, *drb;
 411
 412   fprintf (outf, "(Data Dep: \n");
 413
 414   if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
 415     {
 416       if (ddr)
 417         {
 418           dra = DDR_A (ddr);
 419           drb = DDR_B (ddr);
 420           if (dra)
 421             dump_data_reference (outf, dra);
 422           else
 423             fprintf (outf, "    (nil)\n");
 424           if (drb)
 425             dump_data_reference (outf, drb);
 426           else
 427             fprintf (outf, "    (nil)\n");
 428         }
 429       fprintf (outf, "    (don't know)\n)\n");
 430       return;
 431     }
 432
 433   dra = DDR_A (ddr);
 434   drb = DDR_B (ddr);
 435   dump_data_reference (outf, dra);
 436   dump_data_reference (outf, drb);
 437
 438   if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
 439     fprintf (outf, "    (no dependence)\n");
 440
 441   else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
 442     {
 443       unsigned int i;
 444       class loop *loopi;
 445
 446       subscript *sub;
 447       FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
 448         {
 449           fprintf (outf, "  access_fn_A: ");
 450           print_generic_stmt (outf, SUB_ACCESS_FN (sub, 0));
 451           fprintf (outf, "  access_fn_B: ");
 452           print_generic_stmt (outf, SUB_ACCESS_FN (sub, 1));
 453           dump_subscript (outf, sub);
 454         }
 455
 456       fprintf (outf, "  loop nest: (");
 457       FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr), i, loopi)
 458         fprintf (outf, "%d ", loopi->num);
 459       fprintf (outf, ")\n");
 460
 461       for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
 462         {
 463           fprintf (outf, "  distance_vector: ");
 464           print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
 465                                DDR_NB_LOOPS (ddr));
 466         }
 467
 468       for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
 469         {
 470           fprintf (outf, "  direction_vector: ");
 471           print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
 472                                   DDR_NB_LOOPS (ddr));
 473         }
 474     }
 475
 476   fprintf (outf, ")\n");
 477 }
 478
 479 /* Debug version.  */
 480
 481 DEBUG_FUNCTION void
 482 debug_data_dependence_relation (const struct data_dependence_relation *ddr)
 483 {
 484   dump_data_dependence_relation (stderr, ddr);
 485 }
 486
 487 /* Dump into FILE all the dependence relations from DDRS.  */
 488
 489 DEBUG_FUNCTION void
 490 dump_data_dependence_relations (FILE *file, const vec<ddr_p> &ddrs)
 491 {
 492   for (auto ddr : ddrs)
 493     dump_data_dependence_relation (file, ddr);
 494 }
 495
 496 DEBUG_FUNCTION void
 497 debug (vec<ddr_p> &ref)
 498 {
 499   dump_data_dependence_relations (stderr, ref);
 500 }
 501
 502 DEBUG_FUNCTION void
 503 debug (vec<ddr_p> *ptr)
 504 {
 505   if (ptr)
 506     debug (*ptr);
 507   else
 508     fprintf (stderr, "<nil>\n");
 509 }
 510
 511
 512 /* Dump to STDERR all the dependence relations from DDRS.  */
 513
 514 DEBUG_FUNCTION void
 515 debug_data_dependence_relations (vec<ddr_p> ddrs)
 516 {
 517   dump_data_dependence_relations (stderr, ddrs);
 518 }
 519
 520 /* Dumps the distance and direction vectors in FILE.  DDRS contains
 521    the dependence relations, and VECT_SIZE is the size of the
 522    dependence vectors, or in other words the number of loops in the
 523    considered nest.  */
 524
 525 DEBUG_FUNCTION void
 526 dump_dist_dir_vectors (FILE *file, vec<ddr_p> ddrs)
 527 {
 528   for (data_dependence_relation *ddr : ddrs)
 529     if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
 530       {
 531         for (lambda_vector v : DDR_DIST_VECTS (ddr))
 532           {
 533             fprintf (file, "DISTANCE_V (");
 534             print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
 535             fprintf (file, ")\n");
 536           }
 537
 538         for (lambda_vector v : DDR_DIR_VECTS (ddr))
 539           {
 540             fprintf (file, "DIRECTION_V (");
 541             print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
 542             fprintf (file, ")\n");
 543           }
 544       }
 545
 546   fprintf (file, "\n\n");
 547 }
 548
 549 /* Dumps the data dependence relations DDRS in FILE.  */
 550
 551 DEBUG_FUNCTION void
 552 dump_ddrs (FILE *file, vec<ddr_p> ddrs)
 553 {
 554   for (data_dependence_relation *ddr : ddrs)
 555     dump_data_dependence_relation (file, ddr);
 556
 557   fprintf (file, "\n\n");
 558 }
 559
 560 DEBUG_FUNCTION void
 561 debug_ddrs (vec<ddr_p> ddrs)
 562 {
 563   dump_ddrs (stderr, ddrs);
 564 }
 565
 566 /* If RESULT_RANGE is nonnull, set *RESULT_RANGE to the range of
 567    OP0 CODE OP1, where:
 568
 569    - OP0 CODE OP1 has integral type TYPE
 570    - the range of OP0 is given by OP0_RANGE and
 571    - the range of OP1 is given by OP1_RANGE.
 572
 573    Independently of RESULT_RANGE, try to compute:
 574
 575      DELTA = ((sizetype) OP0 CODE (sizetype) OP1)
 576              - (sizetype) (OP0 CODE OP1)
 577
 578    as a constant and subtract DELTA from the ssizetype constant in *OFF.
 579    Return true on success, or false if DELTA is not known at compile time.
 580
 581    Truncation and sign changes are known to distribute over CODE, i.e.
 582
 583      (itype) (A CODE B) == (itype) A CODE (itype) B
 584
 585    for any integral type ITYPE whose precision is no greater than the
 586    precision of A and B.  */
 587
 588 static bool
 589 compute_distributive_range (tree type, value_range &op0_range,
 590                             tree_code code, value_range &op1_range,
 591                             tree *off, value_range *result_range)
 592 {
 593   gcc_assert (INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type));
 594   if (result_range)
 595     {
 596       range_op_handler op (code, type);
 597       if (!op.fold_range (*result_range, type, op0_range, op1_range))
 598         result_range->set_varying (type);
 599     }
 600
 601   /* The distributive property guarantees that if TYPE is no narrower
 602      than SIZETYPE,
 603
 604        (sizetype) (OP0 CODE OP1) == (sizetype) OP0 CODE (sizetype) OP1
 605
 606      and so we can treat DELTA as zero.  */
 607   if (TYPE_PRECISION (type) >= TYPE_PRECISION (sizetype))
 608     return true;
 609
 610   /* If overflow is undefined, we can assume that:
 611
 612        X == (ssizetype) OP0 CODE (ssizetype) OP1
 613
 614      is within the range of TYPE, i.e.:
 615
 616        X == (ssizetype) (TYPE) X
 617
 618      Distributing the (TYPE) truncation over X gives:
 619
 620        X == (ssizetype) (OP0 CODE OP1)
 621
 622      Casting both sides to sizetype and distributing the sizetype cast
 623      over X gives:
 624
 625        (sizetype) OP0 CODE (sizetype) OP1 == (sizetype) (OP0 CODE OP1)
 626
 627      and so we can treat DELTA as zero.  */
 628   if (TYPE_OVERFLOW_UNDEFINED (type))
 629     return true;
 630
 631   /* Compute the range of:
 632
 633        (ssizetype) OP0 CODE (ssizetype) OP1
 634
 635      The distributive property guarantees that this has the same bitpattern as:
 636
 637        (sizetype) OP0 CODE (sizetype) OP1
 638
 639      but its range is more conducive to analysis.  */
 640   range_cast (op0_range, ssizetype);
 641   range_cast (op1_range, ssizetype);
 642   value_range wide_range;
 643   range_op_handler op (code, ssizetype);
 644   bool saved_flag_wrapv = flag_wrapv;
 645   flag_wrapv = 1;
 646   if (!op.fold_range (wide_range, ssizetype, op0_range, op1_range))
 647     wide_range.set_varying (ssizetype);;
 648   flag_wrapv = saved_flag_wrapv;
 649   if (wide_range.num_pairs () != 1 || !range_int_cst_p (&wide_range))
 650     return false;
 651
 652   wide_int lb = wide_range.lower_bound ();
 653   wide_int ub = wide_range.upper_bound ();
 654
 655   /* Calculate the number of times that each end of the range overflows or
 656      underflows TYPE.  We can only calculate DELTA if the numbers match.  */
 657   unsigned int precision = TYPE_PRECISION (type);
 658   if (!TYPE_UNSIGNED (type))
 659     {
 660       wide_int type_min = wi::mask (precision - 1, true, lb.get_precision ());
 661       lb -= type_min;
 662       ub -= type_min;
 663     }
 664   wide_int upper_bits = wi::mask (precision, true, lb.get_precision ());
 665   lb &= upper_bits;
 666   ub &= upper_bits;
 667   if (lb != ub)
 668     return false;
 669
 670   /* OP0 CODE OP1 overflows exactly arshift (LB, PRECISION) times, with
 671      negative values indicating underflow.  The low PRECISION bits of LB
 672      are clear, so DELTA is therefore LB (== UB).  */
 673   *off = wide_int_to_tree (ssizetype, wi::to_wide (*off) - lb);
 674   return true;
 675 }
 676
 677 /* Return true if (sizetype) OP == (sizetype) (TO_TYPE) OP,
 678    given that OP has type FROM_TYPE and range RANGE.  Both TO_TYPE and
 679    FROM_TYPE are integral types.  */
 680
 681 static bool
 682 nop_conversion_for_offset_p (tree to_type, tree from_type, value_range &range)
 683 {
 684   gcc_assert (INTEGRAL_TYPE_P (to_type)
 685               && INTEGRAL_TYPE_P (from_type)
 686               && !TYPE_OVERFLOW_TRAPS (to_type)
 687               && !TYPE_OVERFLOW_TRAPS (from_type));
 688
 689   /* Converting to something no narrower than sizetype and then to sizetype
 690      is equivalent to converting directly to sizetype.  */
 691   if (TYPE_PRECISION (to_type) >= TYPE_PRECISION (sizetype))
 692     return true;
 693
 694   /* Check whether TO_TYPE can represent all values that FROM_TYPE can.  */
 695   if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type)
 696       && (TYPE_UNSIGNED (from_type) || !TYPE_UNSIGNED (to_type)))
 697     return true;
 698
 699   /* For narrowing conversions, we could in principle test whether
 700      the bits in FROM_TYPE but not in TO_TYPE have a fixed value
 701      and apply a constant adjustment.
 702
 703      For other conversions (which involve a sign change) we could
 704      check that the signs are always equal, and apply a constant
 705      adjustment if the signs are negative.
 706
 707      However, both cases should be rare.  */
 708   return range_fits_type_p (&range, TYPE_PRECISION (to_type),
 709                             TYPE_SIGN (to_type));
 710 }
 711
 712 static void
 713 split_constant_offset (tree type, tree *var, tree *off,
 714                        value_range *result_range,
 715                        hash_map<tree, std::pair<tree, tree> > &cache,
 716                        unsigned *limit);
 717
 718 /* Helper function for split_constant_offset.  If TYPE is a pointer type,
 719    try to express OP0 CODE OP1 as:
 720
 721      POINTER_PLUS <*VAR, (sizetype) *OFF>
 722
 723    where:
 724
 725    - *VAR has type TYPE
 726    - *OFF is a constant of type ssizetype.
 727
 728    If TYPE is an integral type, try to express (sizetype) (OP0 CODE OP1) as:
 729
 730      *VAR + (sizetype) *OFF
 731
 732    where:
 733
 734    - *VAR has type sizetype
 735    - *OFF is a constant of type ssizetype.
 736
 737    In both cases, OP0 CODE OP1 has type TYPE.
 738
 739    Return true on success.  A false return value indicates that we can't
 740    do better than set *OFF to zero.
 741
 742    When returning true, set RESULT_RANGE to the range of OP0 CODE OP1,
 743    if RESULT_RANGE is nonnull and if we can do better than assume VR_VARYING.
 744
 745    CACHE caches {*VAR, *OFF} pairs for SSA names that we've previously
 746    visited.  LIMIT counts down the number of SSA names that we are
 747    allowed to process before giving up.  */
 748
 749 static bool
 750 split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
 751                          tree *var, tree *off, value_range *result_range,
 752                          hash_map<tree, std::pair<tree, tree> > &cache,
 753                          unsigned *limit)
 754 {
 755   tree var0, var1;
 756   tree off0, off1;
 757   value_range op0_range, op1_range;
 758
 759   *var = NULL_TREE;
 760   *off = NULL_TREE;
 761
 762   if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type))
 763     return false;
 764
 765   switch (code)
 766     {
 767     case INTEGER_CST:
 768       *var = size_int (0);
 769       *off = fold_convert (ssizetype, op0);
 770       if (result_range)
 771         result_range->set (op0, op0);
 772       return true;
 773
 774     case POINTER_PLUS_EXPR:
 775       split_constant_offset (op0, &var0, &off0, nullptr, cache, limit);
 776       split_constant_offset (op1, &var1, &off1, nullptr, cache, limit);
 777       *var = fold_build2 (POINTER_PLUS_EXPR, type, var0, var1);
 778       *off = size_binop (PLUS_EXPR, off0, off1);
 779       return true;
 780
 781     case PLUS_EXPR:
 782     case MINUS_EXPR:
 783       split_constant_offset (op0, &var0, &off0, &op0_range, cache, limit);
 784       split_constant_offset (op1, &var1, &off1, &op1_range, cache, limit);
 785       *off = size_binop (code, off0, off1);
 786       if (!compute_distributive_range (type, op0_range, code, op1_range,
 787                                        off, result_range))
 788         return false;
 789       *var = fold_build2 (code, sizetype, var0, var1);
 790       return true;
 791
 792     case MULT_EXPR:
 793       if (TREE_CODE (op1) != INTEGER_CST)
 794         return false;
 795
 796       split_constant_offset (op0, &var0, &off0, &op0_range, cache, limit);
 797       op1_range.set (op1, op1);
 798       *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
 799       if (!compute_distributive_range (type, op0_range, code, op1_range,
 800                                        off, result_range))
 801         return false;
 802       *var = fold_build2 (MULT_EXPR, sizetype, var0,
 803                           fold_convert (sizetype, op1));
 804       return true;
 805
 806     case ADDR_EXPR:
 807       {
 808         tree base, poffset;
 809         poly_int64 pbitsize, pbitpos, pbytepos;
 810         machine_mode pmode;
 811         int punsignedp, preversep, pvolatilep;
 812
 813         op0 = TREE_OPERAND (op0, 0);
 814         base
 815           = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset, &pmode,
 816                                  &punsignedp, &preversep, &pvolatilep);
 817
 818         if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
 819           return false;
 820         base = build_fold_addr_expr (base);
 821         off0 = ssize_int (pbytepos);
 822
 823         if (poffset)
 824           {
 825             split_constant_offset (poffset, &poffset, &off1, nullptr,
 826                                    cache, limit);
 827             off0 = size_binop (PLUS_EXPR, off0, off1);
 828             base = fold_build_pointer_plus (base, poffset);
 829           }
 830
 831         var0 = fold_convert (type, base);
 832
 833         /* If variable length types are involved, punt, otherwise casts
 834            might be converted into ARRAY_REFs in gimplify_conversion.
 835            To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
 836            possibly no longer appears in current GIMPLE, might resurface.
 837            This perhaps could run
 838            if (CONVERT_EXPR_P (var0))
 839              {
 840                gimplify_conversion (&var0);
 841                // Attempt to fill in any within var0 found ARRAY_REF's
 842                // element size from corresponding op embedded ARRAY_REF,
 843                // if unsuccessful, just punt.
 844              }  */
 845         while (POINTER_TYPE_P (type))
 846           type = TREE_TYPE (type);
 847         if (int_size_in_bytes (type) < 0)
 848           return false;
 849
 850         *var = var0;
 851         *off = off0;
 852         return true;
 853       }
 854
 855     case SSA_NAME:
 856       {
 857         if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op0))
 858           return false;
 859
 860         gimple *def_stmt = SSA_NAME_DEF_STMT (op0);
 861         enum tree_code subcode;
 862
 863         if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
 864           return false;
 865
 866         subcode = gimple_assign_rhs_code (def_stmt);
 867
 868         /* We are using a cache to avoid un-CSEing large amounts of code.  */
 869         bool use_cache = false;
 870         if (!has_single_use (op0)
 871             && (subcode == POINTER_PLUS_EXPR
 872                 || subcode == PLUS_EXPR
 873                 || subcode == MINUS_EXPR
 874                 || subcode == MULT_EXPR
 875                 || subcode == ADDR_EXPR
 876                 || CONVERT_EXPR_CODE_P (subcode)))
 877           {
 878             use_cache = true;
 879             bool existed;
 880             std::pair<tree, tree> &e = cache.get_or_insert (op0, &existed);
 881             if (existed)
 882               {
 883                 if (integer_zerop (e.second))
 884                   return false;
 885                 *var = e.first;
 886                 *off = e.second;
 887                 /* The caller sets the range in this case.  */
 888                 return true;
 889               }
 890             e = std::make_pair (op0, ssize_int (0));
 891           }
 892
 893         if (*limit == 0)
 894           return false;
 895         --*limit;
 896
 897         var0 = gimple_assign_rhs1 (def_stmt);
 898         var1 = gimple_assign_rhs2 (def_stmt);
 899
 900         bool res = split_constant_offset_1 (type, var0, subcode, var1,
 901                                             var, off, nullptr, cache, limit);
 902         if (res && use_cache)
 903           *cache.get (op0) = std::make_pair (*var, *off);
 904         /* The caller sets the range in this case.  */
 905         return res;
 906       }
 907     CASE_CONVERT:
 908       {
 909         /* We can only handle the following conversions:
 910
 911            - Conversions from one pointer type to another pointer type.
 912
 913            - Conversions from one non-trapping integral type to another
 914              non-trapping integral type.  In this case, the recursive
 915              call makes sure that:
 916
 917                (sizetype) OP0
 918
 919              can be expressed as a sizetype operation involving VAR and OFF,
 920              and all we need to do is check whether:
 921
 922                (sizetype) OP0 == (sizetype) (TYPE) OP0
 923
 924            - Conversions from a non-trapping sizetype-size integral type to
 925              a like-sized pointer type.  In this case, the recursive call
 926              makes sure that:
 927
 928                (sizetype) OP0 == *VAR + (sizetype) *OFF
 929
 930              and we can convert that to:
 931
 932                POINTER_PLUS <(TYPE) *VAR, (sizetype) *OFF>
 933
 934            - Conversions from a sizetype-sized pointer type to a like-sized
 935              non-trapping integral type.  In this case, the recursive call
 936              makes sure that:
 937
 938                OP0 == POINTER_PLUS <*VAR, (sizetype) *OFF>
 939
 940              where the POINTER_PLUS and *VAR have the same precision as
 941              TYPE (and the same precision as sizetype).  Then:
 942
 943                (sizetype) (TYPE) OP0 == (sizetype) *VAR + (sizetype) *OFF.  */
 944         tree itype = TREE_TYPE (op0);
 945         if ((POINTER_TYPE_P (itype)
 946              || (INTEGRAL_TYPE_P (itype) && !TYPE_OVERFLOW_TRAPS (itype)))
 947             && (POINTER_TYPE_P (type)
 948                 || (INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type)))
 949             && (POINTER_TYPE_P (type) == POINTER_TYPE_P (itype)
 950                 || (TYPE_PRECISION (type) == TYPE_PRECISION (sizetype)
 951                     && TYPE_PRECISION (itype) == TYPE_PRECISION (sizetype))))
 952           {
 953             if (POINTER_TYPE_P (type))
 954               {
 955                 split_constant_offset (op0, var, off, nullptr, cache, limit);
 956                 *var = fold_convert (type, *var);
 957               }
 958             else if (POINTER_TYPE_P (itype))
 959               {
 960                 split_constant_offset (op0, var, off, nullptr, cache, limit);
 961                 *var = fold_convert (sizetype, *var);
 962               }
 963             else
 964               {
 965                 split_constant_offset (op0, var, off, &op0_range,
 966                                        cache, limit);
 967                 if (!nop_conversion_for_offset_p (type, itype, op0_range))
 968                   return false;
 969                 if (result_range)
 970                   {
 971                     *result_range = op0_range;
 972                     range_cast (*result_range, type);
 973                   }
 974               }
 975             return true;
 976           }
 977         return false;
 978       }
 979
 980     default:
 981       return false;
 982     }
 983 }
 984
 985 /* If EXP has pointer type, try to express it as:
 986
 987      POINTER_PLUS <*VAR, (sizetype) *OFF>
 988
 989    where:
 990
 991    - *VAR has the same type as EXP
 992    - *OFF is a constant of type ssizetype.
 993
 994    If EXP has an integral type, try to express (sizetype) EXP as:
 995
 996      *VAR + (sizetype) *OFF
 997
 998    where:
 999
1000    - *VAR has type sizetype
1001    - *OFF is a constant of type ssizetype.
1002
1003    If EXP_RANGE is nonnull, set it to the range of EXP.
1004
1005    CACHE caches {*VAR, *OFF} pairs for SSA names that we've previously
1006    visited.  LIMIT counts down the number of SSA names that we are
1007    allowed to process before giving up.  */
1008
1009 static void
1010 split_constant_offset (tree exp, tree *var, tree *off, value_range *exp_range,
1011                        hash_map<tree, std::pair<tree, tree> > &cache,
1012                        unsigned *limit)
1013 {
1014   tree type = TREE_TYPE (exp), op0, op1;
1015   enum tree_code code;
1016
1017   code = TREE_CODE (exp);
1018   if (exp_range)
1019     {
1020       *exp_range = type;
1021       if (code == SSA_NAME)
1022         {
1023           value_range vr;
1024           get_range_query (cfun)->range_of_expr (vr, exp);
1025           if (vr.undefined_p ())
1026             vr.set_varying (TREE_TYPE (exp));
1027           wide_int var_min = wi::to_wide (vr.min ());
1028           wide_int var_max = wi::to_wide (vr.max ());
1029           value_range_kind vr_kind = vr.kind ();
1030           wide_int var_nonzero = get_nonzero_bits (exp);
1031           vr_kind = intersect_range_with_nonzero_bits (vr_kind,
1032                                                        &var_min, &var_max,
1033                                                        var_nonzero,
1034                                                        TYPE_SIGN (type));
1035           /* This check for VR_VARYING is here because the old code
1036              using get_range_info would return VR_RANGE for the entire
1037              domain, instead of VR_VARYING.  The new code normalizes
1038              full-domain ranges to VR_VARYING.  */
1039           if (vr_kind == VR_RANGE || vr_kind == VR_VARYING)
1040             *exp_range = value_range (type, var_min, var_max);
1041         }
1042     }
1043
1044   if (!tree_is_chrec (exp)
1045       && get_gimple_rhs_class (TREE_CODE (exp)) != GIMPLE_TERNARY_RHS)
1046     {
1047       extract_ops_from_tree (exp, &code, &op0, &op1);
1048       if (split_constant_offset_1 (type, op0, code, op1, var, off,
1049                                    exp_range, cache, limit))
1050         return;
1051     }
1052
1053   *var = exp;
1054   if (INTEGRAL_TYPE_P (type))
1055     *var = fold_convert (sizetype, *var);
1056   *off = ssize_int (0);
1057
1058   value_range r;
1059   if (exp_range && code != SSA_NAME
1060       && get_range_query (cfun)->range_of_expr (r, exp)
1061       && !r.undefined_p ())
1062     *exp_range = r;
1063 }
1064
1065 /* Expresses EXP as VAR + OFF, where OFF is a constant.  VAR has the same
1066    type as EXP while OFF has type ssizetype.  */
1067
1068 void
1069 split_constant_offset (tree exp, tree *var, tree *off)
1070 {
1071   unsigned limit = param_ssa_name_def_chain_limit;
1072   static hash_map<tree, std::pair<tree, tree> > *cache;
1073   if (!cache)
1074     cache = new hash_map<tree, std::pair<tree, tree> > (37);
1075   split_constant_offset (exp, var, off, nullptr, *cache, &limit);
1076   *var = fold_convert (TREE_TYPE (exp), *var);
1077   cache->empty ();
1078 }
1079
1080 /* Returns the address ADDR of an object in a canonical shape (without nop
1081    casts, and with type of pointer to the object).  */
1082
1083 static tree
1084 canonicalize_base_object_address (tree addr)
1085 {
1086   tree orig = addr;
1087
1088   STRIP_NOPS (addr);
1089
1090   /* The base address may be obtained by casting from integer, in that case
1091      keep the cast.  */
1092   if (!POINTER_TYPE_P (TREE_TYPE (addr)))
1093     return orig;
1094
1095   if (TREE_CODE (addr) != ADDR_EXPR)
1096     return addr;
1097
1098   return build_fold_addr_expr (TREE_OPERAND (addr, 0));
1099 }
1100
1101 /* Analyze the behavior of memory reference REF within STMT.
1102    There are two modes:
1103
1104    - BB analysis.  In this case we simply split the address into base,
1105      init and offset components, without reference to any containing loop.
1106      The resulting base and offset are general expressions and they can
1107      vary arbitrarily from one iteration of the containing loop to the next.
1108      The step is always zero.
1109
1110    - loop analysis.  In this case we analyze the reference both wrt LOOP
1111      and on the basis that the reference occurs (is "used") in LOOP;
1112      see the comment above analyze_scalar_evolution_in_loop for more
1113      information about this distinction.  The base, init, offset and
1114      step fields are all invariant in LOOP.
1115
1116    Perform BB analysis if LOOP is null, or if LOOP is the function's
1117    dummy outermost loop.  In other cases perform loop analysis.
1118
1119    Return true if the analysis succeeded and store the results in DRB if so.
1120    BB analysis can only fail for bitfield or reversed-storage accesses.  */
1121
1122 opt_result
1123 dr_analyze_innermost (innermost_loop_behavior *drb, tree ref,
1124                       class loop *loop, const gimple *stmt)
1125 {
1126   poly_int64 pbitsize, pbitpos;
1127   tree base, poffset;
1128   machine_mode pmode;
1129   int punsignedp, preversep, pvolatilep;
1130   affine_iv base_iv, offset_iv;
1131   tree init, dinit, step;
1132   bool in_loop = (loop && loop->num);
1133
1134   if (dump_file && (dump_flags & TDF_DETAILS))
1135     fprintf (dump_file, "analyze_innermost: ");
1136
1137   base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset, &pmode,
1138                               &punsignedp, &preversep, &pvolatilep);
1139   gcc_assert (base != NULL_TREE);
1140
1141   poly_int64 pbytepos;
1142   if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
1143     return opt_result::failure_at (stmt,
1144                                    "failed: bit offset alignment.\n");
1145
1146   if (preversep)
1147     return opt_result::failure_at (stmt,
1148                                    "failed: reverse storage order.\n");
1149
1150   /* Calculate the alignment and misalignment for the inner reference.  */
1151   unsigned int HOST_WIDE_INT bit_base_misalignment;
1152   unsigned int bit_base_alignment;
1153   get_object_alignment_1 (base, &bit_base_alignment, &bit_base_misalignment);
1154
1155   /* There are no bitfield references remaining in BASE, so the values
1156      we got back must be whole bytes.  */
1157   gcc_assert (bit_base_alignment % BITS_PER_UNIT == 0
1158               && bit_base_misalignment % BITS_PER_UNIT == 0);
1159   unsigned int base_alignment = bit_base_alignment / BITS_PER_UNIT;
1160   poly_int64 base_misalignment = bit_base_misalignment / BITS_PER_UNIT;
1161
1162   if (TREE_CODE (base) == MEM_REF)
1163     {
1164       if (!integer_zerop (TREE_OPERAND (base, 1)))
1165         {
1166           /* Subtract MOFF from the base and add it to POFFSET instead.
1167              Adjust the misalignment to reflect the amount we subtracted.  */
1168           poly_offset_int moff = mem_ref_offset (base);
1169           base_misalignment -= moff.force_shwi ();
1170           tree mofft = wide_int_to_tree (sizetype, moff);
1171           if (!poffset)
1172             poffset = mofft;
1173           else
1174             poffset = size_binop (PLUS_EXPR, poffset, mofft);
1175         }
1176       base = TREE_OPERAND (base, 0);
1177     }
1178   else
1179     base = build_fold_addr_expr (base);
1180
1181   if (in_loop)
1182     {
1183       if (!simple_iv (loop, loop, base, &base_iv, true))
1184         return opt_result::failure_at
1185           (stmt, "failed: evolution of base is not affine.\n");
1186     }
1187   else
1188     {
1189       base_iv.base = base;
1190       base_iv.step = ssize_int (0);
1191       base_iv.no_overflow = true;
1192     }
1193
1194   if (!poffset)
1195     {
1196       offset_iv.base = ssize_int (0);
1197       offset_iv.step = ssize_int (0);
1198     }
1199   else
1200     {
1201       if (!in_loop)
1202         {
1203           offset_iv.base = poffset;
1204           offset_iv.step = ssize_int (0);
1205         }
1206       else if (!simple_iv (loop, loop, poffset, &offset_iv, true))
1207         return opt_result::failure_at
1208           (stmt, "failed: evolution of offset is not affine.\n");
1209     }
1210
1211   init = ssize_int (pbytepos);
1212
1213   /* Subtract any constant component from the base and add it to INIT instead.
1214      Adjust the misalignment to reflect the amount we subtracted.  */
1215   split_constant_offset (base_iv.base, &base_iv.base, &dinit);
1216   init = size_binop (PLUS_EXPR, init, dinit);
1217   base_misalignment -= TREE_INT_CST_LOW (dinit);
1218
1219   split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
1220   init = size_binop (PLUS_EXPR, init, dinit);
1221
1222   step = size_binop (PLUS_EXPR,
1223                      fold_convert (ssizetype, base_iv.step),
1224                      fold_convert (ssizetype, offset_iv.step));
1225
1226   base = canonicalize_base_object_address (base_iv.base);
1227
1228   /* See if get_pointer_alignment can guarantee a higher alignment than
1229      the one we calculated above.  */
1230   unsigned int HOST_WIDE_INT alt_misalignment;
1231   unsigned int alt_alignment;
1232   get_pointer_alignment_1 (base, &alt_alignment, &alt_misalignment);
1233
1234   /* As above, these values must be whole bytes.  */
1235   gcc_assert (alt_alignment % BITS_PER_UNIT == 0
1236               && alt_misalignment % BITS_PER_UNIT == 0);
1237   alt_alignment /= BITS_PER_UNIT;
1238   alt_misalignment /= BITS_PER_UNIT;
1239
1240   if (base_alignment < alt_alignment)
1241     {
1242       base_alignment = alt_alignment;
1243       base_misalignment = alt_misalignment;
1244     }
1245
1246   drb->base_address = base;
1247   drb->offset = fold_convert (ssizetype, offset_iv.base);
1248   drb->init = init;
1249   drb->step = step;
1250   if (known_misalignment (base_misalignment, base_alignment,
1251                           &drb->base_misalignment))
1252     drb->base_alignment = base_alignment;
1253   else
1254     {
1255       drb->base_alignment = known_alignment (base_misalignment);
1256       drb->base_misalignment = 0;
1257     }
1258   drb->offset_alignment = highest_pow2_factor (offset_iv.base);
1259   drb->step_alignment = highest_pow2_factor (step);
1260
1261   if (dump_file && (dump_flags & TDF_DETAILS))
1262     fprintf (dump_file, "success.\n");
1263
1264   return opt_result::success ();
1265 }
1266
1267 /* Return true if OP is a valid component reference for a DR access
1268    function.  This accepts a subset of what handled_component_p accepts.  */
1269
1270 static bool
1271 access_fn_component_p (tree op)
1272 {
1273   switch (TREE_CODE (op))
1274     {
1275     case REALPART_EXPR:
1276     case IMAGPART_EXPR:
1277     case ARRAY_REF:
1278       return true;
1279
1280     case COMPONENT_REF:
1281       return TREE_CODE (TREE_TYPE (TREE_OPERAND (op, 0))) == RECORD_TYPE;
1282
1283     default:
1284       return false;
1285     }
1286 }
1287
1288 /* Returns whether BASE can have a access_fn_component_p with BASE
1289    as base.  */
1290
1291 static bool
1292 base_supports_access_fn_components_p (tree base)
1293 {
1294   switch (TREE_CODE (TREE_TYPE (base)))
1295     {
1296     case COMPLEX_TYPE:
1297     case ARRAY_TYPE:
1298     case RECORD_TYPE:
1299       return true;
1300     default:
1301       return false;
1302     }
1303 }
1304
1305 /* Determines the base object and the list of indices of memory reference
1306    DR, analyzed in LOOP and instantiated before NEST.  */
1307
1308 static void
1309 dr_analyze_indices (struct indices *dri, tree ref, edge nest, loop_p loop)
1310 {
1311   /* If analyzing a basic-block there are no indices to analyze
1312      and thus no access functions.  */
1313   if (!nest)
1314     {
1315       dri->base_object = ref;
1316       dri->access_fns.create (0);
1317       return;
1318     }
1319
1320   vec<tree> access_fns = vNULL;
1321
1322   /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
1323      into a two element array with a constant index.  The base is
1324      then just the immediate underlying object.  */
1325   if (TREE_CODE (ref) == REALPART_EXPR)
1326     {
1327       ref = TREE_OPERAND (ref, 0);
1328       access_fns.safe_push (integer_zero_node);
1329     }
1330   else if (TREE_CODE (ref) == IMAGPART_EXPR)
1331     {
1332       ref = TREE_OPERAND (ref, 0);
1333       access_fns.safe_push (integer_one_node);
1334     }
1335
1336   /* Analyze access functions of dimensions we know to be independent.
1337      The list of component references handled here should be kept in
1338      sync with access_fn_component_p.  */
1339   while (handled_component_p (ref))
1340     {
1341       if (TREE_CODE (ref) == ARRAY_REF)
1342         {
1343           tree op = TREE_OPERAND (ref, 1);
1344           tree access_fn = analyze_scalar_evolution (loop, op);
1345           access_fn = instantiate_scev (nest, loop, access_fn);
1346           access_fns.safe_push (access_fn);
1347         }
1348       else if (TREE_CODE (ref) == COMPONENT_REF
1349                && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) == RECORD_TYPE)
1350         {
1351           /* For COMPONENT_REFs of records (but not unions!) use the
1352              FIELD_DECL offset as constant access function so we can
1353              disambiguate a[i].f1 and a[i].f2.  */
1354           tree off = component_ref_field_offset (ref);
1355           off = size_binop (PLUS_EXPR,
1356                             size_binop (MULT_EXPR,
1357                                         fold_convert (bitsizetype, off),
1358                                         bitsize_int (BITS_PER_UNIT)),
1359                             DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref, 1)));
1360           access_fns.safe_push (off);
1361         }
1362       else
1363         /* If we have an unhandled component we could not translate
1364            to an access function stop analyzing.  We have determined
1365            our base object in this case.  */
1366         break;
1367
1368       ref = TREE_OPERAND (ref, 0);
1369     }
1370
1371   /* If the address operand of a MEM_REF base has an evolution in the
1372      analyzed nest, add it as an additional independent access-function.  */
1373   if (TREE_CODE (ref) == MEM_REF)
1374     {
1375       tree op = TREE_OPERAND (ref, 0);
1376       tree access_fn = analyze_scalar_evolution (loop, op);
1377       access_fn = instantiate_scev (nest, loop, access_fn);
1378       STRIP_NOPS (access_fn);
1379       if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
1380         {
1381           tree memoff = TREE_OPERAND (ref, 1);
1382           tree base = initial_condition (access_fn);
1383           tree orig_type = TREE_TYPE (base);
1384           STRIP_USELESS_TYPE_CONVERSION (base);
1385           tree off;
1386           split_constant_offset (base, &base, &off);
1387           STRIP_USELESS_TYPE_CONVERSION (base);
1388           /* Fold the MEM_REF offset into the evolutions initial
1389              value to make more bases comparable.  */
1390           if (!integer_zerop (memoff))
1391             {
1392               off = size_binop (PLUS_EXPR, off,
1393                                 fold_convert (ssizetype, memoff));
1394               memoff = build_int_cst (TREE_TYPE (memoff), 0);
1395             }
1396           /* Adjust the offset so it is a multiple of the access type
1397              size and thus we separate bases that can possibly be used
1398              to produce partial overlaps (which the access_fn machinery
1399              cannot handle).  */
1400           wide_int rem;
1401           if (TYPE_SIZE_UNIT (TREE_TYPE (ref))
1402               && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (ref))) == INTEGER_CST
1403               && !integer_zerop (TYPE_SIZE_UNIT (TREE_TYPE (ref))))
1404             rem = wi::mod_trunc
1405               (wi::to_wide (off),
1406                wi::to_wide (TYPE_SIZE_UNIT (TREE_TYPE (ref))),
1407                SIGNED);
1408           else
1409             /* If we can't compute the remainder simply force the initial
1410                condition to zero.  */
1411             rem = wi::to_wide (off);
1412           off = wide_int_to_tree (ssizetype, wi::to_wide (off) - rem);
1413           memoff = wide_int_to_tree (TREE_TYPE (memoff), rem);
1414           /* And finally replace the initial condition.  */
1415           access_fn = chrec_replace_initial_condition
1416               (access_fn, fold_convert (orig_type, off));
1417           /* ???  This is still not a suitable base object for
1418              dr_may_alias_p - the base object needs to be an
1419              access that covers the object as whole.  With
1420              an evolution in the pointer this cannot be
1421              guaranteed.
1422              As a band-aid, mark the access so we can special-case
1423              it in dr_may_alias_p.  */
1424           tree old = ref;
1425           ref = fold_build2_loc (EXPR_LOCATION (ref),
1426                                  MEM_REF, TREE_TYPE (ref),
1427                                  base, memoff);
1428           MR_DEPENDENCE_CLIQUE (ref) = MR_DEPENDENCE_CLIQUE (old);
1429           MR_DEPENDENCE_BASE (ref) = MR_DEPENDENCE_BASE (old);
1430           dri->unconstrained_base = true;
1431           access_fns.safe_push (access_fn);
1432         }
1433     }
1434   else if (DECL_P (ref))
1435     {
1436       /* Canonicalize DR_BASE_OBJECT to MEM_REF form.  */
1437       ref = build2 (MEM_REF, TREE_TYPE (ref),
1438                     build_fold_addr_expr (ref),
1439                     build_int_cst (reference_alias_ptr_type (ref), 0));
1440     }
1441
1442   dri->base_object = ref;
1443   dri->access_fns = access_fns;
1444 }
1445
1446 /* Extracts the alias analysis information from the memory reference DR.  */
1447
1448 static void
1449 dr_analyze_alias (struct data_reference *dr)
1450 {
1451   tree ref = DR_REF (dr);
1452   tree base = get_base_address (ref), addr;
1453
1454   if (INDIRECT_REF_P (base)
1455       || TREE_CODE (base) == MEM_REF)
1456     {
1457       addr = TREE_OPERAND (base, 0);
1458       if (TREE_CODE (addr) == SSA_NAME)
1459         DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
1460     }
1461 }
1462
1463 /* Frees data reference DR.  */
1464
1465 void
1466 free_data_ref (data_reference_p dr)
1467 {
1468   DR_ACCESS_FNS (dr).release ();
1469   if (dr->alt_indices.base_object)
1470     dr->alt_indices.access_fns.release ();
1471   free (dr);
1472 }
1473
1474 /* Analyze memory reference MEMREF, which is accessed in STMT.
1475    The reference is a read if IS_READ is true, otherwise it is a write.
1476    IS_CONDITIONAL_IN_STMT indicates that the reference is conditional
1477    within STMT, i.e. that it might not occur even if STMT is executed
1478    and runs to completion.
1479
1480    Return the data_reference description of MEMREF.  NEST is the outermost
1481    loop in which the reference should be instantiated, LOOP is the loop
1482    in which the data reference should be analyzed.  */
1483
1484 struct data_reference *
1485 create_data_ref (edge nest, loop_p loop, tree memref, gimple *stmt,
1486                  bool is_read, bool is_conditional_in_stmt)
1487 {
1488   struct data_reference *dr;
1489
1490   if (dump_file && (dump_flags & TDF_DETAILS))
1491     {
1492       fprintf (dump_file, "Creating dr for ");
1493       print_generic_expr (dump_file, memref, TDF_SLIM);
1494       fprintf (dump_file, "\n");
1495     }
1496
1497   dr = XCNEW (struct data_reference);
1498   DR_STMT (dr) = stmt;
1499   DR_REF (dr) = memref;
1500   DR_IS_READ (dr) = is_read;
1501   DR_IS_CONDITIONAL_IN_STMT (dr) = is_conditional_in_stmt;
1502
1503   dr_analyze_innermost (&DR_INNERMOST (dr), memref,
1504                         nest != NULL ? loop : NULL, stmt);
1505   dr_analyze_indices (&dr->indices, DR_REF (dr), nest, loop);
1506   dr_analyze_alias (dr);
1507
1508   if (dump_file && (dump_flags & TDF_DETAILS))
1509     {
1510       unsigned i;
1511       fprintf (dump_file, "\tbase_address: ");
1512       print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
1513       fprintf (dump_file, "\n\toffset from base address: ");
1514       print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
1515       fprintf (dump_file, "\n\tconstant offset from base address: ");
1516       print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
1517       fprintf (dump_file, "\n\tstep: ");
1518       print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
1519       fprintf (dump_file, "\n\tbase alignment: %d", DR_BASE_ALIGNMENT (dr));
1520       fprintf (dump_file, "\n\tbase misalignment: %d",
1521                DR_BASE_MISALIGNMENT (dr));
1522       fprintf (dump_file, "\n\toffset alignment: %d",
1523                DR_OFFSET_ALIGNMENT (dr));
1524       fprintf (dump_file, "\n\tstep alignment: %d", DR_STEP_ALIGNMENT (dr));
1525       fprintf (dump_file, "\n\tbase_object: ");
1526       print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
1527       fprintf (dump_file, "\n");
1528       for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
1529         {
1530           fprintf (dump_file, "\tAccess function %d: ", i);
1531           print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
1532         }
1533     }
1534
1535   return dr;
1536 }
1537
1538 /*  A helper function computes order between two tree expressions T1 and T2.
1539     This is used in comparator functions sorting objects based on the order
1540     of tree expressions.  The function returns -1, 0, or 1.  */
1541
1542 int
1543 data_ref_compare_tree (tree t1, tree t2)
1544 {
1545   int i, cmp;
1546   enum tree_code code;
1547   char tclass;
1548
1549   if (t1 == t2)
1550     return 0;
1551   if (t1 == NULL)
1552     return -1;
1553   if (t2 == NULL)
1554     return 1;
1555
1556   STRIP_USELESS_TYPE_CONVERSION (t1);
1557   STRIP_USELESS_TYPE_CONVERSION (t2);
1558   if (t1 == t2)
1559     return 0;
1560
1561   if (TREE_CODE (t1) != TREE_CODE (t2)
1562       && ! (CONVERT_EXPR_P (t1) && CONVERT_EXPR_P (t2)))
1563     return TREE_CODE (t1) < TREE_CODE (t2) ? -1 : 1;
1564
1565   code = TREE_CODE (t1);
1566   switch (code)
1567     {
1568     case INTEGER_CST:
1569       return tree_int_cst_compare (t1, t2);
1570
1571     case STRING_CST:
1572       if (TREE_STRING_LENGTH (t1) != TREE_STRING_LENGTH (t2))
1573         return TREE_STRING_LENGTH (t1) < TREE_STRING_LENGTH (t2) ? -1 : 1;
1574       return memcmp (TREE_STRING_POINTER (t1), TREE_STRING_POINTER (t2),
1575                      TREE_STRING_LENGTH (t1));
1576
1577     case SSA_NAME:
1578       if (SSA_NAME_VERSION (t1) != SSA_NAME_VERSION (t2))
1579         return SSA_NAME_VERSION (t1) < SSA_NAME_VERSION (t2) ? -1 : 1;
1580       break;
1581
1582     default:
1583       if (POLY_INT_CST_P (t1))
1584         return compare_sizes_for_sort (wi::to_poly_widest (t1),
1585                                        wi::to_poly_widest (t2));
1586
1587       tclass = TREE_CODE_CLASS (code);
1588
1589       /* For decls, compare their UIDs.  */
1590       if (tclass == tcc_declaration)
1591         {
1592           if (DECL_UID (t1) != DECL_UID (t2))
1593             return DECL_UID (t1) < DECL_UID (t2) ? -1 : 1;
1594           break;
1595         }
1596       /* For expressions, compare their operands recursively.  */
1597       else if (IS_EXPR_CODE_CLASS (tclass))
1598         {
1599           for (i = TREE_OPERAND_LENGTH (t1) - 1; i >= 0; --i)
1600             {
1601               cmp = data_ref_compare_tree (TREE_OPERAND (t1, i),
1602                                            TREE_OPERAND (t2, i));
1603               if (cmp != 0)
1604                 return cmp;
1605             }
1606         }
1607       else
1608         gcc_unreachable ();
1609     }
1610
1611   return 0;
1612 }
1613
1614 /* Return TRUE it's possible to resolve data dependence DDR by runtime alias
1615    check.  */
1616
1617 opt_result
1618 runtime_alias_check_p (ddr_p ddr, class loop *loop, bool speed_p)
1619 {
1620   if (dump_enabled_p ())
1621     dump_printf (MSG_NOTE,
1622                  "consider run-time aliasing test between %T and %T\n",
1623                  DR_REF (DDR_A (ddr)), DR_REF (DDR_B (ddr)));
1624
1625   if (!speed_p)
1626     return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
1627                                    "runtime alias check not supported when"
1628                                    " optimizing for size.\n");
1629
1630   /* FORNOW: We don't support versioning with outer-loop in either
1631      vectorization or loop distribution.  */
1632   if (loop != NULL && loop->inner != NULL)
1633     return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
1634                                    "runtime alias check not supported for"
1635                                    " outer loop.\n");
1636
1637   return opt_result::success ();
1638 }
1639
1640 /* Operator == between two dr_with_seg_len objects.
1641
1642    This equality operator is used to make sure two data refs
1643    are the same one so that we will consider to combine the
1644    aliasing checks of those two pairs of data dependent data
1645    refs.  */
1646
1647 static bool
1648 operator == (const dr_with_seg_len& d1,
1649              const dr_with_seg_len& d2)
1650 {
1651   return (operand_equal_p (DR_BASE_ADDRESS (d1.dr),
1652                            DR_BASE_ADDRESS (d2.dr), 0)
1653           && data_ref_compare_tree (DR_OFFSET (d1.dr), DR_OFFSET (d2.dr)) == 0
1654           && data_ref_compare_tree (DR_INIT (d1.dr), DR_INIT (d2.dr)) == 0
1655           && data_ref_compare_tree (d1.seg_len, d2.seg_len) == 0
1656           && known_eq (d1.access_size, d2.access_size)
1657           && d1.align == d2.align);
1658 }
1659
1660 /* Comparison function for sorting objects of dr_with_seg_len_pair_t
1661    so that we can combine aliasing checks in one scan.  */
1662
1663 static int
1664 comp_dr_with_seg_len_pair (const void *pa_, const void *pb_)
1665 {
1666   const dr_with_seg_len_pair_t* pa = (const dr_with_seg_len_pair_t *) pa_;
1667   const dr_with_seg_len_pair_t* pb = (const dr_with_seg_len_pair_t *) pb_;
1668   const dr_with_seg_len &a1 = pa->first, &a2 = pa->second;
1669   const dr_with_seg_len &b1 = pb->first, &b2 = pb->second;
1670
1671   /* For DR pairs (a, b) and (c, d), we only consider to merge the alias checks
1672      if a and c have the same basic address snd step, and b and d have the same
1673      address and step.  Therefore, if any a&c or b&d don't have the same address
1674      and step, we don't care the order of those two pairs after sorting.  */
1675   int comp_res;
1676
1677   if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a1.dr),
1678                                          DR_BASE_ADDRESS (b1.dr))) != 0)
1679     return comp_res;
1680   if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a2.dr),
1681                                          DR_BASE_ADDRESS (b2.dr))) != 0)
1682     return comp_res;
1683   if ((comp_res = data_ref_compare_tree (DR_STEP (a1.dr),
1684                                          DR_STEP (b1.dr))) != 0)
1685     return comp_res;
1686   if ((comp_res = data_ref_compare_tree (DR_STEP (a2.dr),
1687                                          DR_STEP (b2.dr))) != 0)
1688     return comp_res;
1689   if ((comp_res = data_ref_compare_tree (DR_OFFSET (a1.dr),
1690                                          DR_OFFSET (b1.dr))) != 0)
1691     return comp_res;
1692   if ((comp_res = data_ref_compare_tree (DR_INIT (a1.dr),
1693                                          DR_INIT (b1.dr))) != 0)
1694     return comp_res;
1695   if ((comp_res = data_ref_compare_tree (DR_OFFSET (a2.dr),
1696                                          DR_OFFSET (b2.dr))) != 0)
1697     return comp_res;
1698   if ((comp_res = data_ref_compare_tree (DR_INIT (a2.dr),
1699                                          DR_INIT (b2.dr))) != 0)
1700     return comp_res;
1701
1702   return 0;
1703 }
1704
1705 /* Dump information about ALIAS_PAIR, indenting each line by INDENT.  */
1706
1707 static void
1708 dump_alias_pair (dr_with_seg_len_pair_t *alias_pair, const char *indent)
1709 {
1710   dump_printf (MSG_NOTE, "%sreference:      %T vs. %T\n", indent,
1711                DR_REF (alias_pair->first.dr),
1712                DR_REF (alias_pair->second.dr));
1713
1714   dump_printf (MSG_NOTE, "%ssegment length: %T", indent,
1715                alias_pair->first.seg_len);
1716   if (!operand_equal_p (alias_pair->first.seg_len,
1717                         alias_pair->second.seg_len, 0))
1718     dump_printf (MSG_NOTE, " vs. %T", alias_pair->second.seg_len);
1719
1720   dump_printf (MSG_NOTE, "\n%saccess size:    ", indent);
1721   dump_dec (MSG_NOTE, alias_pair->first.access_size);
1722   if (maybe_ne (alias_pair->first.access_size, alias_pair->second.access_size))
1723     {
1724       dump_printf (MSG_NOTE, " vs. ");
1725       dump_dec (MSG_NOTE, alias_pair->second.access_size);
1726     }
1727
1728   dump_printf (MSG_NOTE, "\n%salignment:      %d", indent,
1729                alias_pair->first.align);
1730   if (alias_pair->first.align != alias_pair->second.align)
1731     dump_printf (MSG_NOTE, " vs. %d", alias_pair->second.align);
1732
1733   dump_printf (MSG_NOTE, "\n%sflags:         ", indent);
1734   if (alias_pair->flags & DR_ALIAS_RAW)
1735     dump_printf (MSG_NOTE, " RAW");
1736   if (alias_pair->flags & DR_ALIAS_WAR)
1737     dump_printf (MSG_NOTE, " WAR");
1738   if (alias_pair->flags & DR_ALIAS_WAW)
1739     dump_printf (MSG_NOTE, " WAW");
1740   if (alias_pair->flags & DR_ALIAS_ARBITRARY)
1741     dump_printf (MSG_NOTE, " ARBITRARY");
1742   if (alias_pair->flags & DR_ALIAS_SWAPPED)
1743     dump_printf (MSG_NOTE, " SWAPPED");
1744   if (alias_pair->flags & DR_ALIAS_UNSWAPPED)
1745     dump_printf (MSG_NOTE, " UNSWAPPED");
1746   if (alias_pair->flags & DR_ALIAS_MIXED_STEPS)
1747     dump_printf (MSG_NOTE, " MIXED_STEPS");
1748   if (alias_pair->flags == 0)
1749     dump_printf (MSG_NOTE, " <none>");
1750   dump_printf (MSG_NOTE, "\n");
1751 }
1752
1753 /* Merge alias checks recorded in ALIAS_PAIRS and remove redundant ones.
1754    FACTOR is number of iterations that each data reference is accessed.
1755
1756    Basically, for each pair of dependent data refs store_ptr_0 & load_ptr_0,
1757    we create an expression:
1758
1759    ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
1760    || (load_ptr_0 + load_segment_length_0) <= store_ptr_0))
1761
1762    for aliasing checks.  However, in some cases we can decrease the number
1763    of checks by combining two checks into one.  For example, suppose we have
1764    another pair of data refs store_ptr_0 & load_ptr_1, and if the following
1765    condition is satisfied:
1766
1767    load_ptr_0 < load_ptr_1  &&
1768    load_ptr_1 - load_ptr_0 - load_segment_length_0 < store_segment_length_0
1769
1770    (this condition means, in each iteration of vectorized loop, the accessed
1771    memory of store_ptr_0 cannot be between the memory of load_ptr_0 and
1772    load_ptr_1.)
1773
1774    we then can use only the following expression to finish the alising checks
1775    between store_ptr_0 & load_ptr_0 and store_ptr_0 & load_ptr_1:
1776
1777    ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
1778    || (load_ptr_1 + load_segment_length_1 <= store_ptr_0))
1779
1780    Note that we only consider that load_ptr_0 and load_ptr_1 have the same
1781    basic address.  */
1782
1783 void
1784 prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs,
1785                                poly_uint64)
1786 {
1787   if (alias_pairs->is_empty ())
1788     return;
1789
1790   /* Canonicalize each pair so that the base components are ordered wrt
1791      data_ref_compare_tree.  This allows the loop below to merge more
1792      cases.  */
1793   unsigned int i;
1794   dr_with_seg_len_pair_t *alias_pair;
1795   FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
1796     {
1797       data_reference_p dr_a = alias_pair->first.dr;
1798       data_reference_p dr_b = alias_pair->second.dr;
1799       int comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (dr_a),
1800                                             DR_BASE_ADDRESS (dr_b));
1801       if (comp_res == 0)
1802         comp_res = data_ref_compare_tree (DR_OFFSET (dr_a), DR_OFFSET (dr_b));
1803       if (comp_res == 0)
1804         comp_res = data_ref_compare_tree (DR_INIT (dr_a), DR_INIT (dr_b));
1805       if (comp_res > 0)
1806         {
1807           std::swap (alias_pair->first, alias_pair->second);
1808           alias_pair->flags |= DR_ALIAS_SWAPPED;
1809         }
1810       else
1811         alias_pair->flags |= DR_ALIAS_UNSWAPPED;
1812     }
1813
1814   /* Sort the collected data ref pairs so that we can scan them once to
1815      combine all possible aliasing checks.  */
1816   alias_pairs->qsort (comp_dr_with_seg_len_pair);
1817
1818   /* Scan the sorted dr pairs and check if we can combine alias checks
1819      of two neighboring dr pairs.  */
1820   unsigned int last = 0;
1821   for (i = 1; i < alias_pairs->length (); ++i)
1822     {
1823       /* Deal with two ddrs (dr_a1, dr_b1) and (dr_a2, dr_b2).  */
1824       dr_with_seg_len_pair_t *alias_pair1 = &(*alias_pairs)[last];
1825       dr_with_seg_len_pair_t *alias_pair2 = &(*alias_pairs)[i];
1826
1827       dr_with_seg_len *dr_a1 = &alias_pair1->first;
1828       dr_with_seg_len *dr_b1 = &alias_pair1->second;
1829       dr_with_seg_len *dr_a2 = &alias_pair2->first;
1830       dr_with_seg_len *dr_b2 = &alias_pair2->second;
1831
1832       /* Remove duplicate data ref pairs.  */
1833       if (*dr_a1 == *dr_a2 && *dr_b1 == *dr_b2)
1834         {
1835           if (dump_enabled_p ())
1836             dump_printf (MSG_NOTE, "found equal ranges %T, %T and %T, %T\n",
1837                          DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
1838                          DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
1839           alias_pair1->flags |= alias_pair2->flags;
1840           continue;
1841         }
1842
1843       /* Assume that we won't be able to merge the pairs, then correct
1844          if we do.  */
1845       last += 1;
1846       if (last != i)
1847         (*alias_pairs)[last] = (*alias_pairs)[i];
1848
1849       if (*dr_a1 == *dr_a2 || *dr_b1 == *dr_b2)
1850         {
1851           /* We consider the case that DR_B1 and DR_B2 are same memrefs,
1852              and DR_A1 and DR_A2 are two consecutive memrefs.  */
1853           if (*dr_a1 == *dr_a2)
1854             {
1855               std::swap (dr_a1, dr_b1);
1856               std::swap (dr_a2, dr_b2);
1857             }
1858
1859           poly_int64 init_a1, init_a2;
1860           /* Only consider cases in which the distance between the initial
1861              DR_A1 and the initial DR_A2 is known at compile time.  */
1862           if (!operand_equal_p (DR_BASE_ADDRESS (dr_a1->dr),
1863                                 DR_BASE_ADDRESS (dr_a2->dr), 0)
1864               || !operand_equal_p (DR_OFFSET (dr_a1->dr),
1865                                    DR_OFFSET (dr_a2->dr), 0)
1866               || !poly_int_tree_p (DR_INIT (dr_a1->dr), &init_a1)
1867               || !poly_int_tree_p (DR_INIT (dr_a2->dr), &init_a2))
1868             continue;
1869
1870           /* Don't combine if we can't tell which one comes first.  */
1871           if (!ordered_p (init_a1, init_a2))
1872             continue;
1873
1874           /* Work out what the segment length would be if we did combine
1875              DR_A1 and DR_A2:
1876
1877              - If DR_A1 and DR_A2 have equal lengths, that length is
1878                also the combined length.
1879
1880              - If DR_A1 and DR_A2 both have negative "lengths", the combined
1881                length is the lower bound on those lengths.
1882
1883              - If DR_A1 and DR_A2 both have positive lengths, the combined
1884                length is the upper bound on those lengths.
1885
1886              Other cases are unlikely to give a useful combination.
1887
1888              The lengths both have sizetype, so the sign is taken from
1889              the step instead.  */
1890           poly_uint64 new_seg_len = 0;
1891           bool new_seg_len_p = !operand_equal_p (dr_a1->seg_len,
1892                                                  dr_a2->seg_len, 0);
1893           if (new_seg_len_p)
1894             {
1895               poly_uint64 seg_len_a1, seg_len_a2;
1896               if (!poly_int_tree_p (dr_a1->seg_len, &seg_len_a1)
1897                   || !poly_int_tree_p (dr_a2->seg_len, &seg_len_a2))
1898                 continue;
1899
1900               tree indicator_a = dr_direction_indicator (dr_a1->dr);
1901               if (TREE_CODE (indicator_a) != INTEGER_CST)
1902                 continue;
1903
1904               tree indicator_b = dr_direction_indicator (dr_a2->dr);
1905               if (TREE_CODE (indicator_b) != INTEGER_CST)
1906                 continue;
1907
1908               int sign_a = tree_int_cst_sgn (indicator_a);
1909               int sign_b = tree_int_cst_sgn (indicator_b);
1910
1911               if (sign_a <= 0 && sign_b <= 0)
1912                 new_seg_len = lower_bound (seg_len_a1, seg_len_a2);
1913               else if (sign_a >= 0 && sign_b >= 0)
1914                 new_seg_len = upper_bound (seg_len_a1, seg_len_a2);
1915               else
1916                 continue;
1917             }
1918           /* At this point we're committed to merging the refs.  */
1919
1920           /* Make sure dr_a1 starts left of dr_a2.  */
1921           if (maybe_gt (init_a1, init_a2))
1922             {
1923               std::swap (*dr_a1, *dr_a2);
1924               std::swap (init_a1, init_a2);
1925             }
1926
1927           /* The DR_Bs are equal, so only the DR_As can introduce
1928              mixed steps.  */
1929           if (!operand_equal_p (DR_STEP (dr_a1->dr), DR_STEP (dr_a2->dr), 0))
1930             alias_pair1->flags |= DR_ALIAS_MIXED_STEPS;
1931
1932           if (new_seg_len_p)
1933             {
1934               dr_a1->seg_len = build_int_cst (TREE_TYPE (dr_a1->seg_len),
1935                                               new_seg_len);
1936               dr_a1->align = MIN (dr_a1->align, known_alignment (new_seg_len));
1937             }
1938
1939           /* This is always positive due to the swap above.  */
1940           poly_uint64 diff = init_a2 - init_a1;
1941
1942           /* The new check will start at DR_A1.  Make sure that its access
1943              size encompasses the initial DR_A2.  */
1944           if (maybe_lt (dr_a1->access_size, diff + dr_a2->access_size))
1945             {
1946               dr_a1->access_size = upper_bound (dr_a1->access_size,
1947                                                 diff + dr_a2->access_size);
1948               unsigned int new_align = known_alignment (dr_a1->access_size);
1949               dr_a1->align = MIN (dr_a1->align, new_align);
1950             }
1951           if (dump_enabled_p ())
1952             dump_printf (MSG_NOTE, "merging ranges for %T, %T and %T, %T\n",
1953                          DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
1954                          DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
1955           alias_pair1->flags |= alias_pair2->flags;
1956           last -= 1;
1957         }
1958     }
1959   alias_pairs->truncate (last + 1);
1960
1961   /* Try to restore the original dr_with_seg_len order within each
1962      dr_with_seg_len_pair_t.  If we ended up combining swapped and
1963      unswapped pairs into the same check, we have to invalidate any
1964      RAW, WAR and WAW information for it.  */
1965   if (dump_enabled_p ())
1966     dump_printf (MSG_NOTE, "merged alias checks:\n");
1967   FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
1968     {
1969       unsigned int swap_mask = (DR_ALIAS_SWAPPED | DR_ALIAS_UNSWAPPED);
1970       unsigned int swapped = (alias_pair->flags & swap_mask);
1971       if (swapped == DR_ALIAS_SWAPPED)
1972         std::swap (alias_pair->first, alias_pair->second);
1973       else if (swapped != DR_ALIAS_UNSWAPPED)
1974         alias_pair->flags |= DR_ALIAS_ARBITRARY;
1975       alias_pair->flags &= ~swap_mask;
1976       if (dump_enabled_p ())
1977         dump_alias_pair (alias_pair, "  ");
1978     }
1979 }
1980
1981 /* A subroutine of create_intersect_range_checks, with a subset of the
1982    same arguments.  Try to use IFN_CHECK_RAW_PTRS and IFN_CHECK_WAR_PTRS
1983    to optimize cases in which the references form a simple RAW, WAR or
1984    WAR dependence.  */
1985
1986 static bool
1987 create_ifn_alias_checks (tree *cond_expr,
1988                          const dr_with_seg_len_pair_t &alias_pair)
1989 {
1990   const dr_with_seg_len& dr_a = alias_pair.first;
1991   const dr_with_seg_len& dr_b = alias_pair.second;
1992
1993   /* Check for cases in which:
1994
1995      (a) we have a known RAW, WAR or WAR dependence
1996      (b) the accesses are well-ordered in both the original and new code
1997          (see the comment above the DR_ALIAS_* flags for details); and
1998      (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR.  */
1999   if (alias_pair.flags & ~(DR_ALIAS_RAW | DR_ALIAS_WAR | DR_ALIAS_WAW))
2000     return false;
2001
2002   /* Make sure that both DRs access the same pattern of bytes,
2003      with a constant length and step.  */
2004   poly_uint64 seg_len;
2005   if (!operand_equal_p (dr_a.seg_len, dr_b.seg_len, 0)
2006       || !poly_int_tree_p (dr_a.seg_len, &seg_len)
2007       || maybe_ne (dr_a.access_size, dr_b.access_size)
2008       || !operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0)
2009       || !tree_fits_uhwi_p (DR_STEP (dr_a.dr)))
2010     return false;
2011
2012   unsigned HOST_WIDE_INT bytes = tree_to_uhwi (DR_STEP (dr_a.dr));
2013   tree addr_a = DR_BASE_ADDRESS (dr_a.dr);
2014   tree addr_b = DR_BASE_ADDRESS (dr_b.dr);
2015
2016   /* See whether the target suports what we want to do.  WAW checks are
2017      equivalent to WAR checks here.  */
2018   internal_fn ifn = (alias_pair.flags & DR_ALIAS_RAW
2019                      ? IFN_CHECK_RAW_PTRS
2020                      : IFN_CHECK_WAR_PTRS);
2021   unsigned int align = MIN (dr_a.align, dr_b.align);
2022   poly_uint64 full_length = seg_len + bytes;
2023   if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a),
2024                                            full_length, align))
2025     {
2026       full_length = seg_len + dr_a.access_size;
2027       if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a),
2028                                                full_length, align))
2029         return false;
2030     }
2031
2032   /* Commit to using this form of test.  */
2033   addr_a = fold_build_pointer_plus (addr_a, DR_OFFSET (dr_a.dr));
2034   addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr));
2035
2036   addr_b = fold_build_pointer_plus (addr_b, DR_OFFSET (dr_b.dr));
2037   addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr));
2038
2039   *cond_expr = build_call_expr_internal_loc (UNKNOWN_LOCATION,
2040                                              ifn, boolean_type_node,
2041                                              4, addr_a, addr_b,
2042                                              size_int (full_length),
2043                                              size_int (align));
2044
2045   if (dump_enabled_p ())
2046     {
2047       if (ifn == IFN_CHECK_RAW_PTRS)
2048         dump_printf (MSG_NOTE, "using an IFN_CHECK_RAW_PTRS test\n");
2049       else
2050         dump_printf (MSG_NOTE, "using an IFN_CHECK_WAR_PTRS test\n");
2051     }
2052   return true;
2053 }
2054
2055 /* Try to generate a runtime condition that is true if ALIAS_PAIR is
2056    free of aliases, using a condition based on index values instead
2057    of a condition based on addresses.  Return true on success,
2058    storing the condition in *COND_EXPR.
2059
2060    This can only be done if the two data references in ALIAS_PAIR access
2061    the same array object and the index is the only difference.  For example,
2062    if the two data references are DR_A and DR_B:
2063
2064                        DR_A                           DR_B
2065       data-ref         arr[i]                         arr[j]
2066       base_object      arr                            arr
2067       index            {i_0, +, 1}_loop               {j_0, +, 1}_loop
2068
2069    The addresses and their index are like:
2070
2071         |<- ADDR_A    ->|          |<- ADDR_B    ->|
2072      ------------------------------------------------------->
2073         |   |   |   |   |          |   |   |   |   |
2074      ------------------------------------------------------->
2075         i_0 ...         i_0+4      j_0 ...         j_0+4
2076
2077    We can create expression based on index rather than address:
2078
2079      (unsigned) (i_0 - j_0 + 3) <= 6
2080
2081    i.e. the indices are less than 4 apart.
2082
2083    Note evolution step of index needs to be considered in comparison.  */
2084
2085 static bool
2086 create_intersect_range_checks_index (class loop *loop, tree *cond_expr,
2087                                      const dr_with_seg_len_pair_t &alias_pair)
2088 {
2089   const dr_with_seg_len &dr_a = alias_pair.first;
2090   const dr_with_seg_len &dr_b = alias_pair.second;
2091   if ((alias_pair.flags & DR_ALIAS_MIXED_STEPS)
2092       || integer_zerop (DR_STEP (dr_a.dr))
2093       || integer_zerop (DR_STEP (dr_b.dr))
2094       || DR_NUM_DIMENSIONS (dr_a.dr) != DR_NUM_DIMENSIONS (dr_b.dr))
2095     return false;
2096
2097   poly_uint64 seg_len1, seg_len2;
2098   if (!poly_int_tree_p (dr_a.seg_len, &seg_len1)
2099       || !poly_int_tree_p (dr_b.seg_len, &seg_len2))
2100     return false;
2101
2102   if (!tree_fits_shwi_p (DR_STEP (dr_a.dr)))
2103     return false;
2104
2105   if (!operand_equal_p (DR_BASE_OBJECT (dr_a.dr), DR_BASE_OBJECT (dr_b.dr), 0))
2106     return false;
2107
2108   if (!operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0))
2109     return false;
2110
2111   gcc_assert (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST);
2112
2113   bool neg_step = tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0;
2114   unsigned HOST_WIDE_INT abs_step = tree_to_shwi (DR_STEP (dr_a.dr));
2115   if (neg_step)
2116     {
2117       abs_step = -abs_step;
2118       seg_len1 = (-wi::to_poly_wide (dr_a.seg_len)).force_uhwi ();
2119       seg_len2 = (-wi::to_poly_wide (dr_b.seg_len)).force_uhwi ();
2120     }
2121
2122   /* Infer the number of iterations with which the memory segment is accessed
2123      by DR.  In other words, alias is checked if memory segment accessed by
2124      DR_A in some iterations intersect with memory segment accessed by DR_B
2125      in the same amount iterations.
2126      Note segnment length is a linear function of number of iterations with
2127      DR_STEP as the coefficient.  */
2128   poly_uint64 niter_len1, niter_len2;
2129   if (!can_div_trunc_p (seg_len1 + abs_step - 1, abs_step, &niter_len1)
2130       || !can_div_trunc_p (seg_len2 + abs_step - 1, abs_step, &niter_len2))
2131     return false;
2132
2133   /* Divide each access size by the byte step, rounding up.  */
2134   poly_uint64 niter_access1, niter_access2;
2135   if (!can_div_trunc_p (dr_a.access_size + abs_step - 1,
2136                         abs_step, &niter_access1)
2137       || !can_div_trunc_p (dr_b.access_size + abs_step - 1,
2138                            abs_step, &niter_access2))
2139     return false;
2140
2141   bool waw_or_war_p = (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW)) == 0;
2142
2143   int found = -1;
2144   for (unsigned int i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
2145     {
2146       tree access1 = DR_ACCESS_FN (dr_a.dr, i);
2147       tree access2 = DR_ACCESS_FN (dr_b.dr, i);
2148       /* Two indices must be the same if they are not scev, or not scev wrto
2149          current loop being vecorized.  */
2150       if (TREE_CODE (access1) != POLYNOMIAL_CHREC
2151           || TREE_CODE (access2) != POLYNOMIAL_CHREC
2152           || CHREC_VARIABLE (access1) != (unsigned)loop->num
2153           || CHREC_VARIABLE (access2) != (unsigned)loop->num)
2154         {
2155           if (operand_equal_p (access1, access2, 0))
2156             continue;
2157
2158           return false;
2159         }
2160       if (found >= 0)
2161         return false;
2162       found = i;
2163     }
2164
2165   /* Ought not to happen in practice, since if all accesses are equal then the
2166      alias should be decidable at compile time.  */
2167   if (found < 0)
2168     return false;
2169
2170   /* The two indices must have the same step.  */
2171   tree access1 = DR_ACCESS_FN (dr_a.dr, found);
2172   tree access2 = DR_ACCESS_FN (dr_b.dr, found);
2173   if (!operand_equal_p (CHREC_RIGHT (access1), CHREC_RIGHT (access2), 0))
2174     return false;
2175
2176   tree idx_step = CHREC_RIGHT (access1);
2177   /* Index must have const step, otherwise DR_STEP won't be constant.  */
2178   gcc_assert (TREE_CODE (idx_step) == INTEGER_CST);
2179   /* Index must evaluate in the same direction as DR.  */
2180   gcc_assert (!neg_step || tree_int_cst_sign_bit (idx_step) == 1);
2181
2182   tree min1 = CHREC_LEFT (access1);
2183   tree min2 = CHREC_LEFT (access2);
2184   if (!types_compatible_p (TREE_TYPE (min1), TREE_TYPE (min2)))
2185     return false;
2186
2187   /* Ideally, alias can be checked against loop's control IV, but we
2188      need to prove linear mapping between control IV and reference
2189      index.  Although that should be true, we check against (array)
2190      index of data reference.  Like segment length, index length is
2191      linear function of the number of iterations with index_step as
2192      the coefficient, i.e, niter_len * idx_step.  */
2193   offset_int abs_idx_step = offset_int::from (wi::to_wide (idx_step),
2194                                               SIGNED);
2195   if (neg_step)
2196     abs_idx_step = -abs_idx_step;
2197   poly_offset_int idx_len1 = abs_idx_step * niter_len1;
2198   poly_offset_int idx_len2 = abs_idx_step * niter_len2;
2199   poly_offset_int idx_access1 = abs_idx_step * niter_access1;
2200   poly_offset_int idx_access2 = abs_idx_step * niter_access2;
2201
2202   gcc_assert (known_ge (idx_len1, 0)
2203               && known_ge (idx_len2, 0)
2204               && known_ge (idx_access1, 0)
2205               && known_ge (idx_access2, 0));
2206
2207   /* Each access has the following pattern, with lengths measured
2208      in units of INDEX:
2209
2210           <-- idx_len -->
2211           <--- A: -ve step --->
2212           +-----+-------+-----+-------+-----+
2213           | n-1 | ..... |  0  | ..... | n-1 |
2214           +-----+-------+-----+-------+-----+
2215                         <--- B: +ve step --->
2216                         <-- idx_len -->
2217                         |
2218                        min
2219
2220      where "n" is the number of scalar iterations covered by the segment
2221      and where each access spans idx_access units.
2222
2223      A is the range of bytes accessed when the step is negative,
2224      B is the range when the step is positive.
2225
2226      When checking for general overlap, we need to test whether
2227      the range:
2228
2229        [min1 + low_offset1, min1 + high_offset1 + idx_access1 - 1]
2230
2231      overlaps:
2232
2233        [min2 + low_offset2, min2 + high_offset2 + idx_access2 - 1]
2234
2235      where:
2236
2237         low_offsetN = +ve step ? 0 : -idx_lenN;
2238        high_offsetN = +ve step ? idx_lenN : 0;
2239
2240      This is equivalent to testing whether:
2241
2242        min1 + low_offset1 <= min2 + high_offset2 + idx_access2 - 1
2243        && min2 + low_offset2 <= min1 + high_offset1 + idx_access1 - 1
2244
2245      Converting this into a single test, there is an overlap if:
2246
2247        0 <= min2 - min1 + bias <= limit
2248
2249      where  bias = high_offset2 + idx_access2 - 1 - low_offset1
2250            limit = (high_offset1 - low_offset1 + idx_access1 - 1)
2251                  + (high_offset2 - low_offset2 + idx_access2 - 1)
2252       i.e. limit = idx_len1 + idx_access1 - 1 + idx_len2 + idx_access2 - 1
2253
2254      Combining the tests requires limit to be computable in an unsigned
2255      form of the index type; if it isn't, we fall back to the usual
2256      pointer-based checks.
2257
2258      We can do better if DR_B is a write and if DR_A and DR_B are
2259      well-ordered in both the original and the new code (see the
2260      comment above the DR_ALIAS_* flags for details).  In this case
2261      we know that for each i in [0, n-1], the write performed by
2262      access i of DR_B occurs after access numbers j<=i of DR_A in
2263      both the original and the new code.  Any write or anti
2264      dependencies wrt those DR_A accesses are therefore maintained.
2265
2266      We just need to make sure that each individual write in DR_B does not
2267      overlap any higher-indexed access in DR_A; such DR_A accesses happen
2268      after the DR_B access in the original code but happen before it in
2269      the new code.
2270
2271      We know the steps for both accesses are equal, so by induction, we
2272      just need to test whether the first write of DR_B overlaps a later
2273      access of DR_A.  In other words, we need to move min1 along by
2274      one iteration:
2275
2276        min1' = min1 + idx_step
2277
2278      and use the ranges:
2279
2280        [min1' + low_offset1', min1' + high_offset1' + idx_access1 - 1]
2281
2282      and:
2283
2284        [min2, min2 + idx_access2 - 1]
2285
2286      where:
2287
2288         low_offset1' = +ve step ? 0 : -(idx_len1 - |idx_step|)
2289        high_offset1' = +ve_step ? idx_len1 - |idx_step| : 0.  */
2290   if (waw_or_war_p)
2291     idx_len1 -= abs_idx_step;
2292
2293   poly_offset_int limit = idx_len1 + idx_access1 - 1 + idx_access2 - 1;
2294   if (!waw_or_war_p)
2295     limit += idx_len2;
2296
2297   tree utype = unsigned_type_for (TREE_TYPE (min1));
2298   if (!wi::fits_to_tree_p (limit, utype))
2299     return false;
2300
2301   poly_offset_int low_offset1 = neg_step ? -idx_len1 : 0;
2302   poly_offset_int high_offset2 = neg_step || waw_or_war_p ? 0 : idx_len2;
2303   poly_offset_int bias = high_offset2 + idx_access2 - 1 - low_offset1;
2304   /* Equivalent to adding IDX_STEP to MIN1.  */
2305   if (waw_or_war_p)
2306     bias -= wi::to_offset (idx_step);
2307
2308   tree subject = fold_build2 (MINUS_EXPR, utype,
2309                               fold_convert (utype, min2),
2310                               fold_convert (utype, min1));
2311   subject = fold_build2 (PLUS_EXPR, utype, subject,
2312                          wide_int_to_tree (utype, bias));
2313   tree part_cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject,
2314                                      wide_int_to_tree (utype, limit));
2315   if (*cond_expr)
2316     *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2317                               *cond_expr, part_cond_expr);
2318   else
2319     *cond_expr = part_cond_expr;
2320   if (dump_enabled_p ())
2321     {
2322       if (waw_or_war_p)
2323         dump_printf (MSG_NOTE, "using an index-based WAR/WAW test\n");
2324       else
2325         dump_printf (MSG_NOTE, "using an index-based overlap test\n");
2326     }
2327   return true;
2328 }
2329
2330 /* A subroutine of create_intersect_range_checks, with a subset of the
2331    same arguments.  Try to optimize cases in which the second access
2332    is a write and in which some overlap is valid.  */
2333
2334 static bool
2335 create_waw_or_war_checks (tree *cond_expr,
2336                           const dr_with_seg_len_pair_t &alias_pair)
2337 {
2338   const dr_with_seg_len& dr_a = alias_pair.first;
2339   const dr_with_seg_len& dr_b = alias_pair.second;
2340
2341   /* Check for cases in which:
2342
2343      (a) DR_B is always a write;
2344      (b) the accesses are well-ordered in both the original and new code
2345          (see the comment above the DR_ALIAS_* flags for details); and
2346      (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR.  */
2347   if (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW))
2348     return false;
2349
2350   /* Check for equal (but possibly variable) steps.  */
2351   tree step = DR_STEP (dr_a.dr);
2352   if (!operand_equal_p (step, DR_STEP (dr_b.dr)))
2353     return false;
2354
2355   /* Make sure that we can operate on sizetype without loss of precision.  */
2356   tree addr_type = TREE_TYPE (DR_BASE_ADDRESS (dr_a.dr));
2357   if (TYPE_PRECISION (addr_type) != TYPE_PRECISION (sizetype))
2358     return false;
2359
2360   /* All addresses involved are known to have a common alignment ALIGN.
2361      We can therefore subtract ALIGN from an exclusive endpoint to get
2362      an inclusive endpoint.  In the best (and common) case, ALIGN is the
2363      same as the access sizes of both DRs, and so subtracting ALIGN
2364      cancels out the addition of an access size.  */
2365   unsigned int align = MIN (dr_a.align, dr_b.align);
2366   poly_uint64 last_chunk_a = dr_a.access_size - align;
2367   poly_uint64 last_chunk_b = dr_b.access_size - align;
2368
2369   /* Get a boolean expression that is true when the step is negative.  */
2370   tree indicator = dr_direction_indicator (dr_a.dr);
2371   tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
2372                                fold_convert (ssizetype, indicator),
2373                                ssize_int (0));
2374
2375   /* Get lengths in sizetype.  */
2376   tree seg_len_a
2377     = fold_convert (sizetype, rewrite_to_non_trapping_overflow (dr_a.seg_len));
2378   step = fold_convert (sizetype, rewrite_to_non_trapping_overflow (step));
2379
2380   /* Each access has the following pattern:
2381
2382           <- |seg_len| ->
2383           <--- A: -ve step --->
2384           +-----+-------+-----+-------+-----+
2385           | n-1 | ..... |  0  | ..... | n-1 |
2386           +-----+-------+-----+-------+-----+
2387                         <--- B: +ve step --->
2388                         <- |seg_len| ->
2389                         |
2390                    base address
2391
2392      where "n" is the number of scalar iterations covered by the segment.
2393
2394      A is the range of bytes accessed when the step is negative,
2395      B is the range when the step is positive.
2396
2397      We know that DR_B is a write.  We also know (from checking that
2398      DR_A and DR_B are well-ordered) that for each i in [0, n-1],
2399      the write performed by access i of DR_B occurs after access numbers
2400      j<=i of DR_A in both the original and the new code.  Any write or
2401      anti dependencies wrt those DR_A accesses are therefore maintained.
2402
2403      We just need to make sure that each individual write in DR_B does not
2404      overlap any higher-indexed access in DR_A; such DR_A accesses happen
2405      after the DR_B access in the original code but happen before it in
2406      the new code.
2407
2408      We know the steps for both accesses are equal, so by induction, we
2409      just need to test whether the first write of DR_B overlaps a later
2410      access of DR_A.  In other words, we need to move addr_a along by
2411      one iteration:
2412
2413        addr_a' = addr_a + step
2414
2415      and check whether:
2416
2417        [addr_b, addr_b + last_chunk_b]
2418
2419      overlaps:
2420
2421        [addr_a' + low_offset_a, addr_a' + high_offset_a + last_chunk_a]
2422
2423      where [low_offset_a, high_offset_a] spans accesses [1, n-1].  I.e.:
2424
2425         low_offset_a = +ve step ? 0 : seg_len_a - step
2426        high_offset_a = +ve step ? seg_len_a - step : 0
2427
2428      This is equivalent to testing whether:
2429
2430        addr_a' + low_offset_a <= addr_b + last_chunk_b
2431        && addr_b <= addr_a' + high_offset_a + last_chunk_a
2432
2433      Converting this into a single test, there is an overlap if:
2434
2435        0 <= addr_b + last_chunk_b - addr_a' - low_offset_a <= limit
2436
2437      where limit = high_offset_a - low_offset_a + last_chunk_a + last_chunk_b
2438
2439      If DR_A is performed, limit + |step| - last_chunk_b is known to be
2440      less than the size of the object underlying DR_A.  We also know
2441      that last_chunk_b <= |step|; this is checked elsewhere if it isn't
2442      guaranteed at compile time.  There can therefore be no overflow if
2443      "limit" is calculated in an unsigned type with pointer precision.  */
2444   tree addr_a = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_a.dr),
2445                                          DR_OFFSET (dr_a.dr));
2446   addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr));
2447
2448   tree addr_b = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_b.dr),
2449                                          DR_OFFSET (dr_b.dr));
2450   addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr));
2451
2452   /* Advance ADDR_A by one iteration and adjust the length to compensate.  */
2453   addr_a = fold_build_pointer_plus (addr_a, step);
2454   tree seg_len_a_minus_step = fold_build2 (MINUS_EXPR, sizetype,
2455                                            seg_len_a, step);
2456   if (!CONSTANT_CLASS_P (seg_len_a_minus_step))
2457     seg_len_a_minus_step = build1 (SAVE_EXPR, sizetype, seg_len_a_minus_step);
2458
2459   tree low_offset_a = fold_build3 (COND_EXPR, sizetype, neg_step,
2460                                    seg_len_a_minus_step, size_zero_node);
2461   if (!CONSTANT_CLASS_P (low_offset_a))
2462     low_offset_a = build1 (SAVE_EXPR, sizetype, low_offset_a);
2463
2464   /* We could use COND_EXPR <neg_step, size_zero_node, seg_len_a_minus_step>,
2465      but it's usually more efficient to reuse the LOW_OFFSET_A result.  */
2466   tree high_offset_a = fold_build2 (MINUS_EXPR, sizetype, seg_len_a_minus_step,
2467                                     low_offset_a);
2468
2469   /* The amount added to addr_b - addr_a'.  */
2470   tree bias = fold_build2 (MINUS_EXPR, sizetype,
2471                            size_int (last_chunk_b), low_offset_a);
2472
2473   tree limit = fold_build2 (MINUS_EXPR, sizetype, high_offset_a, low_offset_a);
2474   limit = fold_build2 (PLUS_EXPR, sizetype, limit,
2475                        size_int (last_chunk_a + last_chunk_b));
2476
2477   tree subject = fold_build2 (POINTER_DIFF_EXPR, ssizetype, addr_b, addr_a);
2478   subject = fold_build2 (PLUS_EXPR, sizetype,
2479                          fold_convert (sizetype, subject), bias);
2480
2481   *cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject, limit);
2482   if (dump_enabled_p ())
2483     dump_printf (MSG_NOTE, "using an address-based WAR/WAW test\n");
2484   return true;
2485 }
2486
2487 /* If ALIGN is nonzero, set up *SEQ_MIN_OUT and *SEQ_MAX_OUT so that for
2488    every address ADDR accessed by D:
2489
2490      *SEQ_MIN_OUT <= ADDR (== ADDR & -ALIGN) <= *SEQ_MAX_OUT
2491
2492    In this case, every element accessed by D is aligned to at least
2493    ALIGN bytes.
2494
2495    If ALIGN is zero then instead set *SEG_MAX_OUT so that:
2496
2497      *SEQ_MIN_OUT <= ADDR < *SEQ_MAX_OUT.  */
2498
2499 static void
2500 get_segment_min_max (const dr_with_seg_len &d, tree *seg_min_out,
2501                      tree *seg_max_out, HOST_WIDE_INT align)
2502 {
2503   /* Each access has the following pattern:
2504
2505           <- |seg_len| ->
2506           <--- A: -ve step --->
2507           +-----+-------+-----+-------+-----+
2508           | n-1 | ,.... |  0  | ..... | n-1 |
2509           +-----+-------+-----+-------+-----+
2510                         <--- B: +ve step --->
2511                         <- |seg_len| ->
2512                         |
2513                    base address
2514
2515      where "n" is the number of scalar iterations covered by the segment.
2516      (This should be VF for a particular pair if we know that both steps
2517      are the same, otherwise it will be the full number of scalar loop
2518      iterations.)
2519
2520      A is the range of bytes accessed when the step is negative,
2521      B is the range when the step is positive.
2522
2523      If the access size is "access_size" bytes, the lowest addressed byte is:
2524
2525          base + (step < 0 ? seg_len : 0)   [LB]
2526
2527      and the highest addressed byte is always below:
2528
2529          base + (step < 0 ? 0 : seg_len) + access_size   [UB]
2530
2531      Thus:
2532
2533          LB <= ADDR < UB
2534
2535      If ALIGN is nonzero, all three values are aligned to at least ALIGN
2536      bytes, so:
2537
2538          LB <= ADDR <= UB - ALIGN
2539
2540      where "- ALIGN" folds naturally with the "+ access_size" and often
2541      cancels it out.
2542
2543      We don't try to simplify LB and UB beyond this (e.g. by using
2544      MIN and MAX based on whether seg_len rather than the stride is
2545      negative) because it is possible for the absolute size of the
2546      segment to overflow the range of a ssize_t.
2547
2548      Keeping the pointer_plus outside of the cond_expr should allow
2549      the cond_exprs to be shared with other alias checks.  */
2550   tree indicator = dr_direction_indicator (d.dr);
2551   tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
2552                                fold_convert (ssizetype, indicator),
2553                                ssize_int (0));
2554   tree addr_base = fold_build_pointer_plus (DR_BASE_ADDRESS (d.dr),
2555                                             DR_OFFSET (d.dr));
2556   addr_base = fold_build_pointer_plus (addr_base, DR_INIT (d.dr));
2557   tree seg_len
2558     = fold_convert (sizetype, rewrite_to_non_trapping_overflow (d.seg_len));
2559
2560   tree min_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
2561                                 seg_len, size_zero_node);
2562   tree max_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
2563                                 size_zero_node, seg_len);
2564   max_reach = fold_build2 (PLUS_EXPR, sizetype, max_reach,
2565                            size_int (d.access_size - align));
2566
2567   *seg_min_out = fold_build_pointer_plus (addr_base, min_reach);
2568   *seg_max_out = fold_build_pointer_plus (addr_base, max_reach);
2569 }
2570
2571 /* Generate a runtime condition that is true if ALIAS_PAIR is free of aliases,
2572    storing the condition in *COND_EXPR.  The fallback is to generate a
2573    a test that the two accesses do not overlap:
2574
2575      end_a <= start_b || end_b <= start_a.  */
2576
2577 static void
2578 create_intersect_range_checks (class loop *loop, tree *cond_expr,
2579                                const dr_with_seg_len_pair_t &alias_pair)
2580 {
2581   const dr_with_seg_len& dr_a = alias_pair.first;
2582   const dr_with_seg_len& dr_b = alias_pair.second;
2583   *cond_expr = NULL_TREE;
2584   if (create_intersect_range_checks_index (loop, cond_expr, alias_pair))
2585     return;
2586
2587   if (create_ifn_alias_checks (cond_expr, alias_pair))
2588     return;
2589
2590   if (create_waw_or_war_checks (cond_expr, alias_pair))
2591     return;
2592
2593   unsigned HOST_WIDE_INT min_align;
2594   tree_code cmp_code;
2595   /* We don't have to check DR_ALIAS_MIXED_STEPS here, since both versions
2596      are equivalent.  This is just an optimization heuristic.  */
2597   if (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST
2598       && TREE_CODE (DR_STEP (dr_b.dr)) == INTEGER_CST)
2599     {
2600       /* In this case adding access_size to seg_len is likely to give
2601          a simple X * step, where X is either the number of scalar
2602          iterations or the vectorization factor.  We're better off
2603          keeping that, rather than subtracting an alignment from it.
2604
2605          In this case the maximum values are exclusive and so there is
2606          no alias if the maximum of one segment equals the minimum
2607          of another.  */
2608       min_align = 0;
2609       cmp_code = LE_EXPR;
2610     }
2611   else
2612     {
2613       /* Calculate the minimum alignment shared by all four pointers,
2614          then arrange for this alignment to be subtracted from the
2615          exclusive maximum values to get inclusive maximum values.
2616          This "- min_align" is cumulative with a "+ access_size"
2617          in the calculation of the maximum values.  In the best
2618          (and common) case, the two cancel each other out, leaving
2619          us with an inclusive bound based only on seg_len.  In the
2620          worst case we're simply adding a smaller number than before.
2621
2622          Because the maximum values are inclusive, there is an alias
2623          if the maximum value of one segment is equal to the minimum
2624          value of the other.  */
2625       min_align = MIN (dr_a.align, dr_b.align);
2626       cmp_code = LT_EXPR;
2627     }
2628
2629   tree seg_a_min, seg_a_max, seg_b_min, seg_b_max;
2630   get_segment_min_max (dr_a, &seg_a_min, &seg_a_max, min_align);
2631   get_segment_min_max (dr_b, &seg_b_min, &seg_b_max, min_align);
2632
2633   *cond_expr
2634     = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
2635         fold_build2 (cmp_code, boolean_type_node, seg_a_max, seg_b_min),
2636         fold_build2 (cmp_code, boolean_type_node, seg_b_max, seg_a_min));
2637   if (dump_enabled_p ())
2638     dump_printf (MSG_NOTE, "using an address-based overlap test\n");
2639 }
2640
2641 /* Create a conditional expression that represents the run-time checks for
2642    overlapping of address ranges represented by a list of data references
2643    pairs passed in ALIAS_PAIRS.  Data references are in LOOP.  The returned
2644    COND_EXPR is the conditional expression to be used in the if statement
2645    that controls which version of the loop gets executed at runtime.  */
2646
2647 void
2648 create_runtime_alias_checks (class loop *loop,
2649                              const vec<dr_with_seg_len_pair_t> *alias_pairs,
2650                              tree * cond_expr)
2651 {
2652   tree part_cond_expr;
2653
2654   fold_defer_overflow_warnings ();
2655   for (const dr_with_seg_len_pair_t &alias_pair : alias_pairs)
2656     {
2657       gcc_assert (alias_pair.flags);
2658       if (dump_enabled_p ())
2659         dump_printf (MSG_NOTE,
2660                      "create runtime check for data references %T and %T\n",
2661                      DR_REF (alias_pair.first.dr),
2662                      DR_REF (alias_pair.second.dr));
2663
2664       /* Create condition expression for each pair data references.  */
2665       create_intersect_range_checks (loop, &part_cond_expr, alias_pair);
2666       if (*cond_expr)
2667         *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2668                                   *cond_expr, part_cond_expr);
2669       else
2670         *cond_expr = part_cond_expr;
2671     }
2672   fold_undefer_and_ignore_overflow_warnings ();
2673 }
2674
2675 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
2676    expressions.  */
2677 static bool
2678 dr_equal_offsets_p1 (tree offset1, tree offset2)
2679 {
2680   bool res;
2681
2682   STRIP_NOPS (offset1);
2683   STRIP_NOPS (offset2);
2684
2685   if (offset1 == offset2)
2686     return true;
2687
2688   if (TREE_CODE (offset1) != TREE_CODE (offset2)
2689       || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
2690     return false;
2691
2692   res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
2693                              TREE_OPERAND (offset2, 0));
2694
2695   if (!res || !BINARY_CLASS_P (offset1))
2696     return res;
2697
2698   res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
2699                              TREE_OPERAND (offset2, 1));
2700
2701   return res;
2702 }
2703
2704 /* Check if DRA and DRB have equal offsets.  */
2705 bool
2706 dr_equal_offsets_p (struct data_reference *dra,
2707                     struct data_reference *drb)
2708 {
2709   tree offset1, offset2;
2710
2711   offset1 = DR_OFFSET (dra);
2712   offset2 = DR_OFFSET (drb);
2713
2714   return dr_equal_offsets_p1 (offset1, offset2);
2715 }
2716
2717 /* Returns true if FNA == FNB.  */
2718
2719 static bool
2720 affine_function_equal_p (affine_fn fna, affine_fn fnb)
2721 {
2722   unsigned i, n = fna.length ();
2723
2724   if (n != fnb.length ())
2725     return false;
2726
2727   for (i = 0; i < n; i++)
2728     if (!operand_equal_p (fna[i], fnb[i], 0))
2729       return false;
2730
2731   return true;
2732 }
2733
2734 /* If all the functions in CF are the same, returns one of them,
2735    otherwise returns NULL.  */
2736
2737 static affine_fn
2738 common_affine_function (conflict_function *cf)
2739 {
2740   unsigned i;
2741   affine_fn comm;
2742
2743   if (!CF_NONTRIVIAL_P (cf))
2744     return affine_fn ();
2745
2746   comm = cf->fns[0];
2747
2748   for (i = 1; i < cf->n; i++)
2749     if (!affine_function_equal_p (comm, cf->fns[i]))
2750       return affine_fn ();
2751
2752   return comm;
2753 }
2754
2755 /* Returns the base of the affine function FN.  */
2756
2757 static tree
2758 affine_function_base (affine_fn fn)
2759 {
2760   return fn[0];
2761 }
2762
2763 /* Returns true if FN is a constant.  */
2764
2765 static bool
2766 affine_function_constant_p (affine_fn fn)
2767 {
2768   unsigned i;
2769   tree coef;
2770
2771   for (i = 1; fn.iterate (i, &coef); i++)
2772     if (!integer_zerop (coef))
2773       return false;
2774
2775   return true;
2776 }
2777
2778 /* Returns true if FN is the zero constant function.  */
2779
2780 static bool
2781 affine_function_zero_p (affine_fn fn)
2782 {
2783   return (integer_zerop (affine_function_base (fn))
2784           && affine_function_constant_p (fn));
2785 }
2786
2787 /* Returns a signed integer type with the largest precision from TA
2788    and TB.  */
2789
2790 static tree
2791 signed_type_for_types (tree ta, tree tb)
2792 {
2793   if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
2794     return signed_type_for (ta);
2795   else
2796     return signed_type_for (tb);
2797 }
2798
2799 /* Applies operation OP on affine functions FNA and FNB, and returns the
2800    result.  */
2801
2802 static affine_fn
2803 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
2804 {
2805   unsigned i, n, m;
2806   affine_fn ret;
2807   tree coef;
2808
2809   if (fnb.length () > fna.length ())
2810     {
2811       n = fna.length ();
2812       m = fnb.length ();
2813     }
2814   else
2815     {
2816       n = fnb.length ();
2817       m = fna.length ();
2818     }
2819
2820   ret.create (m);
2821   for (i = 0; i < n; i++)
2822     {
2823       tree type = signed_type_for_types (TREE_TYPE (fna[i]),
2824                                          TREE_TYPE (fnb[i]));
2825       ret.quick_push (fold_build2 (op, type, fna[i], fnb[i]));
2826     }
2827
2828   for (; fna.iterate (i, &coef); i++)
2829     ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2830                                  coef, integer_zero_node));
2831   for (; fnb.iterate (i, &coef); i++)
2832     ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2833                                  integer_zero_node, coef));
2834
2835   return ret;
2836 }
2837
2838 /* Returns the sum of affine functions FNA and FNB.  */
2839
2840 static affine_fn
2841 affine_fn_plus (affine_fn fna, affine_fn fnb)
2842 {
2843   return affine_fn_op (PLUS_EXPR, fna, fnb);
2844 }
2845
2846 /* Returns the difference of affine functions FNA and FNB.  */
2847
2848 static affine_fn
2849 affine_fn_minus (affine_fn fna, affine_fn fnb)
2850 {
2851   return affine_fn_op (MINUS_EXPR, fna, fnb);
2852 }
2853
2854 /* Frees affine function FN.  */
2855
2856 static void
2857 affine_fn_free (affine_fn fn)
2858 {
2859   fn.release ();
2860 }
2861
2862 /* Determine for each subscript in the data dependence relation DDR
2863    the distance.  */
2864
2865 static void
2866 compute_subscript_distance (struct data_dependence_relation *ddr)
2867 {
2868   conflict_function *cf_a, *cf_b;
2869   affine_fn fn_a, fn_b, diff;
2870
2871   if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
2872     {
2873       unsigned int i;
2874
2875       for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2876         {
2877           struct subscript *subscript;
2878
2879           subscript = DDR_SUBSCRIPT (ddr, i);
2880           cf_a = SUB_CONFLICTS_IN_A (subscript);
2881           cf_b = SUB_CONFLICTS_IN_B (subscript);
2882
2883           fn_a = common_affine_function (cf_a);
2884           fn_b = common_affine_function (cf_b);
2885           if (!fn_a.exists () || !fn_b.exists ())
2886             {
2887               SUB_DISTANCE (subscript) = chrec_dont_know;
2888               return;
2889             }
2890           diff = affine_fn_minus (fn_a, fn_b);
2891
2892           if (affine_function_constant_p (diff))
2893             SUB_DISTANCE (subscript) = affine_function_base (diff);
2894           else
2895             SUB_DISTANCE (subscript) = chrec_dont_know;
2896
2897           affine_fn_free (diff);
2898         }
2899     }
2900 }
2901
2902 /* Returns the conflict function for "unknown".  */
2903
2904 static conflict_function *
2905 conflict_fn_not_known (void)
2906 {
2907   conflict_function *fn = XCNEW (conflict_function);
2908   fn->n = NOT_KNOWN;
2909
2910   return fn;
2911 }
2912
2913 /* Returns the conflict function for "independent".  */
2914
2915 static conflict_function *
2916 conflict_fn_no_dependence (void)
2917 {
2918   conflict_function *fn = XCNEW (conflict_function);
2919   fn->n = NO_DEPENDENCE;
2920
2921   return fn;
2922 }
2923
2924 /* Returns true if the address of OBJ is invariant in LOOP.  */
2925
2926 static bool
2927 object_address_invariant_in_loop_p (const class loop *loop, const_tree obj)
2928 {
2929   while (handled_component_p (obj))
2930     {
2931       if (TREE_CODE (obj) == ARRAY_REF)
2932         {
2933           for (int i = 1; i < 4; ++i)
2934             if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, i),
2935                                                         loop->num))
2936               return false;
2937         }
2938       else if (TREE_CODE (obj) == COMPONENT_REF)
2939         {
2940           if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
2941                                                       loop->num))
2942             return false;
2943         }
2944       obj = TREE_OPERAND (obj, 0);
2945     }
2946
2947   if (!INDIRECT_REF_P (obj)
2948       && TREE_CODE (obj) != MEM_REF)
2949     return true;
2950
2951   return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
2952                                                   loop->num);
2953 }
2954
2955 /* Returns false if we can prove that data references A and B do not alias,
2956    true otherwise.  If LOOP_NEST is false no cross-iteration aliases are
2957    considered.  */
2958
2959 bool
2960 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
2961                 class loop *loop_nest)
2962 {
2963   tree addr_a = DR_BASE_OBJECT (a);
2964   tree addr_b = DR_BASE_OBJECT (b);
2965
2966   /* If we are not processing a loop nest but scalar code we
2967      do not need to care about possible cross-iteration dependences
2968      and thus can process the full original reference.  Do so,
2969      similar to how loop invariant motion applies extra offset-based
2970      disambiguation.  */
2971   if (!loop_nest)
2972     {
2973       tree tree_size_a = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (a)));
2974       tree tree_size_b = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (b)));
2975
2976       if (DR_BASE_ADDRESS (a)
2977           && DR_BASE_ADDRESS (b)
2978           && operand_equal_p (DR_BASE_ADDRESS (a), DR_BASE_ADDRESS (b))
2979           && operand_equal_p (DR_OFFSET (a), DR_OFFSET (b))
2980           && poly_int_tree_p (tree_size_a)
2981           && poly_int_tree_p (tree_size_b)
2982           && !ranges_maybe_overlap_p (wi::to_poly_widest (DR_INIT (a)),
2983                                       wi::to_poly_widest (tree_size_a),
2984                                       wi::to_poly_widest (DR_INIT (b)),
2985                                       wi::to_poly_widest (tree_size_b)))
2986         {
2987           gcc_assert (integer_zerop (DR_STEP (a))
2988                       && integer_zerop (DR_STEP (b)));
2989           return false;
2990         }
2991
2992       aff_tree off1, off2;
2993       poly_widest_int size1, size2;
2994       get_inner_reference_aff (DR_REF (a), &off1, &size1);
2995       get_inner_reference_aff (DR_REF (b), &off2, &size2);
2996       aff_combination_scale (&off1, -1);
2997       aff_combination_add (&off2, &off1);
2998       if (aff_comb_cannot_overlap_p (&off2, size1, size2))
2999         return false;
3000     }
3001
3002   if ((TREE_CODE (addr_a) == MEM_REF || TREE_CODE (addr_a) == TARGET_MEM_REF)
3003       && (TREE_CODE (addr_b) == MEM_REF || TREE_CODE (addr_b) == TARGET_MEM_REF)
3004       /* For cross-iteration dependences the cliques must be valid for the
3005          whole loop, not just individual iterations.  */
3006       && (!loop_nest
3007           || MR_DEPENDENCE_CLIQUE (addr_a) == 1
3008           || MR_DEPENDENCE_CLIQUE (addr_a) == loop_nest->owned_clique)
3009       && MR_DEPENDENCE_CLIQUE (addr_a) == MR_DEPENDENCE_CLIQUE (addr_b)
3010       && MR_DEPENDENCE_BASE (addr_a) != MR_DEPENDENCE_BASE (addr_b))
3011     return false;
3012
3013   /* If we had an evolution in a pointer-based MEM_REF BASE_OBJECT we
3014      do not know the size of the base-object.  So we cannot do any
3015      offset/overlap based analysis but have to rely on points-to
3016      information only.  */
3017   if (TREE_CODE (addr_a) == MEM_REF
3018       && (DR_UNCONSTRAINED_BASE (a)
3019           || TREE_CODE (TREE_OPERAND (addr_a, 0)) == SSA_NAME))
3020     {
3021       /* For true dependences we can apply TBAA.  */
3022       if (flag_strict_aliasing
3023           && DR_IS_WRITE (a) && DR_IS_READ (b)
3024           && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
3025                                      get_alias_set (DR_REF (b))))
3026         return false;
3027       if (TREE_CODE (addr_b) == MEM_REF)
3028         return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
3029                                        TREE_OPERAND (addr_b, 0));
3030       else
3031         return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
3032                                        build_fold_addr_expr (addr_b));
3033     }
3034   else if (TREE_CODE (addr_b) == MEM_REF
3035            && (DR_UNCONSTRAINED_BASE (b)
3036                || TREE_CODE (TREE_OPERAND (addr_b, 0)) == SSA_NAME))
3037     {
3038       /* For true dependences we can apply TBAA.  */
3039       if (flag_strict_aliasing
3040           && DR_IS_WRITE (a) && DR_IS_READ (b)
3041           && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
3042                                      get_alias_set (DR_REF (b))))
3043         return false;
3044       if (TREE_CODE (addr_a) == MEM_REF)
3045         return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
3046                                        TREE_OPERAND (addr_b, 0));
3047       else
3048         return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
3049                                        TREE_OPERAND (addr_b, 0));
3050     }
3051
3052   /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
3053      that is being subsetted in the loop nest.  */
3054   if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
3055     return refs_output_dependent_p (addr_a, addr_b);
3056   else if (DR_IS_READ (a) && DR_IS_WRITE (b))
3057     return refs_anti_dependent_p (addr_a, addr_b);
3058   return refs_may_alias_p (addr_a, addr_b);
3059 }
3060
3061 /* REF_A and REF_B both satisfy access_fn_component_p.  Return true
3062    if it is meaningful to compare their associated access functions
3063    when checking for dependencies.  */
3064
3065 static bool
3066 access_fn_components_comparable_p (tree ref_a, tree ref_b)
3067 {
3068   /* Allow pairs of component refs from the following sets:
3069
3070        { REALPART_EXPR, IMAGPART_EXPR }
3071        { COMPONENT_REF }
3072        { ARRAY_REF }.  */
3073   tree_code code_a = TREE_CODE (ref_a);
3074   tree_code code_b = TREE_CODE (ref_b);
3075   if (code_a == IMAGPART_EXPR)
3076     code_a = REALPART_EXPR;
3077   if (code_b == IMAGPART_EXPR)
3078     code_b = REALPART_EXPR;
3079   if (code_a != code_b)
3080     return false;
3081
3082   if (TREE_CODE (ref_a) == COMPONENT_REF)
3083     /* ??? We cannot simply use the type of operand #0 of the refs here as
3084        the Fortran compiler smuggles type punning into COMPONENT_REFs.
3085        Use the DECL_CONTEXT of the FIELD_DECLs instead.  */
3086     return (DECL_CONTEXT (TREE_OPERAND (ref_a, 1))
3087             == DECL_CONTEXT (TREE_OPERAND (ref_b, 1)));
3088
3089   return types_compatible_p (TREE_TYPE (TREE_OPERAND (ref_a, 0)),
3090                              TREE_TYPE (TREE_OPERAND (ref_b, 0)));
3091 }
3092
3093 /* Initialize a data dependence relation RES in LOOP_NEST.  USE_ALT_INDICES
3094    is true when the main indices of A and B were not comparable so we try again
3095    with alternate indices computed on an indirect reference.  */
3096
3097 struct data_dependence_relation *
3098 initialize_data_dependence_relation (struct data_dependence_relation *res,
3099                                      vec<loop_p> loop_nest,
3100                                      bool use_alt_indices)
3101 {
3102   struct data_reference *a = DDR_A (res);
3103   struct data_reference *b = DDR_B (res);
3104   unsigned int i;
3105
3106   struct indices *indices_a = &a->indices;
3107   struct indices *indices_b = &b->indices;
3108   if (use_alt_indices)
3109     {
3110       if (TREE_CODE (DR_REF (a)) != MEM_REF)
3111         indices_a = &a->alt_indices;
3112       if (TREE_CODE (DR_REF (b)) != MEM_REF)
3113         indices_b = &b->alt_indices;
3114     }
3115   unsigned int num_dimensions_a = indices_a->access_fns.length ();
3116   unsigned int num_dimensions_b = indices_b->access_fns.length ();
3117   if (num_dimensions_a == 0 || num_dimensions_b == 0)
3118     {
3119       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3120       return res;
3121     }
3122
3123   /* For unconstrained bases, the root (highest-indexed) subscript
3124      describes a variation in the base of the original DR_REF rather
3125      than a component access.  We have no type that accurately describes
3126      the new DR_BASE_OBJECT (whose TREE_TYPE describes the type *after*
3127      applying this subscript) so limit the search to the last real
3128      component access.
3129
3130      E.g. for:
3131
3132         void
3133         f (int a[][8], int b[][8])
3134         {
3135           for (int i = 0; i < 8; ++i)
3136             a[i * 2][0] = b[i][0];
3137         }
3138
3139      the a and b accesses have a single ARRAY_REF component reference [0]
3140      but have two subscripts.  */
3141   if (indices_a->unconstrained_base)
3142     num_dimensions_a -= 1;
3143   if (indices_b->unconstrained_base)
3144     num_dimensions_b -= 1;
3145
3146   /* These structures describe sequences of component references in
3147      DR_REF (A) and DR_REF (B).  Each component reference is tied to a
3148      specific access function.  */
3149   struct {
3150     /* The sequence starts at DR_ACCESS_FN (A, START_A) of A and
3151        DR_ACCESS_FN (B, START_B) of B (inclusive) and extends to higher
3152        indices.  In C notation, these are the indices of the rightmost
3153        component references; e.g. for a sequence .b.c.d, the start
3154        index is for .d.  */
3155     unsigned int start_a;
3156     unsigned int start_b;
3157
3158     /* The sequence contains LENGTH consecutive access functions from
3159        each DR.  */
3160     unsigned int length;
3161
3162     /* The enclosing objects for the A and B sequences respectively,
3163        i.e. the objects to which DR_ACCESS_FN (A, START_A + LENGTH - 1)
3164        and DR_ACCESS_FN (B, START_B + LENGTH - 1) are applied.  */
3165     tree object_a;
3166     tree object_b;
3167   } full_seq = {}, struct_seq = {};
3168
3169   /* Before each iteration of the loop:
3170
3171      - REF_A is what you get after applying DR_ACCESS_FN (A, INDEX_A) and
3172      - REF_B is what you get after applying DR_ACCESS_FN (B, INDEX_B).  */
3173   unsigned int index_a = 0;
3174   unsigned int index_b = 0;
3175   tree ref_a = DR_REF (a);
3176   tree ref_b = DR_REF (b);
3177
3178   /* Now walk the component references from the final DR_REFs back up to
3179      the enclosing base objects.  Each component reference corresponds
3180      to one access function in the DR, with access function 0 being for
3181      the final DR_REF and the highest-indexed access function being the
3182      one that is applied to the base of the DR.
3183
3184      Look for a sequence of component references whose access functions
3185      are comparable (see access_fn_components_comparable_p).  If more
3186      than one such sequence exists, pick the one nearest the base
3187      (which is the leftmost sequence in C notation).  Store this sequence
3188      in FULL_SEQ.
3189
3190      For example, if we have:
3191
3192         struct foo { struct bar s; ... } (*a)[10], (*b)[10];
3193
3194         A: a[0][i].s.c.d
3195         B: __real b[0][i].s.e[i].f
3196
3197      (where d is the same type as the real component of f) then the access
3198      functions would be:
3199
3200                          0   1   2   3
3201         A:              .d  .c  .s [i]
3202
3203                  0   1   2   3   4   5
3204         B:  __real  .f [i]  .e  .s [i]
3205
3206      The A0/B2 column isn't comparable, since .d is a COMPONENT_REF
3207      and [i] is an ARRAY_REF.  However, the A1/B3 column contains two
3208      COMPONENT_REF accesses for struct bar, so is comparable.  Likewise
3209      the A2/B4 column contains two COMPONENT_REF accesses for struct foo,
3210      so is comparable.  The A3/B5 column contains two ARRAY_REFs that
3211      index foo[10] arrays, so is again comparable.  The sequence is
3212      therefore:
3213
3214         A: [1, 3]  (i.e. [i].s.c)
3215         B: [3, 5]  (i.e. [i].s.e)
3216
3217      Also look for sequences of component references whose access
3218      functions are comparable and whose enclosing objects have the same
3219      RECORD_TYPE.  Store this sequence in STRUCT_SEQ.  In the above
3220      example, STRUCT_SEQ would be:
3221
3222         A: [1, 2]  (i.e. s.c)
3223         B: [3, 4]  (i.e. s.e)  */
3224   while (index_a < num_dimensions_a && index_b < num_dimensions_b)
3225     {
3226       /* The alternate indices form always has a single dimension
3227          with unconstrained base.  */
3228       gcc_assert (!use_alt_indices);
3229
3230       /* REF_A and REF_B must be one of the component access types
3231          allowed by dr_analyze_indices.  */
3232       gcc_checking_assert (access_fn_component_p (ref_a));
3233       gcc_checking_assert (access_fn_component_p (ref_b));
3234
3235       /* Get the immediately-enclosing objects for REF_A and REF_B,
3236          i.e. the references *before* applying DR_ACCESS_FN (A, INDEX_A)
3237          and DR_ACCESS_FN (B, INDEX_B).  */
3238       tree object_a = TREE_OPERAND (ref_a, 0);
3239       tree object_b = TREE_OPERAND (ref_b, 0);
3240
3241       tree type_a = TREE_TYPE (object_a);
3242       tree type_b = TREE_TYPE (object_b);
3243       if (access_fn_components_comparable_p (ref_a, ref_b))
3244         {
3245           /* This pair of component accesses is comparable for dependence
3246              analysis, so we can include DR_ACCESS_FN (A, INDEX_A) and
3247              DR_ACCESS_FN (B, INDEX_B) in the sequence.  */
3248           if (full_seq.start_a + full_seq.length != index_a
3249               || full_seq.start_b + full_seq.length != index_b)
3250             {
3251               /* The accesses don't extend the current sequence,
3252                  so start a new one here.  */
3253               full_seq.start_a = index_a;
3254               full_seq.start_b = index_b;
3255               full_seq.length = 0;
3256             }
3257
3258           /* Add this pair of references to the sequence.  */
3259           full_seq.length += 1;
3260           full_seq.object_a = object_a;
3261           full_seq.object_b = object_b;
3262
3263           /* If the enclosing objects are structures (and thus have the
3264              same RECORD_TYPE), record the new sequence in STRUCT_SEQ.  */
3265           if (TREE_CODE (type_a) == RECORD_TYPE)
3266             struct_seq = full_seq;
3267
3268           /* Move to the next containing reference for both A and B.  */
3269           ref_a = object_a;
3270           ref_b = object_b;
3271           index_a += 1;
3272           index_b += 1;
3273           continue;
3274         }
3275
3276       /* Try to approach equal type sizes.  */
3277       if (!COMPLETE_TYPE_P (type_a)
3278           || !COMPLETE_TYPE_P (type_b)
3279           || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_a))
3280           || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_b)))
3281         break;
3282
3283       unsigned HOST_WIDE_INT size_a = tree_to_uhwi (TYPE_SIZE_UNIT (type_a));
3284       unsigned HOST_WIDE_INT size_b = tree_to_uhwi (TYPE_SIZE_UNIT (type_b));
3285       if (size_a <= size_b)
3286         {
3287           index_a += 1;
3288           ref_a = object_a;
3289         }
3290       if (size_b <= size_a)
3291         {
3292           index_b += 1;
3293           ref_b = object_b;
3294         }
3295     }
3296
3297   /* See whether FULL_SEQ ends at the base and whether the two bases
3298      are equal.  We do not care about TBAA or alignment info so we can
3299      use OEP_ADDRESS_OF to avoid false negatives.  */
3300   tree base_a = indices_a->base_object;
3301   tree base_b = indices_b->base_object;
3302   bool same_base_p = (full_seq.start_a + full_seq.length == num_dimensions_a
3303                       && full_seq.start_b + full_seq.length == num_dimensions_b
3304                       && (indices_a->unconstrained_base
3305                           == indices_b->unconstrained_base)
3306                       && operand_equal_p (base_a, base_b, OEP_ADDRESS_OF)
3307                       && (types_compatible_p (TREE_TYPE (base_a),
3308                                               TREE_TYPE (base_b))
3309                           || (!base_supports_access_fn_components_p (base_a)
3310                               && !base_supports_access_fn_components_p (base_b)
3311                               && operand_equal_p
3312                                    (TYPE_SIZE (TREE_TYPE (base_a)),
3313                                     TYPE_SIZE (TREE_TYPE (base_b)), 0)))
3314                       && (!loop_nest.exists ()
3315                           || (object_address_invariant_in_loop_p
3316                               (loop_nest[0], base_a))));
3317
3318   /* If the bases are the same, we can include the base variation too.
3319      E.g. the b accesses in:
3320
3321        for (int i = 0; i < n; ++i)
3322          b[i + 4][0] = b[i][0];
3323
3324      have a definite dependence distance of 4, while for:
3325
3326        for (int i = 0; i < n; ++i)
3327          a[i + 4][0] = b[i][0];
3328
3329      the dependence distance depends on the gap between a and b.
3330
3331      If the bases are different then we can only rely on the sequence
3332      rooted at a structure access, since arrays are allowed to overlap
3333      arbitrarily and change shape arbitrarily.  E.g. we treat this as
3334      valid code:
3335
3336        int a[256];
3337        ...
3338        ((int (*)[4][3]) &a[1])[i][0] += ((int (*)[4][3]) &a[2])[i][0];
3339
3340      where two lvalues with the same int[4][3] type overlap, and where
3341      both lvalues are distinct from the object's declared type.  */
3342   if (same_base_p)
3343     {
3344       if (indices_a->unconstrained_base)
3345         full_seq.length += 1;
3346     }
3347   else
3348     full_seq = struct_seq;
3349
3350   /* Punt if we didn't find a suitable sequence.  */
3351   if (full_seq.length == 0)
3352     {
3353       if (use_alt_indices
3354           || (TREE_CODE (DR_REF (a)) == MEM_REF
3355               && TREE_CODE (DR_REF (b)) == MEM_REF)
3356           || may_be_nonaddressable_p (DR_REF (a))
3357           || may_be_nonaddressable_p (DR_REF (b)))
3358         {
3359           /* Fully exhausted possibilities.  */
3360           DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3361           return res;
3362         }
3363
3364       /* Try evaluating both DRs as dereferences of pointers.  */
3365       if (!a->alt_indices.base_object
3366           && TREE_CODE (DR_REF (a)) != MEM_REF)
3367         {
3368           tree alt_ref = build2 (MEM_REF, TREE_TYPE (DR_REF (a)),
3369                                  build1 (ADDR_EXPR, ptr_type_node, DR_REF (a)),
3370                                  build_int_cst
3371                                    (reference_alias_ptr_type (DR_REF (a)), 0));
3372           dr_analyze_indices (&a->alt_indices, alt_ref,
3373                               loop_preheader_edge (loop_nest[0]),
3374                               loop_containing_stmt (DR_STMT (a)));
3375         }
3376       if (!b->alt_indices.base_object
3377           && TREE_CODE (DR_REF (b)) != MEM_REF)
3378         {
3379           tree alt_ref = build2 (MEM_REF, TREE_TYPE (DR_REF (b)),
3380                                  build1 (ADDR_EXPR, ptr_type_node, DR_REF (b)),
3381                                  build_int_cst
3382                                    (reference_alias_ptr_type (DR_REF (b)), 0));
3383           dr_analyze_indices (&b->alt_indices, alt_ref,
3384                               loop_preheader_edge (loop_nest[0]),
3385                               loop_containing_stmt (DR_STMT (b)));
3386         }
3387       return initialize_data_dependence_relation (res, loop_nest, true);
3388     }
3389
3390   if (!same_base_p)
3391     {
3392       /* Partial overlap is possible for different bases when strict aliasing
3393          is not in effect.  It's also possible if either base involves a union
3394          access; e.g. for:
3395
3396            struct s1 { int a[2]; };
3397            struct s2 { struct s1 b; int c; };
3398            struct s3 { int d; struct s1 e; };
3399            union u { struct s2 f; struct s3 g; } *p, *q;
3400
3401          the s1 at "p->f.b" (base "p->f") partially overlaps the s1 at
3402          "p->g.e" (base "p->g") and might partially overlap the s1 at
3403          "q->g.e" (base "q->g").  */
3404       if (!flag_strict_aliasing
3405           || ref_contains_union_access_p (full_seq.object_a)
3406           || ref_contains_union_access_p (full_seq.object_b))
3407         {
3408           DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3409           return res;
3410         }
3411
3412       DDR_COULD_BE_INDEPENDENT_P (res) = true;
3413       if (!loop_nest.exists ()
3414           || (object_address_invariant_in_loop_p (loop_nest[0],
3415                                                   full_seq.object_a)
3416               && object_address_invariant_in_loop_p (loop_nest[0],
3417                                                      full_seq.object_b)))
3418         {
3419           DDR_OBJECT_A (res) = full_seq.object_a;
3420           DDR_OBJECT_B (res) = full_seq.object_b;
3421         }
3422     }
3423
3424   DDR_AFFINE_P (res) = true;
3425   DDR_ARE_DEPENDENT (res) = NULL_TREE;
3426   DDR_SUBSCRIPTS (res).create (full_seq.length);
3427   DDR_LOOP_NEST (res) = loop_nest;
3428   DDR_SELF_REFERENCE (res) = false;
3429
3430   for (i = 0; i < full_seq.length; ++i)
3431     {
3432       struct subscript *subscript;
3433
3434       subscript = XNEW (struct subscript);
3435       SUB_ACCESS_FN (subscript, 0) = indices_a->access_fns[full_seq.start_a + i];
3436       SUB_ACCESS_FN (subscript, 1) = indices_b->access_fns[full_seq.start_b + i];
3437       SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
3438       SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
3439       SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
3440       SUB_DISTANCE (subscript) = chrec_dont_know;
3441       DDR_SUBSCRIPTS (res).safe_push (subscript);
3442     }
3443
3444   return res;
3445 }
3446
3447 /* Initialize a data dependence relation between data accesses A and
3448    B.  NB_LOOPS is the number of loops surrounding the references: the
3449    size of the classic distance/direction vectors.  */
3450
3451 struct data_dependence_relation *
3452 initialize_data_dependence_relation (struct data_reference *a,
3453                                      struct data_reference *b,
3454                                      vec<loop_p> loop_nest)
3455 {
3456   data_dependence_relation *res = XCNEW (struct data_dependence_relation);
3457   DDR_A (res) = a;
3458   DDR_B (res) = b;
3459   DDR_LOOP_NEST (res).create (0);
3460   DDR_SUBSCRIPTS (res).create (0);
3461   DDR_DIR_VECTS (res).create (0);
3462   DDR_DIST_VECTS (res).create (0);
3463
3464   if (a == NULL || b == NULL)
3465     {
3466       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3467       return res;
3468     }
3469
3470   /* If the data references do not alias, then they are independent.  */
3471   if (!dr_may_alias_p (a, b, loop_nest.exists () ? loop_nest[0] : NULL))
3472     {
3473       DDR_ARE_DEPENDENT (res) = chrec_known;
3474       return res;
3475     }
3476
3477   return initialize_data_dependence_relation (res, loop_nest, false);
3478 }
3479
3480
3481 /* Frees memory used by the conflict function F.  */
3482
3483 static void
3484 free_conflict_function (conflict_function *f)
3485 {
3486   unsigned i;
3487
3488   if (CF_NONTRIVIAL_P (f))
3489     {
3490       for (i = 0; i < f->n; i++)
3491         affine_fn_free (f->fns[i]);
3492     }
3493   free (f);
3494 }
3495
3496 /* Frees memory used by SUBSCRIPTS.  */
3497
3498 static void
3499 free_subscripts (vec<subscript_p> subscripts)
3500 {
3501   for (subscript_p s : subscripts)
3502     {
3503       free_conflict_function (s->conflicting_iterations_in_a);
3504       free_conflict_function (s->conflicting_iterations_in_b);
3505       free (s);
3506     }
3507   subscripts.release ();
3508 }
3509
3510 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
3511    description.  */
3512
3513 static inline void
3514 finalize_ddr_dependent (struct data_dependence_relation *ddr,
3515                         tree chrec)
3516 {
3517   DDR_ARE_DEPENDENT (ddr) = chrec;
3518   free_subscripts (DDR_SUBSCRIPTS (ddr));
3519   DDR_SUBSCRIPTS (ddr).create (0);
3520 }
3521
3522 /* The dependence relation DDR cannot be represented by a distance
3523    vector.  */
3524
3525 static inline void
3526 non_affine_dependence_relation (struct data_dependence_relation *ddr)
3527 {
3528   if (dump_file && (dump_flags & TDF_DETAILS))
3529     fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
3530
3531   DDR_AFFINE_P (ddr) = false;
3532 }
3533
3534 \f
3535
3536 /* This section contains the classic Banerjee tests.  */
3537
3538 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
3539    variables, i.e., if the ZIV (Zero Index Variable) test is true.  */
3540
3541 static inline bool
3542 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
3543 {
3544   return (evolution_function_is_constant_p (chrec_a)
3545           && evolution_function_is_constant_p (chrec_b));
3546 }
3547
3548 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
3549    variable, i.e., if the SIV (Single Index Variable) test is true.  */
3550
3551 static bool
3552 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
3553 {
3554   if ((evolution_function_is_constant_p (chrec_a)
3555        && evolution_function_is_univariate_p (chrec_b))
3556       || (evolution_function_is_constant_p (chrec_b)
3557           && evolution_function_is_univariate_p (chrec_a)))
3558     return true;
3559
3560   if (evolution_function_is_univariate_p (chrec_a)
3561       && evolution_function_is_univariate_p (chrec_b))
3562     {
3563       switch (TREE_CODE (chrec_a))
3564         {
3565         case POLYNOMIAL_CHREC:
3566           switch (TREE_CODE (chrec_b))
3567             {
3568             case POLYNOMIAL_CHREC:
3569               if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
3570                 return false;
3571               /* FALLTHRU */
3572
3573             default:
3574               return true;
3575             }
3576
3577         default:
3578           return true;
3579         }
3580     }
3581
3582   return false;
3583 }
3584
3585 /* Creates a conflict function with N dimensions.  The affine functions
3586    in each dimension follow.  */
3587
3588 static conflict_function *
3589 conflict_fn (unsigned n, ...)
3590 {
3591   unsigned i;
3592   conflict_function *ret = XCNEW (conflict_function);
3593   va_list ap;
3594
3595   gcc_assert (n > 0 && n <= MAX_DIM);
3596   va_start (ap, n);
3597
3598   ret->n = n;
3599   for (i = 0; i < n; i++)
3600     ret->fns[i] = va_arg (ap, affine_fn);
3601   va_end (ap);
3602
3603   return ret;
3604 }
3605
3606 /* Returns constant affine function with value CST.  */
3607
3608 static affine_fn
3609 affine_fn_cst (tree cst)
3610 {
3611   affine_fn fn;
3612   fn.create (1);
3613   fn.quick_push (cst);
3614   return fn;
3615 }
3616
3617 /* Returns affine function with single variable, CST + COEF * x_DIM.  */
3618
3619 static affine_fn
3620 affine_fn_univar (tree cst, unsigned dim, tree coef)
3621 {
3622   affine_fn fn;
3623   fn.create (dim + 1);
3624   unsigned i;
3625
3626   gcc_assert (dim > 0);
3627   fn.quick_push (cst);
3628   for (i = 1; i < dim; i++)
3629     fn.quick_push (integer_zero_node);
3630   fn.quick_push (coef);
3631   return fn;
3632 }
3633
3634 /* Analyze a ZIV (Zero Index Variable) subscript.  *OVERLAPS_A and
3635    *OVERLAPS_B are initialized to the functions that describe the
3636    relation between the elements accessed twice by CHREC_A and
3637    CHREC_B.  For k >= 0, the following property is verified:
3638
3639    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
3640
3641 static void
3642 analyze_ziv_subscript (tree chrec_a,
3643                        tree chrec_b,
3644                        conflict_function **overlaps_a,
3645                        conflict_function **overlaps_b,
3646                        tree *last_conflicts)
3647 {
3648   tree type, difference;
3649   dependence_stats.num_ziv++;
3650
3651   if (dump_file && (dump_flags & TDF_DETAILS))
3652     fprintf (dump_file, "(analyze_ziv_subscript \n");
3653
3654   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
3655   chrec_a = chrec_convert (type, chrec_a, NULL);
3656   chrec_b = chrec_convert (type, chrec_b, NULL);
3657   difference = chrec_fold_minus (type, chrec_a, chrec_b);
3658
3659   switch (TREE_CODE (difference))
3660     {
3661     case INTEGER_CST:
3662       if (integer_zerop (difference))
3663         {
3664           /* The difference is equal to zero: the accessed index
3665              overlaps for each iteration in the loop.  */
3666           *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3667           *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3668           *last_conflicts = chrec_dont_know;
3669           dependence_stats.num_ziv_dependent++;
3670         }
3671       else
3672         {
3673           /* The accesses do not overlap.  */
3674           *overlaps_a = conflict_fn_no_dependence ();
3675           *overlaps_b = conflict_fn_no_dependence ();
3676           *last_conflicts = integer_zero_node;
3677           dependence_stats.num_ziv_independent++;
3678         }
3679       break;
3680
3681     default:
3682       /* We're not sure whether the indexes overlap.  For the moment,
3683          conservatively answer "don't know".  */
3684       if (dump_file && (dump_flags & TDF_DETAILS))
3685         fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
3686
3687       *overlaps_a = conflict_fn_not_known ();
3688       *overlaps_b = conflict_fn_not_known ();
3689       *last_conflicts = chrec_dont_know;
3690       dependence_stats.num_ziv_unimplemented++;
3691       break;
3692     }
3693
3694   if (dump_file && (dump_flags & TDF_DETAILS))
3695     fprintf (dump_file, ")\n");
3696 }
3697
3698 /* Similar to max_stmt_executions_int, but returns the bound as a tree,
3699    and only if it fits to the int type.  If this is not the case, or the
3700    bound  on the number of iterations of LOOP could not be derived, returns
3701    chrec_dont_know.  */
3702
3703 static tree
3704 max_stmt_executions_tree (class loop *loop)
3705 {
3706   widest_int nit;
3707
3708   if (!max_stmt_executions (loop, &nit))
3709     return chrec_dont_know;
3710
3711   if (!wi::fits_to_tree_p (nit, unsigned_type_node))
3712     return chrec_dont_know;
3713
3714   return wide_int_to_tree (unsigned_type_node, nit);
3715 }
3716
3717 /* Determine whether the CHREC is always positive/negative.  If the expression
3718    cannot be statically analyzed, return false, otherwise set the answer into
3719    VALUE.  */
3720
3721 static bool
3722 chrec_is_positive (tree chrec, bool *value)
3723 {
3724   bool value0, value1, value2;
3725   tree end_value, nb_iter;
3726
3727   switch (TREE_CODE (chrec))
3728     {
3729     case POLYNOMIAL_CHREC:
3730       if (!chrec_is_positive (CHREC_LEFT (chrec), &value0)
3731           || !chrec_is_positive (CHREC_RIGHT (chrec), &value1))
3732         return false;
3733
3734       /* FIXME -- overflows.  */
3735       if (value0 == value1)
3736         {
3737           *value = value0;
3738           return true;
3739         }
3740
3741       /* Otherwise the chrec is under the form: "{-197, +, 2}_1",
3742          and the proof consists in showing that the sign never
3743          changes during the execution of the loop, from 0 to
3744          loop->nb_iterations.  */
3745       if (!evolution_function_is_affine_p (chrec))
3746         return false;
3747
3748       nb_iter = number_of_latch_executions (get_chrec_loop (chrec));
3749       if (chrec_contains_undetermined (nb_iter))
3750         return false;
3751
3752 #if 0
3753       /* TODO -- If the test is after the exit, we may decrease the number of
3754          iterations by one.  */
3755       if (after_exit)
3756         nb_iter = chrec_fold_minus (type, nb_iter, build_int_cst (type, 1));
3757 #endif
3758
3759       end_value = chrec_apply (CHREC_VARIABLE (chrec), chrec, nb_iter);
3760
3761       if (!chrec_is_positive (end_value, &value2))
3762         return false;
3763
3764       *value = value0;
3765       return value0 == value1;
3766
3767     case INTEGER_CST:
3768       switch (tree_int_cst_sgn (chrec))
3769         {
3770         case -1:
3771           *value = false;
3772           break;
3773         case 1:
3774           *value = true;
3775           break;
3776         default:
3777           return false;
3778         }
3779       return true;
3780
3781     default:
3782       return false;
3783     }
3784 }
3785
3786
3787 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
3788    constant, and CHREC_B is an affine function.  *OVERLAPS_A and
3789    *OVERLAPS_B are initialized to the functions that describe the
3790    relation between the elements accessed twice by CHREC_A and
3791    CHREC_B.  For k >= 0, the following property is verified:
3792
3793    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
3794
3795 static void
3796 analyze_siv_subscript_cst_affine (tree chrec_a,
3797                                   tree chrec_b,
3798                                   conflict_function **overlaps_a,
3799                                   conflict_function **overlaps_b,
3800                                   tree *last_conflicts)
3801 {
3802   bool value0, value1, value2;
3803   tree type, difference, tmp;
3804
3805   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
3806   chrec_a = chrec_convert (type, chrec_a, NULL);
3807   chrec_b = chrec_convert (type, chrec_b, NULL);
3808   difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
3809
3810   /* Special case overlap in the first iteration.  */
3811   if (integer_zerop (difference))
3812     {
3813       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3814       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3815       *last_conflicts = integer_one_node;
3816       return;
3817     }
3818
3819   if (!chrec_is_positive (initial_condition (difference), &value0))
3820     {
3821       if (dump_file && (dump_flags & TDF_DETAILS))
3822         fprintf (dump_file, "siv test failed: chrec is not positive.\n");
3823
3824       dependence_stats.num_siv_unimplemented++;
3825       *overlaps_a = conflict_fn_not_known ();
3826       *overlaps_b = conflict_fn_not_known ();
3827       *last_conflicts = chrec_dont_know;
3828       return;
3829     }
3830   else
3831     {
3832       if (value0 == false)
3833         {
3834           if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
3835               || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
3836             {
3837               if (dump_file && (dump_flags & TDF_DETAILS))
3838                 fprintf (dump_file, "siv test failed: chrec not positive.\n");
3839
3840               *overlaps_a = conflict_fn_not_known ();
3841               *overlaps_b = conflict_fn_not_known ();
3842               *last_conflicts = chrec_dont_know;
3843               dependence_stats.num_siv_unimplemented++;
3844               return;
3845             }
3846           else
3847             {
3848               if (value1 == true)
3849                 {
3850                   /* Example:
3851                      chrec_a = 12
3852                      chrec_b = {10, +, 1}
3853                   */
3854
3855                   if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
3856                     {
3857                       HOST_WIDE_INT numiter;
3858                       class loop *loop = get_chrec_loop (chrec_b);
3859
3860                       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3861                       tmp = fold_build2 (EXACT_DIV_EXPR, type,
3862                                          fold_build1 (ABS_EXPR, type, difference),
3863                                          CHREC_RIGHT (chrec_b));
3864                       *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
3865                       *last_conflicts = integer_one_node;
3866
3867
3868                       /* Perform weak-zero siv test to see if overlap is
3869                          outside the loop bounds.  */
3870                       numiter = max_stmt_executions_int (loop);
3871
3872                       if (numiter >= 0
3873                           && compare_tree_int (tmp, numiter) > 0)
3874                         {
3875                           free_conflict_function (*overlaps_a);
3876                           free_conflict_function (*overlaps_b);
3877                           *overlaps_a = conflict_fn_no_dependence ();
3878                           *overlaps_b = conflict_fn_no_dependence ();
3879                           *last_conflicts = integer_zero_node;
3880                           dependence_stats.num_siv_independent++;
3881                           return;
3882                         }
3883                       dependence_stats.num_siv_dependent++;
3884                       return;
3885                     }
3886
3887                   /* When the step does not divide the difference, there are
3888                      no overlaps.  */
3889                   else
3890                     {
3891                       *overlaps_a = conflict_fn_no_dependence ();
3892                       *overlaps_b = conflict_fn_no_dependence ();
3893                       *last_conflicts = integer_zero_node;
3894                       dependence_stats.num_siv_independent++;
3895                       return;
3896                     }
3897                 }
3898
3899               else
3900                 {
3901                   /* Example:
3902                      chrec_a = 12
3903                      chrec_b = {10, +, -1}
3904
3905                      In this case, chrec_a will not overlap with chrec_b.  */
3906                   *overlaps_a = conflict_fn_no_dependence ();
3907                   *overlaps_b = conflict_fn_no_dependence ();
3908                   *last_conflicts = integer_zero_node;
3909                   dependence_stats.num_siv_independent++;
3910                   return;
3911                 }
3912             }
3913         }
3914       else
3915         {
3916           if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
3917               || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
3918             {
3919               if (dump_file && (dump_flags & TDF_DETAILS))
3920                 fprintf (dump_file, "siv test failed: chrec not positive.\n");
3921
3922               *overlaps_a = conflict_fn_not_known ();
3923               *overlaps_b = conflict_fn_not_known ();
3924               *last_conflicts = chrec_dont_know;
3925               dependence_stats.num_siv_unimplemented++;
3926               return;
3927             }
3928           else
3929             {
3930               if (value2 == false)
3931                 {
3932                   /* Example:
3933                      chrec_a = 3
3934                      chrec_b = {10, +, -1}
3935                   */
3936                   if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
3937                     {
3938                       HOST_WIDE_INT numiter;
3939                       class loop *loop = get_chrec_loop (chrec_b);
3940
3941                       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3942                       tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
3943                                          CHREC_RIGHT (chrec_b));
3944                       *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
3945                       *last_conflicts = integer_one_node;
3946
3947                       /* Perform weak-zero siv test to see if overlap is
3948                          outside the loop bounds.  */
3949                       numiter = max_stmt_executions_int (loop);
3950
3951                       if (numiter >= 0
3952                           && compare_tree_int (tmp, numiter) > 0)
3953                         {
3954                           free_conflict_function (*overlaps_a);
3955                           free_conflict_function (*overlaps_b);
3956                           *overlaps_a = conflict_fn_no_dependence ();
3957                           *overlaps_b = conflict_fn_no_dependence ();
3958                           *last_conflicts = integer_zero_node;
3959                           dependence_stats.num_siv_independent++;
3960                           return;
3961                         }
3962                       dependence_stats.num_siv_dependent++;
3963                       return;
3964                     }
3965
3966                   /* When the step does not divide the difference, there
3967                      are no overlaps.  */
3968                   else
3969                     {
3970                       *overlaps_a = conflict_fn_no_dependence ();
3971                       *overlaps_b = conflict_fn_no_dependence ();
3972                       *last_conflicts = integer_zero_node;
3973                       dependence_stats.num_siv_independent++;
3974                       return;
3975                     }
3976                 }
3977               else
3978                 {
3979                   /* Example:
3980                      chrec_a = 3
3981                      chrec_b = {4, +, 1}
3982
3983                      In this case, chrec_a will not overlap with chrec_b.  */
3984                   *overlaps_a = conflict_fn_no_dependence ();
3985                   *overlaps_b = conflict_fn_no_dependence ();
3986                   *last_conflicts = integer_zero_node;
3987                   dependence_stats.num_siv_independent++;
3988                   return;
3989                 }
3990             }
3991         }
3992     }
3993 }
3994
3995 /* Helper recursive function for initializing the matrix A.  Returns
3996    the initial value of CHREC.  */
3997
3998 static tree
3999 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
4000 {
4001   gcc_assert (chrec);
4002
4003   switch (TREE_CODE (chrec))
4004     {
4005     case POLYNOMIAL_CHREC:
4006       HOST_WIDE_INT chrec_right;
4007       if (!cst_and_fits_in_hwi (CHREC_RIGHT (chrec)))
4008         return chrec_dont_know;
4009       chrec_right = int_cst_value (CHREC_RIGHT (chrec));
4010       /* We want to be able to negate without overflow.  */
4011       if (chrec_right == HOST_WIDE_INT_MIN)
4012         return chrec_dont_know;
4013       A[index][0] = mult * chrec_right;
4014       return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
4015
4016     case PLUS_EXPR:
4017     case MULT_EXPR:
4018     case MINUS_EXPR:
4019       {
4020         tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
4021         tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
4022
4023         return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
4024       }
4025
4026     CASE_CONVERT:
4027       {
4028         tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
4029         return chrec_convert (chrec_type (chrec), op, NULL);
4030       }
4031
4032     case BIT_NOT_EXPR:
4033       {
4034         /* Handle ~X as -1 - X.  */
4035         tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
4036         return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
4037                               build_int_cst (TREE_TYPE (chrec), -1), op);
4038       }
4039
4040     case INTEGER_CST:
4041       return chrec;
4042
4043     default:
4044       gcc_unreachable ();
4045       return NULL_TREE;
4046     }
4047 }
4048
4049 #define FLOOR_DIV(x,y) ((x) / (y))
4050
4051 /* Solves the special case of the Diophantine equation:
4052    | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
4053
4054    Computes the descriptions OVERLAPS_A and OVERLAPS_B.  NITER is the
4055    number of iterations that loops X and Y run.  The overlaps will be
4056    constructed as evolutions in dimension DIM.  */
4057
4058 static void
4059 compute_overlap_steps_for_affine_univar (HOST_WIDE_INT niter,
4060                                          HOST_WIDE_INT step_a,
4061                                          HOST_WIDE_INT step_b,
4062                                          affine_fn *overlaps_a,
4063                                          affine_fn *overlaps_b,
4064                                          tree *last_conflicts, int dim)
4065 {
4066   if (((step_a > 0 && step_b > 0)
4067        || (step_a < 0 && step_b < 0)))
4068     {
4069       HOST_WIDE_INT step_overlaps_a, step_overlaps_b;
4070       HOST_WIDE_INT gcd_steps_a_b, last_conflict, tau2;
4071
4072       gcd_steps_a_b = gcd (step_a, step_b);
4073       step_overlaps_a = step_b / gcd_steps_a_b;
4074       step_overlaps_b = step_a / gcd_steps_a_b;
4075
4076       if (niter > 0)
4077         {
4078           tau2 = FLOOR_DIV (niter, step_overlaps_a);
4079           tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
4080           last_conflict = tau2;
4081           *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
4082         }
4083       else
4084         *last_conflicts = chrec_dont_know;
4085
4086       *overlaps_a = affine_fn_univar (integer_zero_node, dim,
4087                                       build_int_cst (NULL_TREE,
4088                                                      step_overlaps_a));
4089       *overlaps_b = affine_fn_univar (integer_zero_node, dim,
4090                                       build_int_cst (NULL_TREE,
4091                                                      step_overlaps_b));
4092     }
4093
4094   else
4095     {
4096       *overlaps_a = affine_fn_cst (integer_zero_node);
4097       *overlaps_b = affine_fn_cst (integer_zero_node);
4098       *last_conflicts = integer_zero_node;
4099     }
4100 }
4101
4102 /* Solves the special case of a Diophantine equation where CHREC_A is
4103    an affine bivariate function, and CHREC_B is an affine univariate
4104    function.  For example,
4105
4106    | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
4107
4108    has the following overlapping functions:
4109
4110    | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
4111    | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
4112    | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
4113
4114    FORNOW: This is a specialized implementation for a case occurring in
4115    a common benchmark.  Implement the general algorithm.  */
4116
4117 static void
4118 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
4119                                       conflict_function **overlaps_a,
4120                                       conflict_function **overlaps_b,
4121                                       tree *last_conflicts)
4122 {
4123   bool xz_p, yz_p, xyz_p;
4124   HOST_WIDE_INT step_x, step_y, step_z;
4125   HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
4126   affine_fn overlaps_a_xz, overlaps_b_xz;
4127   affine_fn overlaps_a_yz, overlaps_b_yz;
4128   affine_fn overlaps_a_xyz, overlaps_b_xyz;
4129   affine_fn ova1, ova2, ovb;
4130   tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
4131
4132   step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
4133   step_y = int_cst_value (CHREC_RIGHT (chrec_a));
4134   step_z = int_cst_value (CHREC_RIGHT (chrec_b));
4135
4136   niter_x = max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)));
4137   niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a));
4138   niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b));
4139
4140   if (niter_x < 0 || niter_y < 0 || niter_z < 0)
4141     {
4142       if (dump_file && (dump_flags & TDF_DETAILS))
4143         fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
4144
4145       *overlaps_a = conflict_fn_not_known ();
4146       *overlaps_b = conflict_fn_not_known ();
4147       *last_conflicts = chrec_dont_know;
4148       return;
4149     }
4150
4151   niter = MIN (niter_x, niter_z);
4152   compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
4153                                            &overlaps_a_xz,
4154                                            &overlaps_b_xz,
4155                                            &last_conflicts_xz, 1);
4156   niter = MIN (niter_y, niter_z);
4157   compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
4158                                            &overlaps_a_yz,
4159                                            &overlaps_b_yz,
4160                                            &last_conflicts_yz, 2);
4161   niter = MIN (niter_x, niter_z);
4162   niter = MIN (niter_y, niter);
4163   compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
4164                                            &overlaps_a_xyz,
4165                                            &overlaps_b_xyz,
4166                                            &last_conflicts_xyz, 3);
4167
4168   xz_p = !integer_zerop (last_conflicts_xz);
4169   yz_p = !integer_zerop (last_conflicts_yz);
4170   xyz_p = !integer_zerop (last_conflicts_xyz);
4171
4172   if (xz_p || yz_p || xyz_p)
4173     {
4174       ova1 = affine_fn_cst (integer_zero_node);
4175       ova2 = affine_fn_cst (integer_zero_node);
4176       ovb = affine_fn_cst (integer_zero_node);
4177       if (xz_p)
4178         {
4179           affine_fn t0 = ova1;
4180           affine_fn t2 = ovb;
4181
4182           ova1 = affine_fn_plus (ova1, overlaps_a_xz);
4183           ovb = affine_fn_plus (ovb, overlaps_b_xz);
4184           affine_fn_free (t0);
4185           affine_fn_free (t2);
4186           *last_conflicts = last_conflicts_xz;
4187         }
4188       if (yz_p)
4189         {
4190           affine_fn t0 = ova2;
4191           affine_fn t2 = ovb;
4192
4193           ova2 = affine_fn_plus (ova2, overlaps_a_yz);
4194           ovb = affine_fn_plus (ovb, overlaps_b_yz);
4195           affine_fn_free (t0);
4196           affine_fn_free (t2);
4197           *last_conflicts = last_conflicts_yz;
4198         }
4199       if (xyz_p)
4200         {
4201           affine_fn t0 = ova1;
4202           affine_fn t2 = ova2;
4203           affine_fn t4 = ovb;
4204
4205           ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
4206           ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
4207           ovb = affine_fn_plus (ovb, overlaps_b_xyz);
4208           affine_fn_free (t0);
4209           affine_fn_free (t2);
4210           affine_fn_free (t4);
4211           *last_conflicts = last_conflicts_xyz;
4212         }
4213       *overlaps_a = conflict_fn (2, ova1, ova2);
4214       *overlaps_b = conflict_fn (1, ovb);
4215     }
4216   else
4217     {
4218       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4219       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4220       *last_conflicts = integer_zero_node;
4221     }
4222
4223   affine_fn_free (overlaps_a_xz);
4224   affine_fn_free (overlaps_b_xz);
4225   affine_fn_free (overlaps_a_yz);
4226   affine_fn_free (overlaps_b_yz);
4227   affine_fn_free (overlaps_a_xyz);
4228   affine_fn_free (overlaps_b_xyz);
4229 }
4230
4231 /* Copy the elements of vector VEC1 with length SIZE to VEC2.  */
4232
4233 static void
4234 lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
4235                     int size)
4236 {
4237   memcpy (vec2, vec1, size * sizeof (*vec1));
4238 }
4239
4240 /* Copy the elements of M x N matrix MAT1 to MAT2.  */
4241
4242 static void
4243 lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
4244                     int m, int n)
4245 {
4246   int i;
4247
4248   for (i = 0; i < m; i++)
4249     lambda_vector_copy (mat1[i], mat2[i], n);
4250 }
4251
4252 /* Store the N x N identity matrix in MAT.  */
4253
4254 static void
4255 lambda_matrix_id (lambda_matrix mat, int size)
4256 {
4257   int i, j;
4258
4259   for (i = 0; i < size; i++)
4260     for (j = 0; j < size; j++)
4261       mat[i][j] = (i == j) ? 1 : 0;
4262 }
4263
4264 /* Return the index of the first nonzero element of vector VEC1 between
4265    START and N.  We must have START <= N.
4266    Returns N if VEC1 is the zero vector.  */
4267
4268 static int
4269 lambda_vector_first_nz (lambda_vector vec1, int n, int start)
4270 {
4271   int j = start;
4272   while (j < n && vec1[j] == 0)
4273     j++;
4274   return j;
4275 }
4276
4277 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
4278    R2 = R2 + CONST1 * R1.  */
4279
4280 static bool
4281 lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2,
4282                        lambda_int const1)
4283 {
4284   int i;
4285
4286   if (const1 == 0)
4287     return true;
4288
4289   for (i = 0; i < n; i++)
4290     {
4291       bool ovf;
4292       lambda_int tem = mul_hwi (mat[r1][i], const1, &ovf);
4293       if (ovf)
4294         return false;
4295       lambda_int tem2 = add_hwi (mat[r2][i], tem, &ovf);
4296       if (ovf || tem2 == HOST_WIDE_INT_MIN)
4297         return false;
4298       mat[r2][i] = tem2;
4299     }
4300
4301   return true;
4302 }
4303
4304 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
4305    and store the result in VEC2.  */
4306
4307 static void
4308 lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
4309                           int size, lambda_int const1)
4310 {
4311   int i;
4312
4313   if (const1 == 0)
4314     lambda_vector_clear (vec2, size);
4315   else
4316     for (i = 0; i < size; i++)
4317       vec2[i] = const1 * vec1[i];
4318 }
4319
4320 /* Negate vector VEC1 with length SIZE and store it in VEC2.  */
4321
4322 static void
4323 lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
4324                       int size)
4325 {
4326   lambda_vector_mult_const (vec1, vec2, size, -1);
4327 }
4328
4329 /* Negate row R1 of matrix MAT which has N columns.  */
4330
4331 static void
4332 lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
4333 {
4334   lambda_vector_negate (mat[r1], mat[r1], n);
4335 }
4336
4337 /* Return true if two vectors are equal.  */
4338
4339 static bool
4340 lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
4341 {
4342   int i;
4343   for (i = 0; i < size; i++)
4344     if (vec1[i] != vec2[i])
4345       return false;
4346   return true;
4347 }
4348
4349 /* Given an M x N integer matrix A, this function determines an M x
4350    M unimodular matrix U, and an M x N echelon matrix S such that
4351    "U.A = S".  This decomposition is also known as "right Hermite".
4352
4353    Ref: Algorithm 2.1 page 33 in "Loop Transformations for
4354    Restructuring Compilers" Utpal Banerjee.  */
4355
4356 static bool
4357 lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
4358                              lambda_matrix S, lambda_matrix U)
4359 {
4360   int i, j, i0 = 0;
4361
4362   lambda_matrix_copy (A, S, m, n);
4363   lambda_matrix_id (U, m);
4364
4365   for (j = 0; j < n; j++)
4366     {
4367       if (lambda_vector_first_nz (S[j], m, i0) < m)
4368         {
4369           ++i0;
4370           for (i = m - 1; i >= i0; i--)
4371             {
4372               while (S[i][j] != 0)
4373                 {
4374                   lambda_int factor, a, b;
4375
4376                   a = S[i-1][j];
4377                   b = S[i][j];
4378                   gcc_assert (a != HOST_WIDE_INT_MIN);
4379                   factor = a / b;
4380
4381                   if (!lambda_matrix_row_add (S, n, i, i-1, -factor))
4382                     return false;
4383                   std::swap (S[i], S[i-1]);
4384
4385                   if (!lambda_matrix_row_add (U, m, i, i-1, -factor))
4386                     return false;
4387                   std::swap (U[i], U[i-1]);
4388                 }
4389             }
4390         }
4391     }
4392
4393   return true;
4394 }
4395
4396 /* Determines the overlapping elements due to accesses CHREC_A and
4397    CHREC_B, that are affine functions.  This function cannot handle
4398    symbolic evolution functions, ie. when initial conditions are
4399    parameters, because it uses lambda matrices of integers.  */
4400
4401 static void
4402 analyze_subscript_affine_affine (tree chrec_a,
4403                                  tree chrec_b,
4404                                  conflict_function **overlaps_a,
4405                                  conflict_function **overlaps_b,
4406                                  tree *last_conflicts)
4407 {
4408   unsigned nb_vars_a, nb_vars_b, dim;
4409   lambda_int gamma, gcd_alpha_beta;
4410   lambda_matrix A, U, S;
4411   struct obstack scratch_obstack;
4412
4413   if (eq_evolutions_p (chrec_a, chrec_b))
4414     {
4415       /* The accessed index overlaps for each iteration in the
4416          loop.  */
4417       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4418       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4419       *last_conflicts = chrec_dont_know;
4420       return;
4421     }
4422   if (dump_file && (dump_flags & TDF_DETAILS))
4423     fprintf (dump_file, "(analyze_subscript_affine_affine \n");
4424
4425   /* For determining the initial intersection, we have to solve a
4426      Diophantine equation.  This is the most time consuming part.
4427
4428      For answering to the question: "Is there a dependence?" we have
4429      to prove that there exists a solution to the Diophantine
4430      equation, and that the solution is in the iteration domain,
4431      i.e. the solution is positive or zero, and that the solution
4432      happens before the upper bound loop.nb_iterations.  Otherwise
4433      there is no dependence.  This function outputs a description of
4434      the iterations that hold the intersections.  */
4435
4436   nb_vars_a = nb_vars_in_chrec (chrec_a);
4437   nb_vars_b = nb_vars_in_chrec (chrec_b);
4438
4439   gcc_obstack_init (&scratch_obstack);
4440
4441   dim = nb_vars_a + nb_vars_b;
4442   U = lambda_matrix_new (dim, dim, &scratch_obstack);
4443   A = lambda_matrix_new (dim, 1, &scratch_obstack);
4444   S = lambda_matrix_new (dim, 1, &scratch_obstack);
4445
4446   tree init_a = initialize_matrix_A (A, chrec_a, 0, 1);
4447   tree init_b = initialize_matrix_A (A, chrec_b, nb_vars_a, -1);
4448   if (init_a == chrec_dont_know
4449       || init_b == chrec_dont_know)
4450     {
4451       if (dump_file && (dump_flags & TDF_DETAILS))
4452         fprintf (dump_file, "affine-affine test failed: "
4453                  "representation issue.\n");
4454       *overlaps_a = conflict_fn_not_known ();
4455       *overlaps_b = conflict_fn_not_known ();
4456       *last_conflicts = chrec_dont_know;
4457       goto end_analyze_subs_aa;
4458     }
4459   gamma = int_cst_value (init_b) - int_cst_value (init_a);
4460
4461   /* Don't do all the hard work of solving the Diophantine equation
4462      when we already know the solution: for example,
4463      | {3, +, 1}_1
4464      | {3, +, 4}_2
4465      | gamma = 3 - 3 = 0.
4466      Then the first overlap occurs during the first iterations:
4467      | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
4468   */
4469   if (gamma == 0)
4470     {
4471       if (nb_vars_a == 1 && nb_vars_b == 1)
4472         {
4473           HOST_WIDE_INT step_a, step_b;
4474           HOST_WIDE_INT niter, niter_a, niter_b;
4475           affine_fn ova, ovb;
4476
4477           niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a));
4478           niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b));
4479           niter = MIN (niter_a, niter_b);
4480           step_a = int_cst_value (CHREC_RIGHT (chrec_a));
4481           step_b = int_cst_value (CHREC_RIGHT (chrec_b));
4482
4483           compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
4484                                                    &ova, &ovb,
4485                                                    last_conflicts, 1);
4486           *overlaps_a = conflict_fn (1, ova);
4487           *overlaps_b = conflict_fn (1, ovb);
4488         }
4489
4490       else if (nb_vars_a == 2 && nb_vars_b == 1)
4491         compute_overlap_steps_for_affine_1_2
4492           (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
4493
4494       else if (nb_vars_a == 1 && nb_vars_b == 2)
4495         compute_overlap_steps_for_affine_1_2
4496           (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
4497
4498       else
4499         {
4500           if (dump_file && (dump_flags & TDF_DETAILS))
4501             fprintf (dump_file, "affine-affine test failed: too many variables.\n");
4502           *overlaps_a = conflict_fn_not_known ();
4503           *overlaps_b = conflict_fn_not_known ();
4504           *last_conflicts = chrec_dont_know;
4505         }
4506       goto end_analyze_subs_aa;
4507     }
4508
4509   /* U.A = S */
4510   if (!lambda_matrix_right_hermite (A, dim, 1, S, U))
4511     {
4512       *overlaps_a = conflict_fn_not_known ();
4513       *overlaps_b = conflict_fn_not_known ();
4514       *last_conflicts = chrec_dont_know;
4515       goto end_analyze_subs_aa;
4516     }
4517
4518   if (S[0][0] < 0)
4519     {
4520       S[0][0] *= -1;
4521       lambda_matrix_row_negate (U, dim, 0);
4522     }
4523   gcd_alpha_beta = S[0][0];
4524
4525   /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
4526      but that is a quite strange case.  Instead of ICEing, answer
4527      don't know.  */
4528   if (gcd_alpha_beta == 0)
4529     {
4530       *overlaps_a = conflict_fn_not_known ();
4531       *overlaps_b = conflict_fn_not_known ();
4532       *last_conflicts = chrec_dont_know;
4533       goto end_analyze_subs_aa;
4534     }
4535
4536   /* The classic "gcd-test".  */
4537   if (!int_divides_p (gcd_alpha_beta, gamma))
4538     {
4539       /* The "gcd-test" has determined that there is no integer
4540          solution, i.e. there is no dependence.  */
4541       *overlaps_a = conflict_fn_no_dependence ();
4542       *overlaps_b = conflict_fn_no_dependence ();
4543       *last_conflicts = integer_zero_node;
4544     }
4545
4546   /* Both access functions are univariate.  This includes SIV and MIV cases.  */
4547   else if (nb_vars_a == 1 && nb_vars_b == 1)
4548     {
4549       /* Both functions should have the same evolution sign.  */
4550       if (((A[0][0] > 0 && -A[1][0] > 0)
4551            || (A[0][0] < 0 && -A[1][0] < 0)))
4552         {
4553           /* The solutions are given by:
4554              |
4555              | [GAMMA/GCD_ALPHA_BETA  t].[u11 u12]  = [x0]
4556              |                           [u21 u22]    [y0]
4557
4558              For a given integer t.  Using the following variables,
4559
4560              | i0 = u11 * gamma / gcd_alpha_beta
4561              | j0 = u12 * gamma / gcd_alpha_beta
4562              | i1 = u21
4563              | j1 = u22
4564
4565              the solutions are:
4566
4567              | x0 = i0 + i1 * t,
4568              | y0 = j0 + j1 * t.  */
4569           HOST_WIDE_INT i0, j0, i1, j1;
4570
4571           i0 = U[0][0] * gamma / gcd_alpha_beta;
4572           j0 = U[0][1] * gamma / gcd_alpha_beta;
4573           i1 = U[1][0];
4574           j1 = U[1][1];
4575
4576           if ((i1 == 0 && i0 < 0)
4577               || (j1 == 0 && j0 < 0))
4578             {
4579               /* There is no solution.
4580                  FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
4581                  falls in here, but for the moment we don't look at the
4582                  upper bound of the iteration domain.  */
4583               *overlaps_a = conflict_fn_no_dependence ();
4584               *overlaps_b = conflict_fn_no_dependence ();
4585               *last_conflicts = integer_zero_node;
4586               goto end_analyze_subs_aa;
4587             }
4588
4589           if (i1 > 0 && j1 > 0)
4590             {
4591               HOST_WIDE_INT niter_a
4592                 = max_stmt_executions_int (get_chrec_loop (chrec_a));
4593               HOST_WIDE_INT niter_b
4594                 = max_stmt_executions_int (get_chrec_loop (chrec_b));
4595               HOST_WIDE_INT niter = MIN (niter_a, niter_b);
4596
4597               /* (X0, Y0) is a solution of the Diophantine equation:
4598                  "chrec_a (X0) = chrec_b (Y0)".  */
4599               HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
4600                                         CEIL (-j0, j1));
4601               HOST_WIDE_INT x0 = i1 * tau1 + i0;
4602               HOST_WIDE_INT y0 = j1 * tau1 + j0;
4603
4604               /* (X1, Y1) is the smallest positive solution of the eq
4605                  "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
4606                  first conflict occurs.  */
4607               HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
4608               HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
4609               HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
4610
4611               if (niter > 0)
4612                 {
4613                   /* If the overlap occurs outside of the bounds of the
4614                      loop, there is no dependence.  */
4615                   if (x1 >= niter_a || y1 >= niter_b)
4616                     {
4617                       *overlaps_a = conflict_fn_no_dependence ();
4618                       *overlaps_b = conflict_fn_no_dependence ();
4619                       *last_conflicts = integer_zero_node;
4620                       goto end_analyze_subs_aa;
4621                     }
4622
4623                   /* max stmt executions can get quite large, avoid
4624                      overflows by using wide ints here.  */
4625                   widest_int tau2
4626                     = wi::smin (wi::sdiv_floor (wi::sub (niter_a, i0), i1),
4627                                 wi::sdiv_floor (wi::sub (niter_b, j0), j1));
4628                   widest_int last_conflict = wi::sub (tau2, (x1 - i0)/i1);
4629                   if (wi::min_precision (last_conflict, SIGNED)
4630                       <= TYPE_PRECISION (integer_type_node))
4631                     *last_conflicts
4632                        = build_int_cst (integer_type_node,
4633                                         last_conflict.to_shwi ());
4634                   else
4635                     *last_conflicts = chrec_dont_know;
4636                 }
4637               else
4638                 *last_conflicts = chrec_dont_know;
4639
4640               *overlaps_a
4641                 = conflict_fn (1,
4642                                affine_fn_univar (build_int_cst (NULL_TREE, x1),
4643                                                  1,
4644                                                  build_int_cst (NULL_TREE, i1)));
4645               *overlaps_b
4646                 = conflict_fn (1,
4647                                affine_fn_univar (build_int_cst (NULL_TREE, y1),
4648                                                  1,
4649                                                  build_int_cst (NULL_TREE, j1)));
4650             }
4651           else
4652             {
4653               /* FIXME: For the moment, the upper bound of the
4654                  iteration domain for i and j is not checked.  */
4655               if (dump_file && (dump_flags & TDF_DETAILS))
4656                 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
4657               *overlaps_a = conflict_fn_not_known ();
4658               *overlaps_b = conflict_fn_not_known ();
4659               *last_conflicts = chrec_dont_know;
4660             }
4661         }
4662       else
4663         {
4664           if (dump_file && (dump_flags & TDF_DETAILS))
4665             fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
4666           *overlaps_a = conflict_fn_not_known ();
4667           *overlaps_b = conflict_fn_not_known ();
4668           *last_conflicts = chrec_dont_know;
4669         }
4670     }
4671   else
4672     {
4673       if (dump_file && (dump_flags & TDF_DETAILS))
4674         fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
4675       *overlaps_a = conflict_fn_not_known ();
4676       *overlaps_b = conflict_fn_not_known ();
4677       *last_conflicts = chrec_dont_know;
4678     }
4679
4680 end_analyze_subs_aa:
4681   obstack_free (&scratch_obstack, NULL);
4682   if (dump_file && (dump_flags & TDF_DETAILS))
4683     {
4684       fprintf (dump_file, "  (overlaps_a = ");
4685       dump_conflict_function (dump_file, *overlaps_a);
4686       fprintf (dump_file, ")\n  (overlaps_b = ");
4687       dump_conflict_function (dump_file, *overlaps_b);
4688       fprintf (dump_file, "))\n");
4689     }
4690 }
4691
4692 /* Returns true when analyze_subscript_affine_affine can be used for
4693    determining the dependence relation between chrec_a and chrec_b,
4694    that contain symbols.  This function modifies chrec_a and chrec_b
4695    such that the analysis result is the same, and such that they don't
4696    contain symbols, and then can safely be passed to the analyzer.
4697
4698    Example: The analysis of the following tuples of evolutions produce
4699    the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
4700    vs. {0, +, 1}_1
4701
4702    {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
4703    {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
4704 */
4705
4706 static bool
4707 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
4708 {
4709   tree diff, type, left_a, left_b, right_b;
4710
4711   if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
4712       || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
4713     /* FIXME: For the moment not handled.  Might be refined later.  */
4714     return false;
4715
4716   type = chrec_type (*chrec_a);
4717   left_a = CHREC_LEFT (*chrec_a);
4718   left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
4719   diff = chrec_fold_minus (type, left_a, left_b);
4720
4721   if (!evolution_function_is_constant_p (diff))
4722     return false;
4723
4724   if (dump_file && (dump_flags & TDF_DETAILS))
4725     fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
4726
4727   *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
4728                                      diff, CHREC_RIGHT (*chrec_a));
4729   right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
4730   *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
4731                                      build_int_cst (type, 0),
4732                                      right_b);
4733   return true;
4734 }
4735
4736 /* Analyze a SIV (Single Index Variable) subscript.  *OVERLAPS_A and
4737    *OVERLAPS_B are initialized to the functions that describe the
4738    relation between the elements accessed twice by CHREC_A and
4739    CHREC_B.  For k >= 0, the following property is verified:
4740
4741    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
4742
4743 static void
4744 analyze_siv_subscript (tree chrec_a,
4745                        tree chrec_b,
4746                        conflict_function **overlaps_a,
4747                        conflict_function **overlaps_b,
4748                        tree *last_conflicts,
4749                        int loop_nest_num)
4750 {
4751   dependence_stats.num_siv++;
4752
4753   if (dump_file && (dump_flags & TDF_DETAILS))
4754     fprintf (dump_file, "(analyze_siv_subscript \n");
4755
4756   if (evolution_function_is_constant_p (chrec_a)
4757       && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
4758     analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
4759                                       overlaps_a, overlaps_b, last_conflicts);
4760
4761   else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
4762            && evolution_function_is_constant_p (chrec_b))
4763     analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
4764                                       overlaps_b, overlaps_a, last_conflicts);
4765
4766   else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
4767            && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
4768     {
4769       if (!chrec_contains_symbols (chrec_a)
4770           && !chrec_contains_symbols (chrec_b))
4771         {
4772           analyze_subscript_affine_affine (chrec_a, chrec_b,
4773                                            overlaps_a, overlaps_b,
4774                                            last_conflicts);
4775
4776           if (CF_NOT_KNOWN_P (*overlaps_a)
4777               || CF_NOT_KNOWN_P (*overlaps_b))
4778             dependence_stats.num_siv_unimplemented++;
4779           else if (CF_NO_DEPENDENCE_P (*overlaps_a)
4780                    || CF_NO_DEPENDENCE_P (*overlaps_b))
4781             dependence_stats.num_siv_independent++;
4782           else
4783             dependence_stats.num_siv_dependent++;
4784         }
4785       else if (can_use_analyze_subscript_affine_affine (&chrec_a,
4786                                                         &chrec_b))
4787         {
4788           analyze_subscript_affine_affine (chrec_a, chrec_b,
4789                                            overlaps_a, overlaps_b,
4790                                            last_conflicts);
4791
4792           if (CF_NOT_KNOWN_P (*overlaps_a)
4793               || CF_NOT_KNOWN_P (*overlaps_b))
4794             dependence_stats.num_siv_unimplemented++;
4795           else if (CF_NO_DEPENDENCE_P (*overlaps_a)
4796                    || CF_NO_DEPENDENCE_P (*overlaps_b))
4797             dependence_stats.num_siv_independent++;
4798           else
4799             dependence_stats.num_siv_dependent++;
4800         }
4801       else
4802         goto siv_subscript_dontknow;
4803     }
4804
4805   else
4806     {
4807     siv_subscript_dontknow:;
4808       if (dump_file && (dump_flags & TDF_DETAILS))
4809         fprintf (dump_file, "  siv test failed: unimplemented");
4810       *overlaps_a = conflict_fn_not_known ();
4811       *overlaps_b = conflict_fn_not_known ();
4812       *last_conflicts = chrec_dont_know;
4813       dependence_stats.num_siv_unimplemented++;
4814     }
4815
4816   if (dump_file && (dump_flags & TDF_DETAILS))
4817     fprintf (dump_file, ")\n");
4818 }
4819
4820 /* Returns false if we can prove that the greatest common divisor of the steps
4821    of CHREC does not divide CST, false otherwise.  */
4822
4823 static bool
4824 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
4825 {
4826   HOST_WIDE_INT cd = 0, val;
4827   tree step;
4828
4829   if (!tree_fits_shwi_p (cst))
4830     return true;
4831   val = tree_to_shwi (cst);
4832
4833   while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
4834     {
4835       step = CHREC_RIGHT (chrec);
4836       if (!tree_fits_shwi_p (step))
4837         return true;
4838       cd = gcd (cd, tree_to_shwi (step));
4839       chrec = CHREC_LEFT (chrec);
4840     }
4841
4842   return val % cd == 0;
4843 }
4844
4845 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
4846    LOOP_NEST.  *OVERLAPS_A and *OVERLAPS_B are initialized to the
4847    functions that describe the relation between the elements accessed
4848    twice by CHREC_A and CHREC_B.  For k >= 0, the following property
4849    is verified:
4850
4851    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
4852
4853 static void
4854 analyze_miv_subscript (tree chrec_a,
4855                        tree chrec_b,
4856                        conflict_function **overlaps_a,
4857                        conflict_function **overlaps_b,
4858                        tree *last_conflicts,
4859                        class loop *loop_nest)
4860 {
4861   tree type, difference;
4862
4863   dependence_stats.num_miv++;
4864   if (dump_file && (dump_flags & TDF_DETAILS))
4865     fprintf (dump_file, "(analyze_miv_subscript \n");
4866
4867   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
4868   chrec_a = chrec_convert (type, chrec_a, NULL);
4869   chrec_b = chrec_convert (type, chrec_b, NULL);
4870   difference = chrec_fold_minus (type, chrec_a, chrec_b);
4871
4872   if (eq_evolutions_p (chrec_a, chrec_b))
4873     {
4874       /* Access functions are the same: all the elements are accessed
4875          in the same order.  */
4876       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4877       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4878       *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
4879       dependence_stats.num_miv_dependent++;
4880     }
4881
4882   else if (evolution_function_is_constant_p (difference)
4883            && evolution_function_is_affine_multivariate_p (chrec_a,
4884                                                            loop_nest->num)
4885            && !gcd_of_steps_may_divide_p (chrec_a, difference))
4886     {
4887       /* testsuite/.../ssa-chrec-33.c
4888          {{21, +, 2}_1, +, -2}_2  vs.  {{20, +, 2}_1, +, -2}_2
4889
4890          The difference is 1, and all the evolution steps are multiples
4891          of 2, consequently there are no overlapping elements.  */
4892       *overlaps_a = conflict_fn_no_dependence ();
4893       *overlaps_b = conflict_fn_no_dependence ();
4894       *last_conflicts = integer_zero_node;
4895       dependence_stats.num_miv_independent++;
4896     }
4897
4898   else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest->num)
4899            && !chrec_contains_symbols (chrec_a, loop_nest)
4900            && evolution_function_is_affine_in_loop (chrec_b, loop_nest->num)
4901            && !chrec_contains_symbols (chrec_b, loop_nest))
4902     {
4903       /* testsuite/.../ssa-chrec-35.c
4904          {0, +, 1}_2  vs.  {0, +, 1}_3
4905          the overlapping elements are respectively located at iterations:
4906          {0, +, 1}_x and {0, +, 1}_x,
4907          in other words, we have the equality:
4908          {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
4909
4910          Other examples:
4911          {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
4912          {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
4913
4914          {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
4915          {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
4916       */
4917       analyze_subscript_affine_affine (chrec_a, chrec_b,
4918                                        overlaps_a, overlaps_b, last_conflicts);
4919
4920       if (CF_NOT_KNOWN_P (*overlaps_a)
4921           || CF_NOT_KNOWN_P (*overlaps_b))
4922         dependence_stats.num_miv_unimplemented++;
4923       else if (CF_NO_DEPENDENCE_P (*overlaps_a)
4924                || CF_NO_DEPENDENCE_P (*overlaps_b))
4925         dependence_stats.num_miv_independent++;
4926       else
4927         dependence_stats.num_miv_dependent++;
4928     }
4929
4930   else
4931     {
4932       /* When the analysis is too difficult, answer "don't know".  */
4933       if (dump_file && (dump_flags & TDF_DETAILS))
4934         fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
4935
4936       *overlaps_a = conflict_fn_not_known ();
4937       *overlaps_b = conflict_fn_not_known ();
4938       *last_conflicts = chrec_dont_know;
4939       dependence_stats.num_miv_unimplemented++;
4940     }
4941
4942   if (dump_file && (dump_flags & TDF_DETAILS))
4943     fprintf (dump_file, ")\n");
4944 }
4945
4946 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
4947    with respect to LOOP_NEST.  OVERLAP_ITERATIONS_A and
4948    OVERLAP_ITERATIONS_B are initialized with two functions that
4949    describe the iterations that contain conflicting elements.
4950
4951    Remark: For an integer k >= 0, the following equality is true:
4952
4953    CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
4954 */
4955
4956 static void
4957 analyze_overlapping_iterations (tree chrec_a,
4958                                 tree chrec_b,
4959                                 conflict_function **overlap_iterations_a,
4960                                 conflict_function **overlap_iterations_b,
4961                                 tree *last_conflicts, class loop *loop_nest)
4962 {
4963   unsigned int lnn = loop_nest->num;
4964
4965   dependence_stats.num_subscript_tests++;
4966
4967   if (dump_file && (dump_flags & TDF_DETAILS))
4968     {
4969       fprintf (dump_file, "(analyze_overlapping_iterations \n");
4970       fprintf (dump_file, "  (chrec_a = ");
4971       print_generic_expr (dump_file, chrec_a);
4972       fprintf (dump_file, ")\n  (chrec_b = ");
4973       print_generic_expr (dump_file, chrec_b);
4974       fprintf (dump_file, ")\n");
4975     }
4976
4977   if (chrec_a == NULL_TREE
4978       || chrec_b == NULL_TREE
4979       || chrec_contains_undetermined (chrec_a)
4980       || chrec_contains_undetermined (chrec_b))
4981     {
4982       dependence_stats.num_subscript_undetermined++;
4983
4984       *overlap_iterations_a = conflict_fn_not_known ();
4985       *overlap_iterations_b = conflict_fn_not_known ();
4986     }
4987
4988   /* If they are the same chrec, and are affine, they overlap
4989      on every iteration.  */
4990   else if (eq_evolutions_p (chrec_a, chrec_b)
4991            && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
4992                || operand_equal_p (chrec_a, chrec_b, 0)))
4993     {
4994       dependence_stats.num_same_subscript_function++;
4995       *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4996       *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4997       *last_conflicts = chrec_dont_know;
4998     }
4999
5000   /* If they aren't the same, and aren't affine, we can't do anything
5001      yet.  */
5002   else if ((chrec_contains_symbols (chrec_a)
5003             || chrec_contains_symbols (chrec_b))
5004            && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
5005                || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
5006     {
5007       dependence_stats.num_subscript_undetermined++;
5008       *overlap_iterations_a = conflict_fn_not_known ();
5009       *overlap_iterations_b = conflict_fn_not_known ();
5010     }
5011
5012   else if (ziv_subscript_p (chrec_a, chrec_b))
5013     analyze_ziv_subscript (chrec_a, chrec_b,
5014                            overlap_iterations_a, overlap_iterations_b,
5015                            last_conflicts);
5016
5017   else if (siv_subscript_p (chrec_a, chrec_b))
5018     analyze_siv_subscript (chrec_a, chrec_b,
5019                            overlap_iterations_a, overlap_iterations_b,
5020                            last_conflicts, lnn);
5021
5022   else
5023     analyze_miv_subscript (chrec_a, chrec_b,
5024                            overlap_iterations_a, overlap_iterations_b,
5025                            last_conflicts, loop_nest);
5026
5027   if (dump_file && (dump_flags & TDF_DETAILS))
5028     {
5029       fprintf (dump_file, "  (overlap_iterations_a = ");
5030       dump_conflict_function (dump_file, *overlap_iterations_a);
5031       fprintf (dump_file, ")\n  (overlap_iterations_b = ");
5032       dump_conflict_function (dump_file, *overlap_iterations_b);
5033       fprintf (dump_file, "))\n");
5034     }
5035 }
5036
5037 /* Helper function for uniquely inserting distance vectors.  */
5038
5039 static void
5040 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
5041 {
5042   for (lambda_vector v : DDR_DIST_VECTS (ddr))
5043     if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
5044       return;
5045
5046   DDR_DIST_VECTS (ddr).safe_push (dist_v);
5047 }
5048
5049 /* Helper function for uniquely inserting direction vectors.  */
5050
5051 static void
5052 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
5053 {
5054   for (lambda_vector v : DDR_DIR_VECTS (ddr))
5055     if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
5056       return;
5057
5058   DDR_DIR_VECTS (ddr).safe_push (dir_v);
5059 }
5060
5061 /* Add a distance of 1 on all the loops outer than INDEX.  If we
5062    haven't yet determined a distance for this outer loop, push a new
5063    distance vector composed of the previous distance, and a distance
5064    of 1 for this outer loop.  Example:
5065
5066    | loop_1
5067    |   loop_2
5068    |     A[10]
5069    |   endloop_2
5070    | endloop_1
5071
5072    Saved vectors are of the form (dist_in_1, dist_in_2).  First, we
5073    save (0, 1), then we have to save (1, 0).  */
5074
5075 static void
5076 add_outer_distances (struct data_dependence_relation *ddr,
5077                      lambda_vector dist_v, int index)
5078 {
5079   /* For each outer loop where init_v is not set, the accesses are
5080      in dependence of distance 1 in the loop.  */
5081   while (--index >= 0)
5082     {
5083       lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5084       lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
5085       save_v[index] = 1;
5086       save_dist_v (ddr, save_v);
5087     }
5088 }
5089
5090 /* Return false when fail to represent the data dependence as a
5091    distance vector.  A_INDEX is the index of the first reference
5092    (0 for DDR_A, 1 for DDR_B) and B_INDEX is the index of the
5093    second reference.  INIT_B is set to true when a component has been
5094    added to the distance vector DIST_V.  INDEX_CARRY is then set to
5095    the index in DIST_V that carries the dependence.  */
5096
5097 static bool
5098 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
5099                              unsigned int a_index, unsigned int b_index,
5100                              lambda_vector dist_v, bool *init_b,
5101                              int *index_carry)
5102 {
5103   unsigned i;
5104   lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5105   class loop *loop = DDR_LOOP_NEST (ddr)[0];
5106
5107   for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
5108     {
5109       tree access_fn_a, access_fn_b;
5110       struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
5111
5112       if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
5113         {
5114           non_affine_dependence_relation (ddr);
5115           return false;
5116         }
5117
5118       access_fn_a = SUB_ACCESS_FN (subscript, a_index);
5119       access_fn_b = SUB_ACCESS_FN (subscript, b_index);
5120
5121       if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
5122           && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
5123         {
5124           HOST_WIDE_INT dist;
5125           int index;
5126           int var_a = CHREC_VARIABLE (access_fn_a);
5127           int var_b = CHREC_VARIABLE (access_fn_b);
5128
5129           if (var_a != var_b
5130               || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
5131             {
5132               non_affine_dependence_relation (ddr);
5133               return false;
5134             }
5135
5136           /* When data references are collected in a loop while data
5137              dependences are analyzed in loop nest nested in the loop, we
5138              would have more number of access functions than number of
5139              loops.  Skip access functions of loops not in the loop nest.
5140
5141              See PR89725 for more information.  */
5142           if (flow_loop_nested_p (get_loop (cfun, var_a), loop))
5143             continue;
5144
5145           dist = int_cst_value (SUB_DISTANCE (subscript));
5146           index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
5147           *index_carry = MIN (index, *index_carry);
5148
5149           /* This is the subscript coupling test.  If we have already
5150              recorded a distance for this loop (a distance coming from
5151              another subscript), it should be the same.  For example,
5152              in the following code, there is no dependence:
5153
5154              | loop i = 0, N, 1
5155              |   T[i+1][i] = ...
5156              |   ... = T[i][i]
5157              | endloop
5158           */
5159           if (init_v[index] != 0 && dist_v[index] != dist)
5160             {
5161               finalize_ddr_dependent (ddr, chrec_known);
5162               return false;
5163             }
5164
5165           dist_v[index] = dist;
5166           init_v[index] = 1;
5167           *init_b = true;
5168         }
5169       else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
5170         {
5171           /* This can be for example an affine vs. constant dependence
5172              (T[i] vs. T[3]) that is not an affine dependence and is
5173              not representable as a distance vector.  */
5174           non_affine_dependence_relation (ddr);
5175           return false;
5176         }
5177       else
5178         *init_b = true;
5179     }
5180
5181   return true;
5182 }
5183
5184 /* Return true when the DDR contains only invariant access functions wrto. loop
5185    number LNUM.  */
5186
5187 static bool
5188 invariant_access_functions (const struct data_dependence_relation *ddr,
5189                             int lnum)
5190 {
5191   for (subscript *sub : DDR_SUBSCRIPTS (ddr))
5192     if (!evolution_function_is_invariant_p (SUB_ACCESS_FN (sub, 0), lnum)
5193         || !evolution_function_is_invariant_p (SUB_ACCESS_FN (sub, 1), lnum))
5194       return false;
5195
5196   return true;
5197 }
5198
5199 /* Helper function for the case where DDR_A and DDR_B are the same
5200    multivariate access function with a constant step.  For an example
5201    see pr34635-1.c.  */
5202
5203 static void
5204 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
5205 {
5206   int x_1, x_2;
5207   tree c_1 = CHREC_LEFT (c_2);
5208   tree c_0 = CHREC_LEFT (c_1);
5209   lambda_vector dist_v;
5210   HOST_WIDE_INT v1, v2, cd;
5211
5212   /* Polynomials with more than 2 variables are not handled yet.  When
5213      the evolution steps are parameters, it is not possible to
5214      represent the dependence using classical distance vectors.  */
5215   if (TREE_CODE (c_0) != INTEGER_CST
5216       || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
5217       || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
5218     {
5219       DDR_AFFINE_P (ddr) = false;
5220       return;
5221     }
5222
5223   x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
5224   x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
5225
5226   /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2).  */
5227   dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5228   v1 = int_cst_value (CHREC_RIGHT (c_1));
5229   v2 = int_cst_value (CHREC_RIGHT (c_2));
5230   cd = gcd (v1, v2);
5231   v1 /= cd;
5232   v2 /= cd;
5233
5234   if (v2 < 0)
5235     {
5236       v2 = -v2;
5237       v1 = -v1;
5238     }
5239
5240   dist_v[x_1] = v2;
5241   dist_v[x_2] = -v1;
5242   save_dist_v (ddr, dist_v);
5243
5244   add_outer_distances (ddr, dist_v, x_1);
5245 }
5246
5247 /* Helper function for the case where DDR_A and DDR_B are the same
5248    access functions.  */
5249
5250 static void
5251 add_other_self_distances (struct data_dependence_relation *ddr)
5252 {
5253   lambda_vector dist_v;
5254   unsigned i;
5255   int index_carry = DDR_NB_LOOPS (ddr);
5256   subscript *sub;
5257   class loop *loop = DDR_LOOP_NEST (ddr)[0];
5258
5259   FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
5260     {
5261       tree access_fun = SUB_ACCESS_FN (sub, 0);
5262
5263       if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
5264         {
5265           if (!evolution_function_is_univariate_p (access_fun, loop->num))
5266             {
5267               if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
5268                 {
5269                   DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
5270                   return;
5271                 }
5272
5273               access_fun = SUB_ACCESS_FN (DDR_SUBSCRIPT (ddr, 0), 0);
5274
5275               if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
5276                 add_multivariate_self_dist (ddr, access_fun);
5277               else
5278                 /* The evolution step is not constant: it varies in
5279                    the outer loop, so this cannot be represented by a
5280                    distance vector.  For example in pr34635.c the
5281                    evolution is {0, +, {0, +, 4}_1}_2.  */
5282                 DDR_AFFINE_P (ddr) = false;
5283
5284               return;
5285             }
5286
5287           /* When data references are collected in a loop while data
5288              dependences are analyzed in loop nest nested in the loop, we
5289              would have more number of access functions than number of
5290              loops.  Skip access functions of loops not in the loop nest.
5291
5292              See PR89725 for more information.  */
5293           if (flow_loop_nested_p (get_loop (cfun, CHREC_VARIABLE (access_fun)),
5294                                   loop))
5295             continue;
5296
5297           index_carry = MIN (index_carry,
5298                              index_in_loop_nest (CHREC_VARIABLE (access_fun),
5299                                                  DDR_LOOP_NEST (ddr)));
5300         }
5301     }
5302
5303   dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5304   add_outer_distances (ddr, dist_v, index_carry);
5305 }
5306
5307 static void
5308 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
5309 {
5310   lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5311
5312   dist_v[0] = 1;
5313   save_dist_v (ddr, dist_v);
5314 }
5315
5316 /* Adds a unit distance vector to DDR when there is a 0 overlap.  This
5317    is the case for example when access functions are the same and
5318    equal to a constant, as in:
5319
5320    | loop_1
5321    |   A[3] = ...
5322    |   ... = A[3]
5323    | endloop_1
5324
5325    in which case the distance vectors are (0) and (1).  */
5326
5327 static void
5328 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
5329 {
5330   unsigned i, j;
5331
5332   for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
5333     {
5334       subscript_p sub = DDR_SUBSCRIPT (ddr, i);
5335       conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
5336       conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
5337
5338       for (j = 0; j < ca->n; j++)
5339         if (affine_function_zero_p (ca->fns[j]))
5340           {
5341             insert_innermost_unit_dist_vector (ddr);
5342             return;
5343           }
5344
5345       for (j = 0; j < cb->n; j++)
5346         if (affine_function_zero_p (cb->fns[j]))
5347           {
5348             insert_innermost_unit_dist_vector (ddr);
5349             return;
5350           }
5351     }
5352 }
5353
5354 /* Return true when the DDR contains two data references that have the
5355    same access functions.  */
5356
5357 static inline bool
5358 same_access_functions (const struct data_dependence_relation *ddr)
5359 {
5360   for (subscript *sub : DDR_SUBSCRIPTS (ddr))
5361     if (!eq_evolutions_p (SUB_ACCESS_FN (sub, 0),
5362                           SUB_ACCESS_FN (sub, 1)))
5363       return false;
5364
5365   return true;
5366 }
5367
5368 /* Compute the classic per loop distance vector.  DDR is the data
5369    dependence relation to build a vector from.  Return false when fail
5370    to represent the data dependence as a distance vector.  */
5371
5372 static bool
5373 build_classic_dist_vector (struct data_dependence_relation *ddr,
5374                            class loop *loop_nest)
5375 {
5376   bool init_b = false;
5377   int index_carry = DDR_NB_LOOPS (ddr);
5378   lambda_vector dist_v;
5379
5380   if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
5381     return false;
5382
5383   if (same_access_functions (ddr))
5384     {
5385       /* Save the 0 vector.  */
5386       dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5387       save_dist_v (ddr, dist_v);
5388
5389       if (invariant_access_functions (ddr, loop_nest->num))
5390         add_distance_for_zero_overlaps (ddr);
5391
5392       if (DDR_NB_LOOPS (ddr) > 1)
5393         add_other_self_distances (ddr);
5394
5395       return true;
5396     }
5397
5398   dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5399   if (!build_classic_dist_vector_1 (ddr, 0, 1, dist_v, &init_b, &index_carry))
5400     return false;
5401
5402   /* Save the distance vector if we initialized one.  */
5403   if (init_b)
5404     {
5405       /* Verify a basic constraint: classic distance vectors should
5406          always be lexicographically positive.
5407
5408          Data references are collected in the order of execution of
5409          the program, thus for the following loop
5410
5411          | for (i = 1; i < 100; i++)
5412          |   for (j = 1; j < 100; j++)
5413          |     {
5414          |       t = T[j+1][i-1];  // A
5415          |       T[j][i] = t + 2;  // B
5416          |     }
5417
5418          references are collected following the direction of the wind:
5419          A then B.  The data dependence tests are performed also
5420          following this order, such that we're looking at the distance
5421          separating the elements accessed by A from the elements later
5422          accessed by B.  But in this example, the distance returned by
5423          test_dep (A, B) is lexicographically negative (-1, 1), that
5424          means that the access A occurs later than B with respect to
5425          the outer loop, ie. we're actually looking upwind.  In this
5426          case we solve test_dep (B, A) looking downwind to the
5427          lexicographically positive solution, that returns the
5428          distance vector (1, -1).  */
5429       if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
5430         {
5431           lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5432           if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
5433             return false;
5434           compute_subscript_distance (ddr);
5435           if (!build_classic_dist_vector_1 (ddr, 1, 0, save_v, &init_b,
5436                                             &index_carry))
5437             return false;
5438           save_dist_v (ddr, save_v);
5439           DDR_REVERSED_P (ddr) = true;
5440
5441           /* In this case there is a dependence forward for all the
5442              outer loops:
5443
5444              | for (k = 1; k < 100; k++)
5445              |  for (i = 1; i < 100; i++)
5446              |   for (j = 1; j < 100; j++)
5447              |     {
5448              |       t = T[j+1][i-1];  // A
5449              |       T[j][i] = t + 2;  // B
5450              |     }
5451
5452              the vectors are:
5453              (0,  1, -1)
5454              (1,  1, -1)
5455              (1, -1,  1)
5456           */
5457           if (DDR_NB_LOOPS (ddr) > 1)
5458             {
5459               add_outer_distances (ddr, save_v, index_carry);
5460               add_outer_distances (ddr, dist_v, index_carry);
5461             }
5462         }
5463       else
5464         {
5465           lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5466           lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
5467
5468           if (DDR_NB_LOOPS (ddr) > 1)
5469             {
5470               lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5471
5472               if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
5473                 return false;
5474               compute_subscript_distance (ddr);
5475               if (!build_classic_dist_vector_1 (ddr, 1, 0, opposite_v, &init_b,
5476                                                 &index_carry))
5477                 return false;
5478
5479               save_dist_v (ddr, save_v);
5480               add_outer_distances (ddr, dist_v, index_carry);
5481               add_outer_distances (ddr, opposite_v, index_carry);
5482             }
5483           else
5484             save_dist_v (ddr, save_v);
5485         }
5486     }
5487   else
5488     {
5489       /* There is a distance of 1 on all the outer loops: Example:
5490          there is a dependence of distance 1 on loop_1 for the array A.
5491
5492          | loop_1
5493          |   A[5] = ...
5494          | endloop
5495       */
5496       add_outer_distances (ddr, dist_v,
5497                            lambda_vector_first_nz (dist_v,
5498                                                    DDR_NB_LOOPS (ddr), 0));
5499     }
5500
5501   if (dump_file && (dump_flags & TDF_DETAILS))
5502     {
5503       unsigned i;
5504
5505       fprintf (dump_file, "(build_classic_dist_vector\n");
5506       for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
5507         {
5508           fprintf (dump_file, "  dist_vector = (");
5509           print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
5510                                DDR_NB_LOOPS (ddr));
5511           fprintf (dump_file, "  )\n");
5512         }
5513       fprintf (dump_file, ")\n");
5514     }
5515
5516   return true;
5517 }
5518
5519 /* Return the direction for a given distance.
5520    FIXME: Computing dir this way is suboptimal, since dir can catch
5521    cases that dist is unable to represent.  */
5522
5523 static inline enum data_dependence_direction
5524 dir_from_dist (int dist)
5525 {
5526   if (dist > 0)
5527     return dir_positive;
5528   else if (dist < 0)
5529     return dir_negative;
5530   else
5531     return dir_equal;
5532 }
5533
5534 /* Compute the classic per loop direction vector.  DDR is the data
5535    dependence relation to build a vector from.  */
5536
5537 static void
5538 build_classic_dir_vector (struct data_dependence_relation *ddr)
5539 {
5540   unsigned i, j;
5541   lambda_vector dist_v;
5542
5543   FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
5544     {
5545       lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5546
5547       for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
5548         dir_v[j] = dir_from_dist (dist_v[j]);
5549
5550       save_dir_v (ddr, dir_v);
5551     }
5552 }
5553
5554 /* Helper function.  Returns true when there is a dependence between the
5555    data references.  A_INDEX is the index of the first reference (0 for
5556    DDR_A, 1 for DDR_B) and B_INDEX is the index of the second reference.  */
5557
5558 static bool
5559 subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
5560                                unsigned int a_index, unsigned int b_index,
5561                                class loop *loop_nest)
5562 {
5563   unsigned int i;
5564   tree last_conflicts;
5565   struct subscript *subscript;
5566   tree res = NULL_TREE;
5567
5568   for (i = 0; DDR_SUBSCRIPTS (ddr).iterate (i, &subscript); i++)
5569     {
5570       conflict_function *overlaps_a, *overlaps_b;
5571
5572       analyze_overlapping_iterations (SUB_ACCESS_FN (subscript, a_index),
5573                                       SUB_ACCESS_FN (subscript, b_index),
5574                                       &overlaps_a, &overlaps_b,
5575                                       &last_conflicts, loop_nest);
5576
5577       if (SUB_CONFLICTS_IN_A (subscript))
5578         free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
5579       if (SUB_CONFLICTS_IN_B (subscript))
5580         free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
5581
5582       SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
5583       SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
5584       SUB_LAST_CONFLICT (subscript) = last_conflicts;
5585
5586       /* If there is any undetermined conflict function we have to
5587          give a conservative answer in case we cannot prove that
5588          no dependence exists when analyzing another subscript.  */
5589       if (CF_NOT_KNOWN_P (overlaps_a)
5590           || CF_NOT_KNOWN_P (overlaps_b))
5591         {
5592           res = chrec_dont_know;
5593           continue;
5594         }
5595
5596       /* When there is a subscript with no dependence we can stop.  */
5597       else if (CF_NO_DEPENDENCE_P (overlaps_a)
5598                || CF_NO_DEPENDENCE_P (overlaps_b))
5599         {
5600           res = chrec_known;
5601           break;
5602         }
5603     }
5604
5605   if (res == NULL_TREE)
5606     return true;
5607
5608   if (res == chrec_known)
5609     dependence_stats.num_dependence_independent++;
5610   else
5611     dependence_stats.num_dependence_undetermined++;
5612   finalize_ddr_dependent (ddr, res);
5613   return false;
5614 }
5615
5616 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR.  */
5617
5618 static void
5619 subscript_dependence_tester (struct data_dependence_relation *ddr,
5620                              class loop *loop_nest)
5621 {
5622   if (subscript_dependence_tester_1 (ddr, 0, 1, loop_nest))
5623     dependence_stats.num_dependence_dependent++;
5624
5625   compute_subscript_distance (ddr);
5626   if (build_classic_dist_vector (ddr, loop_nest))
5627     build_classic_dir_vector (ddr);
5628 }
5629
5630 /* Returns true when all the access functions of A are affine or
5631    constant with respect to LOOP_NEST.  */
5632
5633 static bool
5634 access_functions_are_affine_or_constant_p (const struct data_reference *a,
5635                                            const class loop *loop_nest)
5636 {
5637   vec<tree> fns = DR_ACCESS_FNS (a);
5638   for (tree t : fns)
5639     if (!evolution_function_is_invariant_p (t, loop_nest->num)
5640         && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
5641       return false;
5642
5643   return true;
5644 }
5645
5646 /* This computes the affine dependence relation between A and B with
5647    respect to LOOP_NEST.  CHREC_KNOWN is used for representing the
5648    independence between two accesses, while CHREC_DONT_KNOW is used
5649    for representing the unknown relation.
5650
5651    Note that it is possible to stop the computation of the dependence
5652    relation the first time we detect a CHREC_KNOWN element for a given
5653    subscript.  */
5654
5655 void
5656 compute_affine_dependence (struct data_dependence_relation *ddr,
5657                            class loop *loop_nest)
5658 {
5659   struct data_reference *dra = DDR_A (ddr);
5660   struct data_reference *drb = DDR_B (ddr);
5661
5662   if (dump_file && (dump_flags & TDF_DETAILS))
5663     {
5664       fprintf (dump_file, "(compute_affine_dependence\n");
5665       fprintf (dump_file, "  ref_a: ");
5666       print_generic_expr (dump_file, DR_REF (dra));
5667       fprintf (dump_file, ", stmt_a: ");
5668       print_gimple_stmt (dump_file, DR_STMT (dra), 0, TDF_SLIM);
5669       fprintf (dump_file, "  ref_b: ");
5670       print_generic_expr (dump_file, DR_REF (drb));
5671       fprintf (dump_file, ", stmt_b: ");
5672       print_gimple_stmt (dump_file, DR_STMT (drb), 0, TDF_SLIM);
5673     }
5674
5675   /* Analyze only when the dependence relation is not yet known.  */
5676   if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
5677     {
5678       dependence_stats.num_dependence_tests++;
5679
5680       if (access_functions_are_affine_or_constant_p (dra, loop_nest)
5681           && access_functions_are_affine_or_constant_p (drb, loop_nest))
5682         subscript_dependence_tester (ddr, loop_nest);
5683
5684       /* As a last case, if the dependence cannot be determined, or if
5685          the dependence is considered too difficult to determine, answer
5686          "don't know".  */
5687       else
5688         {
5689           dependence_stats.num_dependence_undetermined++;
5690
5691           if (dump_file && (dump_flags & TDF_DETAILS))
5692             {
5693               fprintf (dump_file, "Data ref a:\n");
5694               dump_data_reference (dump_file, dra);
5695               fprintf (dump_file, "Data ref b:\n");
5696               dump_data_reference (dump_file, drb);
5697               fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
5698             }
5699           finalize_ddr_dependent (ddr, chrec_dont_know);
5700         }
5701     }
5702
5703   if (dump_file && (dump_flags & TDF_DETAILS))
5704     {
5705       if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
5706         fprintf (dump_file, ") -> no dependence\n");
5707       else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
5708         fprintf (dump_file, ") -> dependence analysis failed\n");
5709       else
5710         fprintf (dump_file, ")\n");
5711     }
5712 }
5713
5714 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
5715    the data references in DATAREFS, in the LOOP_NEST.  When
5716    COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
5717    relations.  Return true when successful, i.e. data references number
5718    is small enough to be handled.  */
5719
5720 bool
5721 compute_all_dependences (const vec<data_reference_p> &datarefs,
5722                          vec<ddr_p> *dependence_relations,
5723                          const vec<loop_p> &loop_nest,
5724                          bool compute_self_and_rr)
5725 {
5726   struct data_dependence_relation *ddr;
5727   struct data_reference *a, *b;
5728   unsigned int i, j;
5729
5730   if ((int) datarefs.length ()
5731       > param_loop_max_datarefs_for_datadeps)
5732     {
5733       struct data_dependence_relation *ddr;
5734
5735       /* Insert a single relation into dependence_relations:
5736          chrec_dont_know.  */
5737       ddr = initialize_data_dependence_relation (NULL, NULL, loop_nest);
5738       dependence_relations->safe_push (ddr);
5739       return false;
5740     }
5741
5742   FOR_EACH_VEC_ELT (datarefs, i, a)
5743     for (j = i + 1; datarefs.iterate (j, &b); j++)
5744       if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
5745         {
5746           ddr = initialize_data_dependence_relation (a, b, loop_nest);
5747           dependence_relations->safe_push (ddr);
5748           if (loop_nest.exists ())
5749             compute_affine_dependence (ddr, loop_nest[0]);
5750         }
5751
5752   if (compute_self_and_rr)
5753     FOR_EACH_VEC_ELT (datarefs, i, a)
5754       {
5755         ddr = initialize_data_dependence_relation (a, a, loop_nest);
5756         dependence_relations->safe_push (ddr);
5757         if (loop_nest.exists ())
5758           compute_affine_dependence (ddr, loop_nest[0]);
5759       }
5760
5761   return true;
5762 }
5763
5764 /* Describes a location of a memory reference.  */
5765
5766 struct data_ref_loc
5767 {
5768   /* The memory reference.  */
5769   tree ref;
5770
5771   /* True if the memory reference is read.  */
5772   bool is_read;
5773
5774   /* True if the data reference is conditional within the containing
5775      statement, i.e. if it might not occur even when the statement
5776      is executed and runs to completion.  */
5777   bool is_conditional_in_stmt;
5778 };
5779
5780
5781 /* Stores the locations of memory references in STMT to REFERENCES.  Returns
5782    true if STMT clobbers memory, false otherwise.  */
5783
5784 static bool
5785 get_references_in_stmt (gimple *stmt, vec<data_ref_loc, va_heap> *references)
5786 {
5787   bool clobbers_memory = false;
5788   data_ref_loc ref;
5789   tree op0, op1;
5790   enum gimple_code stmt_code = gimple_code (stmt);
5791
5792   /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
5793      As we cannot model data-references to not spelled out
5794      accesses give up if they may occur.  */
5795   if (stmt_code == GIMPLE_CALL
5796       && !(gimple_call_flags (stmt) & ECF_CONST))
5797     {
5798       /* Allow IFN_GOMP_SIMD_LANE in their own loops.  */
5799       if (gimple_call_internal_p (stmt))
5800         switch (gimple_call_internal_fn (stmt))
5801           {
5802           case IFN_GOMP_SIMD_LANE:
5803             {
5804               class loop *loop = gimple_bb (stmt)->loop_father;
5805               tree uid = gimple_call_arg (stmt, 0);
5806               gcc_assert (TREE_CODE (uid) == SSA_NAME);
5807               if (loop == NULL
5808                   || loop->simduid != SSA_NAME_VAR (uid))
5809                 clobbers_memory = true;
5810               break;
5811             }
5812           case IFN_MASK_LOAD:
5813           case IFN_MASK_STORE:
5814             break;
5815           default:
5816             clobbers_memory = true;
5817             break;
5818           }
5819       else
5820         clobbers_memory = true;
5821     }
5822   else if (stmt_code == GIMPLE_ASM
5823            && (gimple_asm_volatile_p (as_a <gasm *> (stmt))
5824                || gimple_vuse (stmt)))
5825     clobbers_memory = true;
5826
5827   if (!gimple_vuse (stmt))
5828     return clobbers_memory;
5829
5830   if (stmt_code == GIMPLE_ASSIGN)
5831     {
5832       tree base;
5833       op0 = gimple_assign_lhs (stmt);
5834       op1 = gimple_assign_rhs1 (stmt);
5835
5836       if (DECL_P (op1)
5837           || (REFERENCE_CLASS_P (op1)
5838               && (base = get_base_address (op1))
5839               && TREE_CODE (base) != SSA_NAME
5840               && !is_gimple_min_invariant (base)))
5841         {
5842           ref.ref = op1;
5843           ref.is_read = true;
5844           ref.is_conditional_in_stmt = false;
5845           references->safe_push (ref);
5846         }
5847     }
5848   else if (stmt_code == GIMPLE_CALL)
5849     {
5850       unsigned i, n;
5851       tree ptr, type;
5852       unsigned int align;
5853
5854       ref.is_read = false;
5855       if (gimple_call_internal_p (stmt))
5856         switch (gimple_call_internal_fn (stmt))
5857           {
5858           case IFN_MASK_LOAD:
5859             if (gimple_call_lhs (stmt) == NULL_TREE)
5860               break;
5861             ref.is_read = true;
5862             /* FALLTHRU */
5863           case IFN_MASK_STORE:
5864             ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)), 0);
5865             align = tree_to_shwi (gimple_call_arg (stmt, 1));
5866             if (ref.is_read)
5867               type = TREE_TYPE (gimple_call_lhs (stmt));
5868             else
5869               type = TREE_TYPE (gimple_call_arg (stmt, 3));
5870             if (TYPE_ALIGN (type) != align)
5871               type = build_aligned_type (type, align);
5872             ref.is_conditional_in_stmt = true;
5873             ref.ref = fold_build2 (MEM_REF, type, gimple_call_arg (stmt, 0),
5874                                    ptr);
5875             references->safe_push (ref);
5876             return false;
5877           default:
5878             break;
5879           }
5880
5881       op0 = gimple_call_lhs (stmt);
5882       n = gimple_call_num_args (stmt);
5883       for (i = 0; i < n; i++)
5884         {
5885           op1 = gimple_call_arg (stmt, i);
5886
5887           if (DECL_P (op1)
5888               || (REFERENCE_CLASS_P (op1) && get_base_address (op1)))
5889             {
5890               ref.ref = op1;
5891               ref.is_read = true;
5892               ref.is_conditional_in_stmt = false;
5893               references->safe_push (ref);
5894             }
5895         }
5896     }
5897   else
5898     return clobbers_memory;
5899
5900   if (op0
5901       && (DECL_P (op0)
5902           || (REFERENCE_CLASS_P (op0) && get_base_address (op0))))
5903     {
5904       ref.ref = op0;
5905       ref.is_read = false;
5906       ref.is_conditional_in_stmt = false;
5907       references->safe_push (ref);
5908     }
5909   return clobbers_memory;
5910 }
5911
5912
5913 /* Returns true if the loop-nest has any data reference.  */
5914
5915 bool
5916 loop_nest_has_data_refs (loop_p loop)
5917 {
5918   basic_block *bbs = get_loop_body (loop);
5919   auto_vec<data_ref_loc, 3> references;
5920
5921   for (unsigned i = 0; i < loop->num_nodes; i++)
5922     {
5923       basic_block bb = bbs[i];
5924       gimple_stmt_iterator bsi;
5925
5926       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5927         {
5928           gimple *stmt = gsi_stmt (bsi);
5929           get_references_in_stmt (stmt, &references);
5930           if (references.length ())
5931             {
5932               free (bbs);
5933               return true;
5934             }
5935         }
5936     }
5937   free (bbs);
5938   return false;
5939 }
5940
5941 /* Stores the data references in STMT to DATAREFS.  If there is an unanalyzable
5942    reference, returns false, otherwise returns true.  NEST is the outermost
5943    loop of the loop nest in which the references should be analyzed.  */
5944
5945 opt_result
5946 find_data_references_in_stmt (class loop *nest, gimple *stmt,
5947                               vec<data_reference_p> *datarefs)
5948 {
5949   auto_vec<data_ref_loc, 2> references;
5950   data_reference_p dr;
5951
5952   if (get_references_in_stmt (stmt, &references))
5953     return opt_result::failure_at (stmt, "statement clobbers memory: %G",
5954                                    stmt);
5955
5956   for (const data_ref_loc &ref : references)
5957     {
5958       dr = create_data_ref (nest ? loop_preheader_edge (nest) : NULL,
5959                             loop_containing_stmt (stmt), ref.ref,
5960                             stmt, ref.is_read, ref.is_conditional_in_stmt);
5961       gcc_assert (dr != NULL);
5962       datarefs->safe_push (dr);
5963     }
5964
5965   return opt_result::success ();
5966 }
5967
5968 /* Stores the data references in STMT to DATAREFS.  If there is an
5969    unanalyzable reference, returns false, otherwise returns true.
5970    NEST is the outermost loop of the loop nest in which the references
5971    should be instantiated, LOOP is the loop in which the references
5972    should be analyzed.  */
5973
5974 bool
5975 graphite_find_data_references_in_stmt (edge nest, loop_p loop, gimple *stmt,
5976                                        vec<data_reference_p> *datarefs)
5977 {
5978   auto_vec<data_ref_loc, 2> references;
5979   bool ret = true;
5980   data_reference_p dr;
5981
5982   if (get_references_in_stmt (stmt, &references))
5983     return false;
5984
5985   for (const data_ref_loc &ref : references)
5986     {
5987       dr = create_data_ref (nest, loop, ref.ref, stmt, ref.is_read,
5988                             ref.is_conditional_in_stmt);
5989       gcc_assert (dr != NULL);
5990       datarefs->safe_push (dr);
5991     }
5992
5993   return ret;
5994 }
5995
5996 /* Search the data references in LOOP, and record the information into
5997    DATAREFS.  Returns chrec_dont_know when failing to analyze a
5998    difficult case, returns NULL_TREE otherwise.  */
5999
6000 tree
6001 find_data_references_in_bb (class loop *loop, basic_block bb,
6002                             vec<data_reference_p> *datarefs)
6003 {
6004   gimple_stmt_iterator bsi;
6005
6006   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
6007     {
6008       gimple *stmt = gsi_stmt (bsi);
6009
6010       if (!find_data_references_in_stmt (loop, stmt, datarefs))
6011         {
6012           struct data_reference *res;
6013           res = XCNEW (struct data_reference);
6014           datarefs->safe_push (res);
6015
6016           return chrec_dont_know;
6017         }
6018     }
6019
6020   return NULL_TREE;
6021 }
6022
6023 /* Search the data references in LOOP, and record the information into
6024    DATAREFS.  Returns chrec_dont_know when failing to analyze a
6025    difficult case, returns NULL_TREE otherwise.
6026
6027    TODO: This function should be made smarter so that it can handle address
6028    arithmetic as if they were array accesses, etc.  */
6029
6030 tree
6031 find_data_references_in_loop (class loop *loop,
6032                               vec<data_reference_p> *datarefs)
6033 {
6034   basic_block bb, *bbs;
6035   unsigned int i;
6036
6037   bbs = get_loop_body_in_dom_order (loop);
6038
6039   for (i = 0; i < loop->num_nodes; i++)
6040     {
6041       bb = bbs[i];
6042
6043       if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
6044         {
6045           free (bbs);
6046           return chrec_dont_know;
6047         }
6048     }
6049   free (bbs);
6050
6051   return NULL_TREE;
6052 }
6053
6054 /* Return the alignment in bytes that DRB is guaranteed to have at all
6055    times.  */
6056
6057 unsigned int
6058 dr_alignment (innermost_loop_behavior *drb)
6059 {
6060   /* Get the alignment of BASE_ADDRESS + INIT.  */
6061   unsigned int alignment = drb->base_alignment;
6062   unsigned int misalignment = (drb->base_misalignment
6063                                + TREE_INT_CST_LOW (drb->init));
6064   if (misalignment != 0)
6065     alignment = MIN (alignment, misalignment & -misalignment);
6066
6067   /* Cap it to the alignment of OFFSET.  */
6068   if (!integer_zerop (drb->offset))
6069     alignment = MIN (alignment, drb->offset_alignment);
6070
6071   /* Cap it to the alignment of STEP.  */
6072   if (!integer_zerop (drb->step))
6073     alignment = MIN (alignment, drb->step_alignment);
6074
6075   return alignment;
6076 }
6077
6078 /* If BASE is a pointer-typed SSA name, try to find the object that it
6079    is based on.  Return this object X on success and store the alignment
6080    in bytes of BASE - &X in *ALIGNMENT_OUT.  */
6081
6082 static tree
6083 get_base_for_alignment_1 (tree base, unsigned int *alignment_out)
6084 {
6085   if (TREE_CODE (base) != SSA_NAME || !POINTER_TYPE_P (TREE_TYPE (base)))
6086     return NULL_TREE;
6087
6088   gimple *def = SSA_NAME_DEF_STMT (base);
6089   base = analyze_scalar_evolution (loop_containing_stmt (def), base);
6090
6091   /* Peel chrecs and record the minimum alignment preserved by
6092      all steps.  */
6093   unsigned int alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
6094   while (TREE_CODE (base) == POLYNOMIAL_CHREC)
6095     {
6096       unsigned int step_alignment = highest_pow2_factor (CHREC_RIGHT (base));
6097       alignment = MIN (alignment, step_alignment);
6098       base = CHREC_LEFT (base);
6099     }
6100
6101   /* Punt if the expression is too complicated to handle.  */
6102   if (tree_contains_chrecs (base, NULL) || !POINTER_TYPE_P (TREE_TYPE (base)))
6103     return NULL_TREE;
6104
6105   /* The only useful cases are those for which a dereference folds to something
6106      other than an INDIRECT_REF.  */
6107   tree ref_type = TREE_TYPE (TREE_TYPE (base));
6108   tree ref = fold_indirect_ref_1 (UNKNOWN_LOCATION, ref_type, base);
6109   if (!ref)
6110     return NULL_TREE;
6111
6112   /* Analyze the base to which the steps we peeled were applied.  */
6113   poly_int64 bitsize, bitpos, bytepos;
6114   machine_mode mode;
6115   int unsignedp, reversep, volatilep;
6116   tree offset;
6117   base = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
6118                               &unsignedp, &reversep, &volatilep);
6119   if (!base || !multiple_p (bitpos, BITS_PER_UNIT, &bytepos))
6120     return NULL_TREE;
6121
6122   /* Restrict the alignment to that guaranteed by the offsets.  */
6123   unsigned int bytepos_alignment = known_alignment (bytepos);
6124   if (bytepos_alignment != 0)
6125     alignment = MIN (alignment, bytepos_alignment);
6126   if (offset)
6127     {
6128       unsigned int offset_alignment = highest_pow2_factor (offset);
6129       alignment = MIN (alignment, offset_alignment);
6130     }
6131
6132   *alignment_out = alignment;
6133   return base;
6134 }
6135
6136 /* Return the object whose alignment would need to be changed in order
6137    to increase the alignment of ADDR.  Store the maximum achievable
6138    alignment in *MAX_ALIGNMENT.  */
6139
6140 tree
6141 get_base_for_alignment (tree addr, unsigned int *max_alignment)
6142 {
6143   tree base = get_base_for_alignment_1 (addr, max_alignment);
6144   if (base)
6145     return base;
6146
6147   if (TREE_CODE (addr) == ADDR_EXPR)
6148     addr = TREE_OPERAND (addr, 0);
6149   *max_alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
6150   return addr;
6151 }
6152
6153 /* Recursive helper function.  */
6154
6155 static bool
6156 find_loop_nest_1 (class loop *loop, vec<loop_p> *loop_nest)
6157 {
6158   /* Inner loops of the nest should not contain siblings.  Example:
6159      when there are two consecutive loops,
6160
6161      | loop_0
6162      |   loop_1
6163      |     A[{0, +, 1}_1]
6164      |   endloop_1
6165      |   loop_2
6166      |     A[{0, +, 1}_2]
6167      |   endloop_2
6168      | endloop_0
6169
6170      the dependence relation cannot be captured by the distance
6171      abstraction.  */
6172   if (loop->next)
6173     return false;
6174
6175   loop_nest->safe_push (loop);
6176   if (loop->inner)
6177     return find_loop_nest_1 (loop->inner, loop_nest);
6178   return true;
6179 }
6180
6181 /* Return false when the LOOP is not well nested.  Otherwise return
6182    true and insert in LOOP_NEST the loops of the nest.  LOOP_NEST will
6183    contain the loops from the outermost to the innermost, as they will
6184    appear in the classic distance vector.  */
6185
6186 bool
6187 find_loop_nest (class loop *loop, vec<loop_p> *loop_nest)
6188 {
6189   loop_nest->safe_push (loop);
6190   if (loop->inner)
6191     return find_loop_nest_1 (loop->inner, loop_nest);
6192   return true;
6193 }
6194
6195 /* Returns true when the data dependences have been computed, false otherwise.
6196    Given a loop nest LOOP, the following vectors are returned:
6197    DATAREFS is initialized to all the array elements contained in this loop,
6198    DEPENDENCE_RELATIONS contains the relations between the data references.
6199    Compute read-read and self relations if
6200    COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE.  */
6201
6202 bool
6203 compute_data_dependences_for_loop (class loop *loop,
6204                                    bool compute_self_and_read_read_dependences,
6205                                    vec<loop_p> *loop_nest,
6206                                    vec<data_reference_p> *datarefs,
6207                                    vec<ddr_p> *dependence_relations)
6208 {
6209   bool res = true;
6210
6211   memset (&dependence_stats, 0, sizeof (dependence_stats));
6212
6213   /* If the loop nest is not well formed, or one of the data references
6214      is not computable, give up without spending time to compute other
6215      dependences.  */
6216   if (!loop
6217       || !find_loop_nest (loop, loop_nest)
6218       || find_data_references_in_loop (loop, datarefs) == chrec_dont_know
6219       || !compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
6220                                    compute_self_and_read_read_dependences))
6221     res = false;
6222
6223   if (dump_file && (dump_flags & TDF_STATS))
6224     {
6225       fprintf (dump_file, "Dependence tester statistics:\n");
6226
6227       fprintf (dump_file, "Number of dependence tests: %d\n",
6228                dependence_stats.num_dependence_tests);
6229       fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
6230                dependence_stats.num_dependence_dependent);
6231       fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
6232                dependence_stats.num_dependence_independent);
6233       fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
6234                dependence_stats.num_dependence_undetermined);
6235
6236       fprintf (dump_file, "Number of subscript tests: %d\n",
6237                dependence_stats.num_subscript_tests);
6238       fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
6239                dependence_stats.num_subscript_undetermined);
6240       fprintf (dump_file, "Number of same subscript function: %d\n",
6241                dependence_stats.num_same_subscript_function);
6242
6243       fprintf (dump_file, "Number of ziv tests: %d\n",
6244                dependence_stats.num_ziv);
6245       fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
6246                dependence_stats.num_ziv_dependent);
6247       fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
6248                dependence_stats.num_ziv_independent);
6249       fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
6250                dependence_stats.num_ziv_unimplemented);
6251
6252       fprintf (dump_file, "Number of siv tests: %d\n",
6253                dependence_stats.num_siv);
6254       fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
6255                dependence_stats.num_siv_dependent);
6256       fprintf (dump_file, "Number of siv tests returning independent: %d\n",
6257                dependence_stats.num_siv_independent);
6258       fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
6259                dependence_stats.num_siv_unimplemented);
6260
6261       fprintf (dump_file, "Number of miv tests: %d\n",
6262                dependence_stats.num_miv);
6263       fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
6264                dependence_stats.num_miv_dependent);
6265       fprintf (dump_file, "Number of miv tests returning independent: %d\n",
6266                dependence_stats.num_miv_independent);
6267       fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
6268                dependence_stats.num_miv_unimplemented);
6269     }
6270
6271   return res;
6272 }
6273
6274 /* Free the memory used by a data dependence relation DDR.  */
6275
6276 void
6277 free_dependence_relation (struct data_dependence_relation *ddr)
6278 {
6279   if (ddr == NULL)
6280     return;
6281
6282   if (DDR_SUBSCRIPTS (ddr).exists ())
6283     free_subscripts (DDR_SUBSCRIPTS (ddr));
6284   DDR_DIST_VECTS (ddr).release ();
6285   DDR_DIR_VECTS (ddr).release ();
6286
6287   free (ddr);
6288 }
6289
6290 /* Free the memory used by the data dependence relations from
6291    DEPENDENCE_RELATIONS.  */
6292
6293 void
6294 free_dependence_relations (vec<ddr_p>& dependence_relations)
6295 {
6296   for (data_dependence_relation *ddr : dependence_relations)
6297     if (ddr)
6298       free_dependence_relation (ddr);
6299
6300   dependence_relations.release ();
6301 }
6302
6303 /* Free the memory used by the data references from DATAREFS.  */
6304
6305 void
6306 free_data_refs (vec<data_reference_p>& datarefs)
6307 {
6308   for (data_reference *dr : datarefs)
6309     free_data_ref (dr);
6310   datarefs.release ();
6311 }
6312
6313 /* Common routine implementing both dr_direction_indicator and
6314    dr_zero_step_indicator.  Return USEFUL_MIN if the indicator is known
6315    to be >= USEFUL_MIN and -1 if the indicator is known to be negative.
6316    Return the step as the indicator otherwise.  */
6317
6318 static tree
6319 dr_step_indicator (struct data_reference *dr, int useful_min)
6320 {
6321   tree step = DR_STEP (dr);
6322   if (!step)
6323     return NULL_TREE;
6324   STRIP_NOPS (step);
6325   /* Look for cases where the step is scaled by a positive constant
6326      integer, which will often be the access size.  If the multiplication
6327      doesn't change the sign (due to overflow effects) then we can
6328      test the unscaled value instead.  */
6329   if (TREE_CODE (step) == MULT_EXPR
6330       && TREE_CODE (TREE_OPERAND (step, 1)) == INTEGER_CST
6331       && tree_int_cst_sgn (TREE_OPERAND (step, 1)) > 0)
6332     {
6333       tree factor = TREE_OPERAND (step, 1);
6334       step = TREE_OPERAND (step, 0);
6335
6336       /* Strip widening and truncating conversions as well as nops.  */
6337       if (CONVERT_EXPR_P (step)
6338           && INTEGRAL_TYPE_P (TREE_TYPE (TREE_OPERAND (step, 0))))
6339         step = TREE_OPERAND (step, 0);
6340       tree type = TREE_TYPE (step);
6341
6342       /* Get the range of step values that would not cause overflow.  */
6343       widest_int minv = (wi::to_widest (TYPE_MIN_VALUE (ssizetype))
6344                          / wi::to_widest (factor));
6345       widest_int maxv = (wi::to_widest (TYPE_MAX_VALUE (ssizetype))
6346                          / wi::to_widest (factor));
6347
6348       /* Get the range of values that the unconverted step actually has.  */
6349       wide_int step_min, step_max;
6350       value_range vr;
6351       if (TREE_CODE (step) != SSA_NAME
6352           || !get_range_query (cfun)->range_of_expr (vr, step)
6353           || vr.kind () != VR_RANGE)
6354         {
6355           step_min = wi::to_wide (TYPE_MIN_VALUE (type));
6356           step_max = wi::to_wide (TYPE_MAX_VALUE (type));
6357         }
6358       else
6359         {
6360           step_min = vr.lower_bound ();
6361           step_max = vr.upper_bound ();
6362         }
6363
6364       /* Check whether the unconverted step has an acceptable range.  */
6365       signop sgn = TYPE_SIGN (type);
6366       if (wi::les_p (minv, widest_int::from (step_min, sgn))
6367           && wi::ges_p (maxv, widest_int::from (step_max, sgn)))
6368         {
6369           if (wi::ge_p (step_min, useful_min, sgn))
6370             return ssize_int (useful_min);
6371           else if (wi::lt_p (step_max, 0, sgn))
6372             return ssize_int (-1);
6373           else
6374             return fold_convert (ssizetype, step);
6375         }
6376     }
6377   return DR_STEP (dr);
6378 }
6379
6380 /* Return a value that is negative iff DR has a negative step.  */
6381
6382 tree
6383 dr_direction_indicator (struct data_reference *dr)
6384 {
6385   return dr_step_indicator (dr, 0);
6386 }
6387
6388 /* Return a value that is zero iff DR has a zero step.  */
6389
6390 tree
6391 dr_zero_step_indicator (struct data_reference *dr)
6392 {
6393   return dr_step_indicator (dr, 1);
6394 }
6395
6396 /* Return true if DR is known to have a nonnegative (but possibly zero)
6397    step.  */
6398
6399 bool
6400 dr_known_forward_stride_p (struct data_reference *dr)
6401 {
6402   tree indicator = dr_direction_indicator (dr);
6403   tree neg_step_val = fold_binary (LT_EXPR, boolean_type_node,
6404                                    fold_convert (ssizetype, indicator),
6405                                    ssize_int (0));
6406   return neg_step_val && integer_zerop (neg_step_val);
6407 }