gcc/tree-data-ref.c

   1 /* Data references and dependences detectors.
   2    Copyright (C) 2003-2018 Free Software Foundation, Inc.
   3    Contributed by Sebastian Pop <pop@cri.ensmp.fr>
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 /* This pass walks a given loop structure searching for array
  22    references.  The information about the array accesses is recorded
  23    in DATA_REFERENCE structures.
  24
  25    The basic test for determining the dependences is:
  26    given two access functions chrec1 and chrec2 to a same array, and
  27    x and y two vectors from the iteration domain, the same element of
  28    the array is accessed twice at iterations x and y if and only if:
  29    |             chrec1 (x) == chrec2 (y).
  30
  31    The goals of this analysis are:
  32
  33    - to determine the independence: the relation between two
  34      independent accesses is qualified with the chrec_known (this
  35      information allows a loop parallelization),
  36
  37    - when two data references access the same data, to qualify the
  38      dependence relation with classic dependence representations:
  39
  40        - distance vectors
  41        - direction vectors
  42        - loop carried level dependence
  43        - polyhedron dependence
  44      or with the chains of recurrences based representation,
  45
  46    - to define a knowledge base for storing the data dependence
  47      information,
  48
  49    - to define an interface to access this data.
  50
  51
  52    Definitions:
  53
  54    - subscript: given two array accesses a subscript is the tuple
  55    composed of the access functions for a given dimension.  Example:
  56    Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
  57    (f1, g1), (f2, g2), (f3, g3).
  58
  59    - Diophantine equation: an equation whose coefficients and
  60    solutions are integer constants, for example the equation
  61    |   3*x + 2*y = 1
  62    has an integer solution x = 1 and y = -1.
  63
  64    References:
  65
  66    - "Advanced Compilation for High Performance Computing" by Randy
  67    Allen and Ken Kennedy.
  68    http://citeseer.ist.psu.edu/goff91practical.html
  69
  70    - "Loop Transformations for Restructuring Compilers - The Foundations"
  71    by Utpal Banerjee.
  72
  73
  74 */
  75
  76 #include "config.h"
  77 #include "system.h"
  78 #include "coretypes.h"
  79 #include "backend.h"
  80 #include "rtl.h"
  81 #include "tree.h"
  82 #include "gimple.h"
  83 #include "gimple-pretty-print.h"
  84 #include "alias.h"
  85 #include "fold-const.h"
  86 #include "expr.h"
  87 #include "gimple-iterator.h"
  88 #include "tree-ssa-loop-niter.h"
  89 #include "tree-ssa-loop.h"
  90 #include "tree-ssa.h"
  91 #include "cfgloop.h"
  92 #include "tree-data-ref.h"
  93 #include "tree-scalar-evolution.h"
  94 #include "dumpfile.h"
  95 #include "tree-affine.h"
  96 #include "params.h"
  97 #include "builtins.h"
  98 #include "stringpool.h"
  99 #include "tree-vrp.h"
 100 #include "tree-ssanames.h"
 101 #include "tree-eh.h"
 102
 103 static struct datadep_stats
 104 {
 105   int num_dependence_tests;
 106   int num_dependence_dependent;
 107   int num_dependence_independent;
 108   int num_dependence_undetermined;
 109
 110   int num_subscript_tests;
 111   int num_subscript_undetermined;
 112   int num_same_subscript_function;
 113
 114   int num_ziv;
 115   int num_ziv_independent;
 116   int num_ziv_dependent;
 117   int num_ziv_unimplemented;
 118
 119   int num_siv;
 120   int num_siv_independent;
 121   int num_siv_dependent;
 122   int num_siv_unimplemented;
 123
 124   int num_miv;
 125   int num_miv_independent;
 126   int num_miv_dependent;
 127   int num_miv_unimplemented;
 128 } dependence_stats;
 129
 130 static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
 131                                            unsigned int, unsigned int,
 132                                            struct loop *);
 133 /* Returns true iff A divides B.  */
 134
 135 static inline bool
 136 tree_fold_divides_p (const_tree a, const_tree b)
 137 {
 138   gcc_assert (TREE_CODE (a) == INTEGER_CST);
 139   gcc_assert (TREE_CODE (b) == INTEGER_CST);
 140   return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
 141 }
 142
 143 /* Returns true iff A divides B.  */
 144
 145 static inline bool
 146 int_divides_p (int a, int b)
 147 {
 148   return ((b % a) == 0);
 149 }
 150
 151 /* Return true if reference REF contains a union access.  */
 152
 153 static bool
 154 ref_contains_union_access_p (tree ref)
 155 {
 156   while (handled_component_p (ref))
 157     {
 158       ref = TREE_OPERAND (ref, 0);
 159       if (TREE_CODE (TREE_TYPE (ref)) == UNION_TYPE
 160           || TREE_CODE (TREE_TYPE (ref)) == QUAL_UNION_TYPE)
 161         return true;
 162     }
 163   return false;
 164 }
 165
 166 \f
 167
 168 /* Dump into FILE all the data references from DATAREFS.  */
 169
 170 static void
 171 dump_data_references (FILE *file, vec<data_reference_p> datarefs)
 172 {
 173   unsigned int i;
 174   struct data_reference *dr;
 175
 176   FOR_EACH_VEC_ELT (datarefs, i, dr)
 177     dump_data_reference (file, dr);
 178 }
 179
 180 /* Unified dump into FILE all the data references from DATAREFS.  */
 181
 182 DEBUG_FUNCTION void
 183 debug (vec<data_reference_p> &ref)
 184 {
 185   dump_data_references (stderr, ref);
 186 }
 187
 188 DEBUG_FUNCTION void
 189 debug (vec<data_reference_p> *ptr)
 190 {
 191   if (ptr)
 192     debug (*ptr);
 193   else
 194     fprintf (stderr, "<nil>\n");
 195 }
 196
 197
 198 /* Dump into STDERR all the data references from DATAREFS.  */
 199
 200 DEBUG_FUNCTION void
 201 debug_data_references (vec<data_reference_p> datarefs)
 202 {
 203   dump_data_references (stderr, datarefs);
 204 }
 205
 206 /* Print to STDERR the data_reference DR.  */
 207
 208 DEBUG_FUNCTION void
 209 debug_data_reference (struct data_reference *dr)
 210 {
 211   dump_data_reference (stderr, dr);
 212 }
 213
 214 /* Dump function for a DATA_REFERENCE structure.  */
 215
 216 void
 217 dump_data_reference (FILE *outf,
 218                      struct data_reference *dr)
 219 {
 220   unsigned int i;
 221
 222   fprintf (outf, "#(Data Ref: \n");
 223   fprintf (outf, "#  bb: %d \n", gimple_bb (DR_STMT (dr))->index);
 224   fprintf (outf, "#  stmt: ");
 225   print_gimple_stmt (outf, DR_STMT (dr), 0);
 226   fprintf (outf, "#  ref: ");
 227   print_generic_stmt (outf, DR_REF (dr));
 228   fprintf (outf, "#  base_object: ");
 229   print_generic_stmt (outf, DR_BASE_OBJECT (dr));
 230
 231   for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
 232     {
 233       fprintf (outf, "#  Access function %d: ", i);
 234       print_generic_stmt (outf, DR_ACCESS_FN (dr, i));
 235     }
 236   fprintf (outf, "#)\n");
 237 }
 238
 239 /* Unified dump function for a DATA_REFERENCE structure.  */
 240
 241 DEBUG_FUNCTION void
 242 debug (data_reference &ref)
 243 {
 244   dump_data_reference (stderr, &ref);
 245 }
 246
 247 DEBUG_FUNCTION void
 248 debug (data_reference *ptr)
 249 {
 250   if (ptr)
 251     debug (*ptr);
 252   else
 253     fprintf (stderr, "<nil>\n");
 254 }
 255
 256
 257 /* Dumps the affine function described by FN to the file OUTF.  */
 258
 259 DEBUG_FUNCTION void
 260 dump_affine_function (FILE *outf, affine_fn fn)
 261 {
 262   unsigned i;
 263   tree coef;
 264
 265   print_generic_expr (outf, fn[0], TDF_SLIM);
 266   for (i = 1; fn.iterate (i, &coef); i++)
 267     {
 268       fprintf (outf, " + ");
 269       print_generic_expr (outf, coef, TDF_SLIM);
 270       fprintf (outf, " * x_%u", i);
 271     }
 272 }
 273
 274 /* Dumps the conflict function CF to the file OUTF.  */
 275
 276 DEBUG_FUNCTION void
 277 dump_conflict_function (FILE *outf, conflict_function *cf)
 278 {
 279   unsigned i;
 280
 281   if (cf->n == NO_DEPENDENCE)
 282     fprintf (outf, "no dependence");
 283   else if (cf->n == NOT_KNOWN)
 284     fprintf (outf, "not known");
 285   else
 286     {
 287       for (i = 0; i < cf->n; i++)
 288         {
 289           if (i != 0)
 290             fprintf (outf, " ");
 291           fprintf (outf, "[");
 292           dump_affine_function (outf, cf->fns[i]);
 293           fprintf (outf, "]");
 294         }
 295     }
 296 }
 297
 298 /* Dump function for a SUBSCRIPT structure.  */
 299
 300 DEBUG_FUNCTION void
 301 dump_subscript (FILE *outf, struct subscript *subscript)
 302 {
 303   conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
 304
 305   fprintf (outf, "\n (subscript \n");
 306   fprintf (outf, "  iterations_that_access_an_element_twice_in_A: ");
 307   dump_conflict_function (outf, cf);
 308   if (CF_NONTRIVIAL_P (cf))
 309     {
 310       tree last_iteration = SUB_LAST_CONFLICT (subscript);
 311       fprintf (outf, "\n  last_conflict: ");
 312       print_generic_expr (outf, last_iteration);
 313     }
 314
 315   cf = SUB_CONFLICTS_IN_B (subscript);
 316   fprintf (outf, "\n  iterations_that_access_an_element_twice_in_B: ");
 317   dump_conflict_function (outf, cf);
 318   if (CF_NONTRIVIAL_P (cf))
 319     {
 320       tree last_iteration = SUB_LAST_CONFLICT (subscript);
 321       fprintf (outf, "\n  last_conflict: ");
 322       print_generic_expr (outf, last_iteration);
 323     }
 324
 325   fprintf (outf, "\n  (Subscript distance: ");
 326   print_generic_expr (outf, SUB_DISTANCE (subscript));
 327   fprintf (outf, " ))\n");
 328 }
 329
 330 /* Print the classic direction vector DIRV to OUTF.  */
 331
 332 DEBUG_FUNCTION void
 333 print_direction_vector (FILE *outf,
 334                         lambda_vector dirv,
 335                         int length)
 336 {
 337   int eq;
 338
 339   for (eq = 0; eq < length; eq++)
 340     {
 341       enum data_dependence_direction dir = ((enum data_dependence_direction)
 342                                             dirv[eq]);
 343
 344       switch (dir)
 345         {
 346         case dir_positive:
 347           fprintf (outf, "    +");
 348           break;
 349         case dir_negative:
 350           fprintf (outf, "    -");
 351           break;
 352         case dir_equal:
 353           fprintf (outf, "    =");
 354           break;
 355         case dir_positive_or_equal:
 356           fprintf (outf, "   +=");
 357           break;
 358         case dir_positive_or_negative:
 359           fprintf (outf, "   +-");
 360           break;
 361         case dir_negative_or_equal:
 362           fprintf (outf, "   -=");
 363           break;
 364         case dir_star:
 365           fprintf (outf, "    *");
 366           break;
 367         default:
 368           fprintf (outf, "indep");
 369           break;
 370         }
 371     }
 372   fprintf (outf, "\n");
 373 }
 374
 375 /* Print a vector of direction vectors.  */
 376
 377 DEBUG_FUNCTION void
 378 print_dir_vectors (FILE *outf, vec<lambda_vector> dir_vects,
 379                    int length)
 380 {
 381   unsigned j;
 382   lambda_vector v;
 383
 384   FOR_EACH_VEC_ELT (dir_vects, j, v)
 385     print_direction_vector (outf, v, length);
 386 }
 387
 388 /* Print out a vector VEC of length N to OUTFILE.  */
 389
 390 DEBUG_FUNCTION void
 391 print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
 392 {
 393   int i;
 394
 395   for (i = 0; i < n; i++)
 396     fprintf (outfile, "%3d ", vector[i]);
 397   fprintf (outfile, "\n");
 398 }
 399
 400 /* Print a vector of distance vectors.  */
 401
 402 DEBUG_FUNCTION void
 403 print_dist_vectors (FILE *outf, vec<lambda_vector> dist_vects,
 404                     int length)
 405 {
 406   unsigned j;
 407   lambda_vector v;
 408
 409   FOR_EACH_VEC_ELT (dist_vects, j, v)
 410     print_lambda_vector (outf, v, length);
 411 }
 412
 413 /* Dump function for a DATA_DEPENDENCE_RELATION structure.  */
 414
 415 DEBUG_FUNCTION void
 416 dump_data_dependence_relation (FILE *outf,
 417                                struct data_dependence_relation *ddr)
 418 {
 419   struct data_reference *dra, *drb;
 420
 421   fprintf (outf, "(Data Dep: \n");
 422
 423   if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
 424     {
 425       if (ddr)
 426         {
 427           dra = DDR_A (ddr);
 428           drb = DDR_B (ddr);
 429           if (dra)
 430             dump_data_reference (outf, dra);
 431           else
 432             fprintf (outf, "    (nil)\n");
 433           if (drb)
 434             dump_data_reference (outf, drb);
 435           else
 436             fprintf (outf, "    (nil)\n");
 437         }
 438       fprintf (outf, "    (don't know)\n)\n");
 439       return;
 440     }
 441
 442   dra = DDR_A (ddr);
 443   drb = DDR_B (ddr);
 444   dump_data_reference (outf, dra);
 445   dump_data_reference (outf, drb);
 446
 447   if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
 448     fprintf (outf, "    (no dependence)\n");
 449
 450   else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
 451     {
 452       unsigned int i;
 453       struct loop *loopi;
 454
 455       subscript *sub;
 456       FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
 457         {
 458           fprintf (outf, "  access_fn_A: ");
 459           print_generic_stmt (outf, SUB_ACCESS_FN (sub, 0));
 460           fprintf (outf, "  access_fn_B: ");
 461           print_generic_stmt (outf, SUB_ACCESS_FN (sub, 1));
 462           dump_subscript (outf, sub);
 463         }
 464
 465       fprintf (outf, "  inner loop index: %d\n", DDR_INNER_LOOP (ddr));
 466       fprintf (outf, "  loop nest: (");
 467       FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr), i, loopi)
 468         fprintf (outf, "%d ", loopi->num);
 469       fprintf (outf, ")\n");
 470
 471       for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
 472         {
 473           fprintf (outf, "  distance_vector: ");
 474           print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
 475                                DDR_NB_LOOPS (ddr));
 476         }
 477
 478       for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
 479         {
 480           fprintf (outf, "  direction_vector: ");
 481           print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
 482                                   DDR_NB_LOOPS (ddr));
 483         }
 484     }
 485
 486   fprintf (outf, ")\n");
 487 }
 488
 489 /* Debug version.  */
 490
 491 DEBUG_FUNCTION void
 492 debug_data_dependence_relation (struct data_dependence_relation *ddr)
 493 {
 494   dump_data_dependence_relation (stderr, ddr);
 495 }
 496
 497 /* Dump into FILE all the dependence relations from DDRS.  */
 498
 499 DEBUG_FUNCTION void
 500 dump_data_dependence_relations (FILE *file,
 501                                 vec<ddr_p> ddrs)
 502 {
 503   unsigned int i;
 504   struct data_dependence_relation *ddr;
 505
 506   FOR_EACH_VEC_ELT (ddrs, i, ddr)
 507     dump_data_dependence_relation (file, ddr);
 508 }
 509
 510 DEBUG_FUNCTION void
 511 debug (vec<ddr_p> &ref)
 512 {
 513   dump_data_dependence_relations (stderr, ref);
 514 }
 515
 516 DEBUG_FUNCTION void
 517 debug (vec<ddr_p> *ptr)
 518 {
 519   if (ptr)
 520     debug (*ptr);
 521   else
 522     fprintf (stderr, "<nil>\n");
 523 }
 524
 525
 526 /* Dump to STDERR all the dependence relations from DDRS.  */
 527
 528 DEBUG_FUNCTION void
 529 debug_data_dependence_relations (vec<ddr_p> ddrs)
 530 {
 531   dump_data_dependence_relations (stderr, ddrs);
 532 }
 533
 534 /* Dumps the distance and direction vectors in FILE.  DDRS contains
 535    the dependence relations, and VECT_SIZE is the size of the
 536    dependence vectors, or in other words the number of loops in the
 537    considered nest.  */
 538
 539 DEBUG_FUNCTION void
 540 dump_dist_dir_vectors (FILE *file, vec<ddr_p> ddrs)
 541 {
 542   unsigned int i, j;
 543   struct data_dependence_relation *ddr;
 544   lambda_vector v;
 545
 546   FOR_EACH_VEC_ELT (ddrs, i, ddr)
 547     if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
 548       {
 549         FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), j, v)
 550           {
 551             fprintf (file, "DISTANCE_V (");
 552             print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
 553             fprintf (file, ")\n");
 554           }
 555
 556         FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), j, v)
 557           {
 558             fprintf (file, "DIRECTION_V (");
 559             print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
 560             fprintf (file, ")\n");
 561           }
 562       }
 563
 564   fprintf (file, "\n\n");
 565 }
 566
 567 /* Dumps the data dependence relations DDRS in FILE.  */
 568
 569 DEBUG_FUNCTION void
 570 dump_ddrs (FILE *file, vec<ddr_p> ddrs)
 571 {
 572   unsigned int i;
 573   struct data_dependence_relation *ddr;
 574
 575   FOR_EACH_VEC_ELT (ddrs, i, ddr)
 576     dump_data_dependence_relation (file, ddr);
 577
 578   fprintf (file, "\n\n");
 579 }
 580
 581 DEBUG_FUNCTION void
 582 debug_ddrs (vec<ddr_p> ddrs)
 583 {
 584   dump_ddrs (stderr, ddrs);
 585 }
 586
 587 /* Helper function for split_constant_offset.  Expresses OP0 CODE OP1
 588    (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
 589    constant of type ssizetype, and returns true.  If we cannot do this
 590    with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
 591    is returned.  */
 592
 593 static bool
 594 split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
 595                          tree *var, tree *off)
 596 {
 597   tree var0, var1;
 598   tree off0, off1;
 599   enum tree_code ocode = code;
 600
 601   *var = NULL_TREE;
 602   *off = NULL_TREE;
 603
 604   switch (code)
 605     {
 606     case INTEGER_CST:
 607       *var = build_int_cst (type, 0);
 608       *off = fold_convert (ssizetype, op0);
 609       return true;
 610
 611     case POINTER_PLUS_EXPR:
 612       ocode = PLUS_EXPR;
 613       /* FALLTHROUGH */
 614     case PLUS_EXPR:
 615     case MINUS_EXPR:
 616       split_constant_offset (op0, &var0, &off0);
 617       split_constant_offset (op1, &var1, &off1);
 618       *var = fold_build2 (code, type, var0, var1);
 619       *off = size_binop (ocode, off0, off1);
 620       return true;
 621
 622     case MULT_EXPR:
 623       if (TREE_CODE (op1) != INTEGER_CST)
 624         return false;
 625
 626       split_constant_offset (op0, &var0, &off0);
 627       *var = fold_build2 (MULT_EXPR, type, var0, op1);
 628       *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
 629       return true;
 630
 631     case ADDR_EXPR:
 632       {
 633         tree base, poffset;
 634         poly_int64 pbitsize, pbitpos, pbytepos;
 635         machine_mode pmode;
 636         int punsignedp, preversep, pvolatilep;
 637
 638         op0 = TREE_OPERAND (op0, 0);
 639         base
 640           = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset, &pmode,
 641                                  &punsignedp, &preversep, &pvolatilep);
 642
 643         if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
 644           return false;
 645         base = build_fold_addr_expr (base);
 646         off0 = ssize_int (pbytepos);
 647
 648         if (poffset)
 649           {
 650             split_constant_offset (poffset, &poffset, &off1);
 651             off0 = size_binop (PLUS_EXPR, off0, off1);
 652             if (POINTER_TYPE_P (TREE_TYPE (base)))
 653               base = fold_build_pointer_plus (base, poffset);
 654             else
 655               base = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base,
 656                                   fold_convert (TREE_TYPE (base), poffset));
 657           }
 658
 659         var0 = fold_convert (type, base);
 660
 661         /* If variable length types are involved, punt, otherwise casts
 662            might be converted into ARRAY_REFs in gimplify_conversion.
 663            To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
 664            possibly no longer appears in current GIMPLE, might resurface.
 665            This perhaps could run
 666            if (CONVERT_EXPR_P (var0))
 667              {
 668                gimplify_conversion (&var0);
 669                // Attempt to fill in any within var0 found ARRAY_REF's
 670                // element size from corresponding op embedded ARRAY_REF,
 671                // if unsuccessful, just punt.
 672              }  */
 673         while (POINTER_TYPE_P (type))
 674           type = TREE_TYPE (type);
 675         if (int_size_in_bytes (type) < 0)
 676           return false;
 677
 678         *var = var0;
 679         *off = off0;
 680         return true;
 681       }
 682
 683     case SSA_NAME:
 684       {
 685         if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op0))
 686           return false;
 687
 688         gimple *def_stmt = SSA_NAME_DEF_STMT (op0);
 689         enum tree_code subcode;
 690
 691         if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
 692           return false;
 693
 694         var0 = gimple_assign_rhs1 (def_stmt);
 695         subcode = gimple_assign_rhs_code (def_stmt);
 696         var1 = gimple_assign_rhs2 (def_stmt);
 697
 698         return split_constant_offset_1 (type, var0, subcode, var1, var, off);
 699       }
 700     CASE_CONVERT:
 701       {
 702         /* We must not introduce undefined overflow, and we must not change the value.
 703            Hence we're okay if the inner type doesn't overflow to start with
 704            (pointer or signed), the outer type also is an integer or pointer
 705            and the outer precision is at least as large as the inner.  */
 706         tree itype = TREE_TYPE (op0);
 707         if ((POINTER_TYPE_P (itype)
 708              || (INTEGRAL_TYPE_P (itype) && !TYPE_OVERFLOW_TRAPS (itype)))
 709             && TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
 710             && (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
 711           {
 712             if (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_WRAPS (itype))
 713               {
 714                 /* Split the unconverted operand and try to prove that
 715                    wrapping isn't a problem.  */
 716                 tree tmp_var, tmp_off;
 717                 split_constant_offset (op0, &tmp_var, &tmp_off);
 718
 719                 /* See whether we have an SSA_NAME whose range is known
 720                    to be [A, B].  */
 721                 if (TREE_CODE (tmp_var) != SSA_NAME)
 722                   return false;
 723                 wide_int var_min, var_max;
 724                 value_range_type vr_type = get_range_info (tmp_var, &var_min,
 725                                                            &var_max);
 726                 wide_int var_nonzero = get_nonzero_bits (tmp_var);
 727                 signop sgn = TYPE_SIGN (itype);
 728                 if (intersect_range_with_nonzero_bits (vr_type, &var_min,
 729                                                        &var_max, var_nonzero,
 730                                                        sgn) != VR_RANGE)
 731                   return false;
 732
 733                 /* See whether the range of OP0 (i.e. TMP_VAR + TMP_OFF)
 734                    is known to be [A + TMP_OFF, B + TMP_OFF], with all
 735                    operations done in ITYPE.  The addition must overflow
 736                    at both ends of the range or at neither.  */
 737                 bool overflow[2];
 738                 unsigned int prec = TYPE_PRECISION (itype);
 739                 wide_int woff = wi::to_wide (tmp_off, prec);
 740                 wide_int op0_min = wi::add (var_min, woff, sgn, &overflow[0]);
 741                 wi::add (var_max, woff, sgn, &overflow[1]);
 742                 if (overflow[0] != overflow[1])
 743                   return false;
 744
 745                 /* Calculate (ssizetype) OP0 - (ssizetype) TMP_VAR.  */
 746                 widest_int diff = (widest_int::from (op0_min, sgn)
 747                                    - widest_int::from (var_min, sgn));
 748                 var0 = tmp_var;
 749                 *off = wide_int_to_tree (ssizetype, diff);
 750               }
 751             else
 752               split_constant_offset (op0, &var0, off);
 753             *var = fold_convert (type, var0);
 754             return true;
 755           }
 756         return false;
 757       }
 758
 759     default:
 760       return false;
 761     }
 762 }
 763
 764 /* Expresses EXP as VAR + OFF, where off is a constant.  The type of OFF
 765    will be ssizetype.  */
 766
 767 void
 768 split_constant_offset (tree exp, tree *var, tree *off)
 769 {
 770   tree type = TREE_TYPE (exp), op0, op1, e, o;
 771   enum tree_code code;
 772
 773   *var = exp;
 774   *off = ssize_int (0);
 775
 776   if (tree_is_chrec (exp)
 777       || get_gimple_rhs_class (TREE_CODE (exp)) == GIMPLE_TERNARY_RHS)
 778     return;
 779
 780   code = TREE_CODE (exp);
 781   extract_ops_from_tree (exp, &code, &op0, &op1);
 782   if (split_constant_offset_1 (type, op0, code, op1, &e, &o))
 783     {
 784       *var = e;
 785       *off = o;
 786     }
 787 }
 788
 789 /* Returns the address ADDR of an object in a canonical shape (without nop
 790    casts, and with type of pointer to the object).  */
 791
 792 static tree
 793 canonicalize_base_object_address (tree addr)
 794 {
 795   tree orig = addr;
 796
 797   STRIP_NOPS (addr);
 798
 799   /* The base address may be obtained by casting from integer, in that case
 800      keep the cast.  */
 801   if (!POINTER_TYPE_P (TREE_TYPE (addr)))
 802     return orig;
 803
 804   if (TREE_CODE (addr) != ADDR_EXPR)
 805     return addr;
 806
 807   return build_fold_addr_expr (TREE_OPERAND (addr, 0));
 808 }
 809
 810 /* Analyze the behavior of memory reference REF.  There are two modes:
 811
 812    - BB analysis.  In this case we simply split the address into base,
 813      init and offset components, without reference to any containing loop.
 814      The resulting base and offset are general expressions and they can
 815      vary arbitrarily from one iteration of the containing loop to the next.
 816      The step is always zero.
 817
 818    - loop analysis.  In this case we analyze the reference both wrt LOOP
 819      and on the basis that the reference occurs (is "used") in LOOP;
 820      see the comment above analyze_scalar_evolution_in_loop for more
 821      information about this distinction.  The base, init, offset and
 822      step fields are all invariant in LOOP.
 823
 824    Perform BB analysis if LOOP is null, or if LOOP is the function's
 825    dummy outermost loop.  In other cases perform loop analysis.
 826
 827    Return true if the analysis succeeded and store the results in DRB if so.
 828    BB analysis can only fail for bitfield or reversed-storage accesses.  */
 829
 830 bool
 831 dr_analyze_innermost (innermost_loop_behavior *drb, tree ref,
 832                       struct loop *loop)
 833 {
 834   poly_int64 pbitsize, pbitpos;
 835   tree base, poffset;
 836   machine_mode pmode;
 837   int punsignedp, preversep, pvolatilep;
 838   affine_iv base_iv, offset_iv;
 839   tree init, dinit, step;
 840   bool in_loop = (loop && loop->num);
 841
 842   if (dump_file && (dump_flags & TDF_DETAILS))
 843     fprintf (dump_file, "analyze_innermost: ");
 844
 845   base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset, &pmode,
 846                               &punsignedp, &preversep, &pvolatilep);
 847   gcc_assert (base != NULL_TREE);
 848
 849   poly_int64 pbytepos;
 850   if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
 851     {
 852       if (dump_file && (dump_flags & TDF_DETAILS))
 853         fprintf (dump_file, "failed: bit offset alignment.\n");
 854       return false;
 855     }
 856
 857   if (preversep)
 858     {
 859       if (dump_file && (dump_flags & TDF_DETAILS))
 860         fprintf (dump_file, "failed: reverse storage order.\n");
 861       return false;
 862     }
 863
 864   /* Calculate the alignment and misalignment for the inner reference.  */
 865   unsigned int HOST_WIDE_INT bit_base_misalignment;
 866   unsigned int bit_base_alignment;
 867   get_object_alignment_1 (base, &bit_base_alignment, &bit_base_misalignment);
 868
 869   /* There are no bitfield references remaining in BASE, so the values
 870      we got back must be whole bytes.  */
 871   gcc_assert (bit_base_alignment % BITS_PER_UNIT == 0
 872               && bit_base_misalignment % BITS_PER_UNIT == 0);
 873   unsigned int base_alignment = bit_base_alignment / BITS_PER_UNIT;
 874   poly_int64 base_misalignment = bit_base_misalignment / BITS_PER_UNIT;
 875
 876   if (TREE_CODE (base) == MEM_REF)
 877     {
 878       if (!integer_zerop (TREE_OPERAND (base, 1)))
 879         {
 880           /* Subtract MOFF from the base and add it to POFFSET instead.
 881              Adjust the misalignment to reflect the amount we subtracted.  */
 882           poly_offset_int moff = mem_ref_offset (base);
 883           base_misalignment -= moff.force_shwi ();
 884           tree mofft = wide_int_to_tree (sizetype, moff);
 885           if (!poffset)
 886             poffset = mofft;
 887           else
 888             poffset = size_binop (PLUS_EXPR, poffset, mofft);
 889         }
 890       base = TREE_OPERAND (base, 0);
 891     }
 892   else
 893     base = build_fold_addr_expr (base);
 894
 895   if (in_loop)
 896     {
 897       if (!simple_iv (loop, loop, base, &base_iv, true))
 898         {
 899           if (dump_file && (dump_flags & TDF_DETAILS))
 900             fprintf (dump_file, "failed: evolution of base is not affine.\n");
 901           return false;
 902         }
 903     }
 904   else
 905     {
 906       base_iv.base = base;
 907       base_iv.step = ssize_int (0);
 908       base_iv.no_overflow = true;
 909     }
 910
 911   if (!poffset)
 912     {
 913       offset_iv.base = ssize_int (0);
 914       offset_iv.step = ssize_int (0);
 915     }
 916   else
 917     {
 918       if (!in_loop)
 919         {
 920           offset_iv.base = poffset;
 921           offset_iv.step = ssize_int (0);
 922         }
 923       else if (!simple_iv (loop, loop, poffset, &offset_iv, true))
 924         {
 925           if (dump_file && (dump_flags & TDF_DETAILS))
 926             fprintf (dump_file, "failed: evolution of offset is not affine.\n");
 927           return false;
 928         }
 929     }
 930
 931   init = ssize_int (pbytepos);
 932
 933   /* Subtract any constant component from the base and add it to INIT instead.
 934      Adjust the misalignment to reflect the amount we subtracted.  */
 935   split_constant_offset (base_iv.base, &base_iv.base, &dinit);
 936   init = size_binop (PLUS_EXPR, init, dinit);
 937   base_misalignment -= TREE_INT_CST_LOW (dinit);
 938
 939   split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
 940   init = size_binop (PLUS_EXPR, init, dinit);
 941
 942   step = size_binop (PLUS_EXPR,
 943                      fold_convert (ssizetype, base_iv.step),
 944                      fold_convert (ssizetype, offset_iv.step));
 945
 946   base = canonicalize_base_object_address (base_iv.base);
 947
 948   /* See if get_pointer_alignment can guarantee a higher alignment than
 949      the one we calculated above.  */
 950   unsigned int HOST_WIDE_INT alt_misalignment;
 951   unsigned int alt_alignment;
 952   get_pointer_alignment_1 (base, &alt_alignment, &alt_misalignment);
 953
 954   /* As above, these values must be whole bytes.  */
 955   gcc_assert (alt_alignment % BITS_PER_UNIT == 0
 956               && alt_misalignment % BITS_PER_UNIT == 0);
 957   alt_alignment /= BITS_PER_UNIT;
 958   alt_misalignment /= BITS_PER_UNIT;
 959
 960   if (base_alignment < alt_alignment)
 961     {
 962       base_alignment = alt_alignment;
 963       base_misalignment = alt_misalignment;
 964     }
 965
 966   drb->base_address = base;
 967   drb->offset = fold_convert (ssizetype, offset_iv.base);
 968   drb->init = init;
 969   drb->step = step;
 970   if (known_misalignment (base_misalignment, base_alignment,
 971                           &drb->base_misalignment))
 972     drb->base_alignment = base_alignment;
 973   else
 974     {
 975       drb->base_alignment = known_alignment (base_misalignment);
 976       drb->base_misalignment = 0;
 977     }
 978   drb->offset_alignment = highest_pow2_factor (offset_iv.base);
 979   drb->step_alignment = highest_pow2_factor (step);
 980
 981   if (dump_file && (dump_flags & TDF_DETAILS))
 982     fprintf (dump_file, "success.\n");
 983
 984   return true;
 985 }
 986
 987 /* Return true if OP is a valid component reference for a DR access
 988    function.  This accepts a subset of what handled_component_p accepts.  */
 989
 990 static bool
 991 access_fn_component_p (tree op)
 992 {
 993   switch (TREE_CODE (op))
 994     {
 995     case REALPART_EXPR:
 996     case IMAGPART_EXPR:
 997     case ARRAY_REF:
 998       return true;
 999
1000     case COMPONENT_REF:
1001       return TREE_CODE (TREE_TYPE (TREE_OPERAND (op, 0))) == RECORD_TYPE;
1002
1003     default:
1004       return false;
1005     }
1006 }
1007
1008 /* Determines the base object and the list of indices of memory reference
1009    DR, analyzed in LOOP and instantiated before NEST.  */
1010
1011 static void
1012 dr_analyze_indices (struct data_reference *dr, edge nest, loop_p loop)
1013 {
1014   vec<tree> access_fns = vNULL;
1015   tree ref, op;
1016   tree base, off, access_fn;
1017
1018   /* If analyzing a basic-block there are no indices to analyze
1019      and thus no access functions.  */
1020   if (!nest)
1021     {
1022       DR_BASE_OBJECT (dr) = DR_REF (dr);
1023       DR_ACCESS_FNS (dr).create (0);
1024       return;
1025     }
1026
1027   ref = DR_REF (dr);
1028
1029   /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
1030      into a two element array with a constant index.  The base is
1031      then just the immediate underlying object.  */
1032   if (TREE_CODE (ref) == REALPART_EXPR)
1033     {
1034       ref = TREE_OPERAND (ref, 0);
1035       access_fns.safe_push (integer_zero_node);
1036     }
1037   else if (TREE_CODE (ref) == IMAGPART_EXPR)
1038     {
1039       ref = TREE_OPERAND (ref, 0);
1040       access_fns.safe_push (integer_one_node);
1041     }
1042
1043   /* Analyze access functions of dimensions we know to be independent.
1044      The list of component references handled here should be kept in
1045      sync with access_fn_component_p.  */
1046   while (handled_component_p (ref))
1047     {
1048       if (TREE_CODE (ref) == ARRAY_REF)
1049         {
1050           op = TREE_OPERAND (ref, 1);
1051           access_fn = analyze_scalar_evolution (loop, op);
1052           access_fn = instantiate_scev (nest, loop, access_fn);
1053           access_fns.safe_push (access_fn);
1054         }
1055       else if (TREE_CODE (ref) == COMPONENT_REF
1056                && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) == RECORD_TYPE)
1057         {
1058           /* For COMPONENT_REFs of records (but not unions!) use the
1059              FIELD_DECL offset as constant access function so we can
1060              disambiguate a[i].f1 and a[i].f2.  */
1061           tree off = component_ref_field_offset (ref);
1062           off = size_binop (PLUS_EXPR,
1063                             size_binop (MULT_EXPR,
1064                                         fold_convert (bitsizetype, off),
1065                                         bitsize_int (BITS_PER_UNIT)),
1066                             DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref, 1)));
1067           access_fns.safe_push (off);
1068         }
1069       else
1070         /* If we have an unhandled component we could not translate
1071            to an access function stop analyzing.  We have determined
1072            our base object in this case.  */
1073         break;
1074
1075       ref = TREE_OPERAND (ref, 0);
1076     }
1077
1078   /* If the address operand of a MEM_REF base has an evolution in the
1079      analyzed nest, add it as an additional independent access-function.  */
1080   if (TREE_CODE (ref) == MEM_REF)
1081     {
1082       op = TREE_OPERAND (ref, 0);
1083       access_fn = analyze_scalar_evolution (loop, op);
1084       access_fn = instantiate_scev (nest, loop, access_fn);
1085       if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
1086         {
1087           tree orig_type;
1088           tree memoff = TREE_OPERAND (ref, 1);
1089           base = initial_condition (access_fn);
1090           orig_type = TREE_TYPE (base);
1091           STRIP_USELESS_TYPE_CONVERSION (base);
1092           split_constant_offset (base, &base, &off);
1093           STRIP_USELESS_TYPE_CONVERSION (base);
1094           /* Fold the MEM_REF offset into the evolutions initial
1095              value to make more bases comparable.  */
1096           if (!integer_zerop (memoff))
1097             {
1098               off = size_binop (PLUS_EXPR, off,
1099                                 fold_convert (ssizetype, memoff));
1100               memoff = build_int_cst (TREE_TYPE (memoff), 0);
1101             }
1102           /* Adjust the offset so it is a multiple of the access type
1103              size and thus we separate bases that can possibly be used
1104              to produce partial overlaps (which the access_fn machinery
1105              cannot handle).  */
1106           wide_int rem;
1107           if (TYPE_SIZE_UNIT (TREE_TYPE (ref))
1108               && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (ref))) == INTEGER_CST
1109               && !integer_zerop (TYPE_SIZE_UNIT (TREE_TYPE (ref))))
1110             rem = wi::mod_trunc
1111               (wi::to_wide (off),
1112                wi::to_wide (TYPE_SIZE_UNIT (TREE_TYPE (ref))),
1113                SIGNED);
1114           else
1115             /* If we can't compute the remainder simply force the initial
1116                condition to zero.  */
1117             rem = wi::to_wide (off);
1118           off = wide_int_to_tree (ssizetype, wi::to_wide (off) - rem);
1119           memoff = wide_int_to_tree (TREE_TYPE (memoff), rem);
1120           /* And finally replace the initial condition.  */
1121           access_fn = chrec_replace_initial_condition
1122               (access_fn, fold_convert (orig_type, off));
1123           /* ???  This is still not a suitable base object for
1124              dr_may_alias_p - the base object needs to be an
1125              access that covers the object as whole.  With
1126              an evolution in the pointer this cannot be
1127              guaranteed.
1128              As a band-aid, mark the access so we can special-case
1129              it in dr_may_alias_p.  */
1130           tree old = ref;
1131           ref = fold_build2_loc (EXPR_LOCATION (ref),
1132                                  MEM_REF, TREE_TYPE (ref),
1133                                  base, memoff);
1134           MR_DEPENDENCE_CLIQUE (ref) = MR_DEPENDENCE_CLIQUE (old);
1135           MR_DEPENDENCE_BASE (ref) = MR_DEPENDENCE_BASE (old);
1136           DR_UNCONSTRAINED_BASE (dr) = true;
1137           access_fns.safe_push (access_fn);
1138         }
1139     }
1140   else if (DECL_P (ref))
1141     {
1142       /* Canonicalize DR_BASE_OBJECT to MEM_REF form.  */
1143       ref = build2 (MEM_REF, TREE_TYPE (ref),
1144                     build_fold_addr_expr (ref),
1145                     build_int_cst (reference_alias_ptr_type (ref), 0));
1146     }
1147
1148   DR_BASE_OBJECT (dr) = ref;
1149   DR_ACCESS_FNS (dr) = access_fns;
1150 }
1151
1152 /* Extracts the alias analysis information from the memory reference DR.  */
1153
1154 static void
1155 dr_analyze_alias (struct data_reference *dr)
1156 {
1157   tree ref = DR_REF (dr);
1158   tree base = get_base_address (ref), addr;
1159
1160   if (INDIRECT_REF_P (base)
1161       || TREE_CODE (base) == MEM_REF)
1162     {
1163       addr = TREE_OPERAND (base, 0);
1164       if (TREE_CODE (addr) == SSA_NAME)
1165         DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
1166     }
1167 }
1168
1169 /* Frees data reference DR.  */
1170
1171 void
1172 free_data_ref (data_reference_p dr)
1173 {
1174   DR_ACCESS_FNS (dr).release ();
1175   free (dr);
1176 }
1177
1178 /* Analyze memory reference MEMREF, which is accessed in STMT.
1179    The reference is a read if IS_READ is true, otherwise it is a write.
1180    IS_CONDITIONAL_IN_STMT indicates that the reference is conditional
1181    within STMT, i.e. that it might not occur even if STMT is executed
1182    and runs to completion.
1183
1184    Return the data_reference description of MEMREF.  NEST is the outermost
1185    loop in which the reference should be instantiated, LOOP is the loop
1186    in which the data reference should be analyzed.  */
1187
1188 struct data_reference *
1189 create_data_ref (edge nest, loop_p loop, tree memref, gimple *stmt,
1190                  bool is_read, bool is_conditional_in_stmt)
1191 {
1192   struct data_reference *dr;
1193
1194   if (dump_file && (dump_flags & TDF_DETAILS))
1195     {
1196       fprintf (dump_file, "Creating dr for ");
1197       print_generic_expr (dump_file, memref, TDF_SLIM);
1198       fprintf (dump_file, "\n");
1199     }
1200
1201   dr = XCNEW (struct data_reference);
1202   DR_STMT (dr) = stmt;
1203   DR_REF (dr) = memref;
1204   DR_IS_READ (dr) = is_read;
1205   DR_IS_CONDITIONAL_IN_STMT (dr) = is_conditional_in_stmt;
1206
1207   dr_analyze_innermost (&DR_INNERMOST (dr), memref,
1208                         nest != NULL ? loop : NULL);
1209   dr_analyze_indices (dr, nest, loop);
1210   dr_analyze_alias (dr);
1211
1212   if (dump_file && (dump_flags & TDF_DETAILS))
1213     {
1214       unsigned i;
1215       fprintf (dump_file, "\tbase_address: ");
1216       print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
1217       fprintf (dump_file, "\n\toffset from base address: ");
1218       print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
1219       fprintf (dump_file, "\n\tconstant offset from base address: ");
1220       print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
1221       fprintf (dump_file, "\n\tstep: ");
1222       print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
1223       fprintf (dump_file, "\n\tbase alignment: %d", DR_BASE_ALIGNMENT (dr));
1224       fprintf (dump_file, "\n\tbase misalignment: %d",
1225                DR_BASE_MISALIGNMENT (dr));
1226       fprintf (dump_file, "\n\toffset alignment: %d",
1227                DR_OFFSET_ALIGNMENT (dr));
1228       fprintf (dump_file, "\n\tstep alignment: %d", DR_STEP_ALIGNMENT (dr));
1229       fprintf (dump_file, "\n\tbase_object: ");
1230       print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
1231       fprintf (dump_file, "\n");
1232       for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
1233         {
1234           fprintf (dump_file, "\tAccess function %d: ", i);
1235           print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
1236         }
1237     }
1238
1239   return dr;
1240 }
1241
1242 /*  A helper function computes order between two tree epxressions T1 and T2.
1243     This is used in comparator functions sorting objects based on the order
1244     of tree expressions.  The function returns -1, 0, or 1.  */
1245
1246 int
1247 data_ref_compare_tree (tree t1, tree t2)
1248 {
1249   int i, cmp;
1250   enum tree_code code;
1251   char tclass;
1252
1253   if (t1 == t2)
1254     return 0;
1255   if (t1 == NULL)
1256     return -1;
1257   if (t2 == NULL)
1258     return 1;
1259
1260   STRIP_USELESS_TYPE_CONVERSION (t1);
1261   STRIP_USELESS_TYPE_CONVERSION (t2);
1262   if (t1 == t2)
1263     return 0;
1264
1265   if (TREE_CODE (t1) != TREE_CODE (t2)
1266       && ! (CONVERT_EXPR_P (t1) && CONVERT_EXPR_P (t2)))
1267     return TREE_CODE (t1) < TREE_CODE (t2) ? -1 : 1;
1268
1269   code = TREE_CODE (t1);
1270   switch (code)
1271     {
1272     case INTEGER_CST:
1273       return tree_int_cst_compare (t1, t2);
1274
1275     case STRING_CST:
1276       if (TREE_STRING_LENGTH (t1) != TREE_STRING_LENGTH (t2))
1277         return TREE_STRING_LENGTH (t1) < TREE_STRING_LENGTH (t2) ? -1 : 1;
1278       return memcmp (TREE_STRING_POINTER (t1), TREE_STRING_POINTER (t2),
1279                      TREE_STRING_LENGTH (t1));
1280
1281     case SSA_NAME:
1282       if (SSA_NAME_VERSION (t1) != SSA_NAME_VERSION (t2))
1283         return SSA_NAME_VERSION (t1) < SSA_NAME_VERSION (t2) ? -1 : 1;
1284       break;
1285
1286     default:
1287       if (POLY_INT_CST_P (t1))
1288         return compare_sizes_for_sort (wi::to_poly_widest (t1),
1289                                        wi::to_poly_widest (t2));
1290
1291       tclass = TREE_CODE_CLASS (code);
1292
1293       /* For decls, compare their UIDs.  */
1294       if (tclass == tcc_declaration)
1295         {
1296           if (DECL_UID (t1) != DECL_UID (t2))
1297             return DECL_UID (t1) < DECL_UID (t2) ? -1 : 1;
1298           break;
1299         }
1300       /* For expressions, compare their operands recursively.  */
1301       else if (IS_EXPR_CODE_CLASS (tclass))
1302         {
1303           for (i = TREE_OPERAND_LENGTH (t1) - 1; i >= 0; --i)
1304             {
1305               cmp = data_ref_compare_tree (TREE_OPERAND (t1, i),
1306                                            TREE_OPERAND (t2, i));
1307               if (cmp != 0)
1308                 return cmp;
1309             }
1310         }
1311       else
1312         gcc_unreachable ();
1313     }
1314
1315   return 0;
1316 }
1317
1318 /* Return TRUE it's possible to resolve data dependence DDR by runtime alias
1319    check.  */
1320
1321 bool
1322 runtime_alias_check_p (ddr_p ddr, struct loop *loop, bool speed_p)
1323 {
1324   if (dump_enabled_p ())
1325     {
1326       dump_printf (MSG_NOTE, "consider run-time aliasing test between ");
1327       dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (DDR_A (ddr)));
1328       dump_printf (MSG_NOTE,  " and ");
1329       dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (DDR_B (ddr)));
1330       dump_printf (MSG_NOTE, "\n");
1331     }
1332
1333   if (!speed_p)
1334     {
1335       if (dump_enabled_p ())
1336         dump_printf (MSG_MISSED_OPTIMIZATION,
1337                      "runtime alias check not supported when optimizing "
1338                      "for size.\n");
1339       return false;
1340     }
1341
1342   /* FORNOW: We don't support versioning with outer-loop in either
1343      vectorization or loop distribution.  */
1344   if (loop != NULL && loop->inner != NULL)
1345     {
1346       if (dump_enabled_p ())
1347         dump_printf (MSG_MISSED_OPTIMIZATION,
1348                      "runtime alias check not supported for outer loop.\n");
1349       return false;
1350     }
1351
1352   return true;
1353 }
1354
1355 /* Operator == between two dr_with_seg_len objects.
1356
1357    This equality operator is used to make sure two data refs
1358    are the same one so that we will consider to combine the
1359    aliasing checks of those two pairs of data dependent data
1360    refs.  */
1361
1362 static bool
1363 operator == (const dr_with_seg_len& d1,
1364              const dr_with_seg_len& d2)
1365 {
1366   return (operand_equal_p (DR_BASE_ADDRESS (d1.dr),
1367                            DR_BASE_ADDRESS (d2.dr), 0)
1368           && data_ref_compare_tree (DR_OFFSET (d1.dr), DR_OFFSET (d2.dr)) == 0
1369           && data_ref_compare_tree (DR_INIT (d1.dr), DR_INIT (d2.dr)) == 0
1370           && data_ref_compare_tree (d1.seg_len, d2.seg_len) == 0
1371           && known_eq (d1.access_size, d2.access_size)
1372           && d1.align == d2.align);
1373 }
1374
1375 /* Comparison function for sorting objects of dr_with_seg_len_pair_t
1376    so that we can combine aliasing checks in one scan.  */
1377
1378 static int
1379 comp_dr_with_seg_len_pair (const void *pa_, const void *pb_)
1380 {
1381   const dr_with_seg_len_pair_t* pa = (const dr_with_seg_len_pair_t *) pa_;
1382   const dr_with_seg_len_pair_t* pb = (const dr_with_seg_len_pair_t *) pb_;
1383   const dr_with_seg_len &a1 = pa->first, &a2 = pa->second;
1384   const dr_with_seg_len &b1 = pb->first, &b2 = pb->second;
1385
1386   /* For DR pairs (a, b) and (c, d), we only consider to merge the alias checks
1387      if a and c have the same basic address snd step, and b and d have the same
1388      address and step.  Therefore, if any a&c or b&d don't have the same address
1389      and step, we don't care the order of those two pairs after sorting.  */
1390   int comp_res;
1391
1392   if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a1.dr),
1393                                          DR_BASE_ADDRESS (b1.dr))) != 0)
1394     return comp_res;
1395   if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a2.dr),
1396                                          DR_BASE_ADDRESS (b2.dr))) != 0)
1397     return comp_res;
1398   if ((comp_res = data_ref_compare_tree (DR_STEP (a1.dr),
1399                                          DR_STEP (b1.dr))) != 0)
1400     return comp_res;
1401   if ((comp_res = data_ref_compare_tree (DR_STEP (a2.dr),
1402                                          DR_STEP (b2.dr))) != 0)
1403     return comp_res;
1404   if ((comp_res = data_ref_compare_tree (DR_OFFSET (a1.dr),
1405                                          DR_OFFSET (b1.dr))) != 0)
1406     return comp_res;
1407   if ((comp_res = data_ref_compare_tree (DR_INIT (a1.dr),
1408                                          DR_INIT (b1.dr))) != 0)
1409     return comp_res;
1410   if ((comp_res = data_ref_compare_tree (DR_OFFSET (a2.dr),
1411                                          DR_OFFSET (b2.dr))) != 0)
1412     return comp_res;
1413   if ((comp_res = data_ref_compare_tree (DR_INIT (a2.dr),
1414                                          DR_INIT (b2.dr))) != 0)
1415     return comp_res;
1416
1417   return 0;
1418 }
1419
1420 /* Merge alias checks recorded in ALIAS_PAIRS and remove redundant ones.
1421    FACTOR is number of iterations that each data reference is accessed.
1422
1423    Basically, for each pair of dependent data refs store_ptr_0 & load_ptr_0,
1424    we create an expression:
1425
1426    ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
1427    || (load_ptr_0 + load_segment_length_0) <= store_ptr_0))
1428
1429    for aliasing checks.  However, in some cases we can decrease the number
1430    of checks by combining two checks into one.  For example, suppose we have
1431    another pair of data refs store_ptr_0 & load_ptr_1, and if the following
1432    condition is satisfied:
1433
1434    load_ptr_0 < load_ptr_1  &&
1435    load_ptr_1 - load_ptr_0 - load_segment_length_0 < store_segment_length_0
1436
1437    (this condition means, in each iteration of vectorized loop, the accessed
1438    memory of store_ptr_0 cannot be between the memory of load_ptr_0 and
1439    load_ptr_1.)
1440
1441    we then can use only the following expression to finish the alising checks
1442    between store_ptr_0 & load_ptr_0 and store_ptr_0 & load_ptr_1:
1443
1444    ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
1445    || (load_ptr_1 + load_segment_length_1 <= store_ptr_0))
1446
1447    Note that we only consider that load_ptr_0 and load_ptr_1 have the same
1448    basic address.  */
1449
1450 void
1451 prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs,
1452                                poly_uint64)
1453 {
1454   /* Sort the collected data ref pairs so that we can scan them once to
1455      combine all possible aliasing checks.  */
1456   alias_pairs->qsort (comp_dr_with_seg_len_pair);
1457
1458   /* Scan the sorted dr pairs and check if we can combine alias checks
1459      of two neighboring dr pairs.  */
1460   for (size_t i = 1; i < alias_pairs->length (); ++i)
1461     {
1462       /* Deal with two ddrs (dr_a1, dr_b1) and (dr_a2, dr_b2).  */
1463       dr_with_seg_len *dr_a1 = &(*alias_pairs)[i-1].first,
1464                       *dr_b1 = &(*alias_pairs)[i-1].second,
1465                       *dr_a2 = &(*alias_pairs)[i].first,
1466                       *dr_b2 = &(*alias_pairs)[i].second;
1467
1468       /* Remove duplicate data ref pairs.  */
1469       if (*dr_a1 == *dr_a2 && *dr_b1 == *dr_b2)
1470         {
1471           if (dump_enabled_p ())
1472             {
1473               dump_printf (MSG_NOTE, "found equal ranges ");
1474               dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a1->dr));
1475               dump_printf (MSG_NOTE,  ", ");
1476               dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b1->dr));
1477               dump_printf (MSG_NOTE,  " and ");
1478               dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a2->dr));
1479               dump_printf (MSG_NOTE,  ", ");
1480               dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b2->dr));
1481               dump_printf (MSG_NOTE, "\n");
1482             }
1483           alias_pairs->ordered_remove (i--);
1484           continue;
1485         }
1486
1487       if (*dr_a1 == *dr_a2 || *dr_b1 == *dr_b2)
1488         {
1489           /* We consider the case that DR_B1 and DR_B2 are same memrefs,
1490              and DR_A1 and DR_A2 are two consecutive memrefs.  */
1491           if (*dr_a1 == *dr_a2)
1492             {
1493               std::swap (dr_a1, dr_b1);
1494               std::swap (dr_a2, dr_b2);
1495             }
1496
1497           poly_int64 init_a1, init_a2;
1498           /* Only consider cases in which the distance between the initial
1499              DR_A1 and the initial DR_A2 is known at compile time.  */
1500           if (!operand_equal_p (DR_BASE_ADDRESS (dr_a1->dr),
1501                                 DR_BASE_ADDRESS (dr_a2->dr), 0)
1502               || !operand_equal_p (DR_OFFSET (dr_a1->dr),
1503                                    DR_OFFSET (dr_a2->dr), 0)
1504               || !poly_int_tree_p (DR_INIT (dr_a1->dr), &init_a1)
1505               || !poly_int_tree_p (DR_INIT (dr_a2->dr), &init_a2))
1506             continue;
1507
1508           /* Don't combine if we can't tell which one comes first.  */
1509           if (!ordered_p (init_a1, init_a2))
1510             continue;
1511
1512           /* Make sure dr_a1 starts left of dr_a2.  */
1513           if (maybe_gt (init_a1, init_a2))
1514             {
1515               std::swap (*dr_a1, *dr_a2);
1516               std::swap (init_a1, init_a2);
1517             }
1518
1519           /* Work out what the segment length would be if we did combine
1520              DR_A1 and DR_A2:
1521
1522              - If DR_A1 and DR_A2 have equal lengths, that length is
1523                also the combined length.
1524
1525              - If DR_A1 and DR_A2 both have negative "lengths", the combined
1526                length is the lower bound on those lengths.
1527
1528              - If DR_A1 and DR_A2 both have positive lengths, the combined
1529                length is the upper bound on those lengths.
1530
1531              Other cases are unlikely to give a useful combination.
1532
1533              The lengths both have sizetype, so the sign is taken from
1534              the step instead.  */
1535           if (!operand_equal_p (dr_a1->seg_len, dr_a2->seg_len, 0))
1536             {
1537               poly_uint64 seg_len_a1, seg_len_a2;
1538               if (!poly_int_tree_p (dr_a1->seg_len, &seg_len_a1)
1539                   || !poly_int_tree_p (dr_a2->seg_len, &seg_len_a2))
1540                 continue;
1541
1542               tree indicator_a = dr_direction_indicator (dr_a1->dr);
1543               if (TREE_CODE (indicator_a) != INTEGER_CST)
1544                 continue;
1545
1546               tree indicator_b = dr_direction_indicator (dr_a2->dr);
1547               if (TREE_CODE (indicator_b) != INTEGER_CST)
1548                 continue;
1549
1550               int sign_a = tree_int_cst_sgn (indicator_a);
1551               int sign_b = tree_int_cst_sgn (indicator_b);
1552
1553               poly_uint64 new_seg_len;
1554               if (sign_a <= 0 && sign_b <= 0)
1555                 new_seg_len = lower_bound (seg_len_a1, seg_len_a2);
1556               else if (sign_a >= 0 && sign_b >= 0)
1557                 new_seg_len = upper_bound (seg_len_a1, seg_len_a2);
1558               else
1559                 continue;
1560
1561               dr_a1->seg_len = build_int_cst (TREE_TYPE (dr_a1->seg_len),
1562                                               new_seg_len);
1563               dr_a1->align = MIN (dr_a1->align, known_alignment (new_seg_len));
1564             }
1565
1566           /* This is always positive due to the swap above.  */
1567           poly_uint64 diff = init_a2 - init_a1;
1568
1569           /* The new check will start at DR_A1.  Make sure that its access
1570              size encompasses the initial DR_A2.  */
1571           if (maybe_lt (dr_a1->access_size, diff + dr_a2->access_size))
1572             {
1573               dr_a1->access_size = upper_bound (dr_a1->access_size,
1574                                                 diff + dr_a2->access_size);
1575               unsigned int new_align = known_alignment (dr_a1->access_size);
1576               dr_a1->align = MIN (dr_a1->align, new_align);
1577             }
1578           if (dump_enabled_p ())
1579             {
1580               dump_printf (MSG_NOTE, "merging ranges for ");
1581               dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a1->dr));
1582               dump_printf (MSG_NOTE,  ", ");
1583               dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b1->dr));
1584               dump_printf (MSG_NOTE,  " and ");
1585               dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a2->dr));
1586               dump_printf (MSG_NOTE,  ", ");
1587               dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b2->dr));
1588               dump_printf (MSG_NOTE, "\n");
1589             }
1590           alias_pairs->ordered_remove (i);
1591           i--;
1592         }
1593     }
1594 }
1595
1596 /* Given LOOP's two data references and segment lengths described by DR_A
1597    and DR_B, create expression checking if the two addresses ranges intersect
1598    with each other based on index of the two addresses.  This can only be
1599    done if DR_A and DR_B referring to the same (array) object and the index
1600    is the only difference.  For example:
1601
1602                        DR_A                           DR_B
1603       data-ref         arr[i]                         arr[j]
1604       base_object      arr                            arr
1605       index            {i_0, +, 1}_loop               {j_0, +, 1}_loop
1606
1607    The addresses and their index are like:
1608
1609         |<- ADDR_A    ->|          |<- ADDR_B    ->|
1610      ------------------------------------------------------->
1611         |   |   |   |   |          |   |   |   |   |
1612      ------------------------------------------------------->
1613         i_0 ...         i_0+4      j_0 ...         j_0+4
1614
1615    We can create expression based on index rather than address:
1616
1617      (i_0 + 4 < j_0 || j_0 + 4 < i_0)
1618
1619    Note evolution step of index needs to be considered in comparison.  */
1620
1621 static bool
1622 create_intersect_range_checks_index (struct loop *loop, tree *cond_expr,
1623                                      const dr_with_seg_len& dr_a,
1624                                      const dr_with_seg_len& dr_b)
1625 {
1626   if (integer_zerop (DR_STEP (dr_a.dr))
1627       || integer_zerop (DR_STEP (dr_b.dr))
1628       || DR_NUM_DIMENSIONS (dr_a.dr) != DR_NUM_DIMENSIONS (dr_b.dr))
1629     return false;
1630
1631   poly_uint64 seg_len1, seg_len2;
1632   if (!poly_int_tree_p (dr_a.seg_len, &seg_len1)
1633       || !poly_int_tree_p (dr_b.seg_len, &seg_len2))
1634     return false;
1635
1636   if (!tree_fits_shwi_p (DR_STEP (dr_a.dr)))
1637     return false;
1638
1639   if (!operand_equal_p (DR_BASE_OBJECT (dr_a.dr), DR_BASE_OBJECT (dr_b.dr), 0))
1640     return false;
1641
1642   if (!operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0))
1643     return false;
1644
1645   gcc_assert (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST);
1646
1647   bool neg_step = tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0;
1648   unsigned HOST_WIDE_INT abs_step = tree_to_shwi (DR_STEP (dr_a.dr));
1649   if (neg_step)
1650     {
1651       abs_step = -abs_step;
1652       seg_len1 = -seg_len1;
1653       seg_len2 = -seg_len2;
1654     }
1655   else
1656     {
1657       /* Include the access size in the length, so that we only have one
1658          tree addition below.  */
1659       seg_len1 += dr_a.access_size;
1660       seg_len2 += dr_b.access_size;
1661     }
1662
1663   /* Infer the number of iterations with which the memory segment is accessed
1664      by DR.  In other words, alias is checked if memory segment accessed by
1665      DR_A in some iterations intersect with memory segment accessed by DR_B
1666      in the same amount iterations.
1667      Note segnment length is a linear function of number of iterations with
1668      DR_STEP as the coefficient.  */
1669   poly_uint64 niter_len1, niter_len2;
1670   if (!can_div_trunc_p (seg_len1 + abs_step - 1, abs_step, &niter_len1)
1671       || !can_div_trunc_p (seg_len2 + abs_step - 1, abs_step, &niter_len2))
1672     return false;
1673
1674   poly_uint64 niter_access1 = 0, niter_access2 = 0;
1675   if (neg_step)
1676     {
1677       /* Divide each access size by the byte step, rounding up.  */
1678       if (!can_div_trunc_p (dr_a.access_size - abs_step - 1,
1679                             abs_step, &niter_access1)
1680           || !can_div_trunc_p (dr_b.access_size + abs_step - 1,
1681                                abs_step, &niter_access2))
1682         return false;
1683     }
1684
1685   unsigned int i;
1686   for (i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
1687     {
1688       tree access1 = DR_ACCESS_FN (dr_a.dr, i);
1689       tree access2 = DR_ACCESS_FN (dr_b.dr, i);
1690       /* Two indices must be the same if they are not scev, or not scev wrto
1691          current loop being vecorized.  */
1692       if (TREE_CODE (access1) != POLYNOMIAL_CHREC
1693           || TREE_CODE (access2) != POLYNOMIAL_CHREC
1694           || CHREC_VARIABLE (access1) != (unsigned)loop->num
1695           || CHREC_VARIABLE (access2) != (unsigned)loop->num)
1696         {
1697           if (operand_equal_p (access1, access2, 0))
1698             continue;
1699
1700           return false;
1701         }
1702       /* The two indices must have the same step.  */
1703       if (!operand_equal_p (CHREC_RIGHT (access1), CHREC_RIGHT (access2), 0))
1704         return false;
1705
1706       tree idx_step = CHREC_RIGHT (access1);
1707       /* Index must have const step, otherwise DR_STEP won't be constant.  */
1708       gcc_assert (TREE_CODE (idx_step) == INTEGER_CST);
1709       /* Index must evaluate in the same direction as DR.  */
1710       gcc_assert (!neg_step || tree_int_cst_sign_bit (idx_step) == 1);
1711
1712       tree min1 = CHREC_LEFT (access1);
1713       tree min2 = CHREC_LEFT (access2);
1714       if (!types_compatible_p (TREE_TYPE (min1), TREE_TYPE (min2)))
1715         return false;
1716
1717       /* Ideally, alias can be checked against loop's control IV, but we
1718          need to prove linear mapping between control IV and reference
1719          index.  Although that should be true, we check against (array)
1720          index of data reference.  Like segment length, index length is
1721          linear function of the number of iterations with index_step as
1722          the coefficient, i.e, niter_len * idx_step.  */
1723       tree idx_len1 = fold_build2 (MULT_EXPR, TREE_TYPE (min1), idx_step,
1724                                    build_int_cst (TREE_TYPE (min1),
1725                                                   niter_len1));
1726       tree idx_len2 = fold_build2 (MULT_EXPR, TREE_TYPE (min2), idx_step,
1727                                    build_int_cst (TREE_TYPE (min2),
1728                                                   niter_len2));
1729       tree max1 = fold_build2 (PLUS_EXPR, TREE_TYPE (min1), min1, idx_len1);
1730       tree max2 = fold_build2 (PLUS_EXPR, TREE_TYPE (min2), min2, idx_len2);
1731       /* Adjust ranges for negative step.  */
1732       if (neg_step)
1733         {
1734           /* IDX_LEN1 and IDX_LEN2 are negative in this case.  */
1735           std::swap (min1, max1);
1736           std::swap (min2, max2);
1737
1738           /* As with the lengths just calculated, we've measured the access
1739              sizes in iterations, so multiply them by the index step.  */
1740           tree idx_access1
1741             = fold_build2 (MULT_EXPR, TREE_TYPE (min1), idx_step,
1742                            build_int_cst (TREE_TYPE (min1), niter_access1));
1743           tree idx_access2
1744             = fold_build2 (MULT_EXPR, TREE_TYPE (min2), idx_step,
1745                            build_int_cst (TREE_TYPE (min2), niter_access2));
1746
1747           /* MINUS_EXPR because the above values are negative.  */
1748           max1 = fold_build2 (MINUS_EXPR, TREE_TYPE (max1), max1, idx_access1);
1749           max2 = fold_build2 (MINUS_EXPR, TREE_TYPE (max2), max2, idx_access2);
1750         }
1751       tree part_cond_expr
1752         = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1753             fold_build2 (LE_EXPR, boolean_type_node, max1, min2),
1754             fold_build2 (LE_EXPR, boolean_type_node, max2, min1));
1755       if (*cond_expr)
1756         *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1757                                   *cond_expr, part_cond_expr);
1758       else
1759         *cond_expr = part_cond_expr;
1760     }
1761   return true;
1762 }
1763
1764 /* If ALIGN is nonzero, set up *SEQ_MIN_OUT and *SEQ_MAX_OUT so that for
1765    every address ADDR accessed by D:
1766
1767      *SEQ_MIN_OUT <= ADDR (== ADDR & -ALIGN) <= *SEQ_MAX_OUT
1768
1769    In this case, every element accessed by D is aligned to at least
1770    ALIGN bytes.
1771
1772    If ALIGN is zero then instead set *SEG_MAX_OUT so that:
1773
1774      *SEQ_MIN_OUT <= ADDR < *SEQ_MAX_OUT.  */
1775
1776 static void
1777 get_segment_min_max (const dr_with_seg_len &d, tree *seg_min_out,
1778                      tree *seg_max_out, HOST_WIDE_INT align)
1779 {
1780   /* Each access has the following pattern:
1781
1782           <- |seg_len| ->
1783           <--- A: -ve step --->
1784           +-----+-------+-----+-------+-----+
1785           | n-1 | ,.... |  0  | ..... | n-1 |
1786           +-----+-------+-----+-------+-----+
1787                         <--- B: +ve step --->
1788                         <- |seg_len| ->
1789                         |
1790                    base address
1791
1792      where "n" is the number of scalar iterations covered by the segment.
1793      (This should be VF for a particular pair if we know that both steps
1794      are the same, otherwise it will be the full number of scalar loop
1795      iterations.)
1796
1797      A is the range of bytes accessed when the step is negative,
1798      B is the range when the step is positive.
1799
1800      If the access size is "access_size" bytes, the lowest addressed byte is:
1801
1802          base + (step < 0 ? seg_len : 0)   [LB]
1803
1804      and the highest addressed byte is always below:
1805
1806          base + (step < 0 ? 0 : seg_len) + access_size   [UB]
1807
1808      Thus:
1809
1810          LB <= ADDR < UB
1811
1812      If ALIGN is nonzero, all three values are aligned to at least ALIGN
1813      bytes, so:
1814
1815          LB <= ADDR <= UB - ALIGN
1816
1817      where "- ALIGN" folds naturally with the "+ access_size" and often
1818      cancels it out.
1819
1820      We don't try to simplify LB and UB beyond this (e.g. by using
1821      MIN and MAX based on whether seg_len rather than the stride is
1822      negative) because it is possible for the absolute size of the
1823      segment to overflow the range of a ssize_t.
1824
1825      Keeping the pointer_plus outside of the cond_expr should allow
1826      the cond_exprs to be shared with other alias checks.  */
1827   tree indicator = dr_direction_indicator (d.dr);
1828   tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
1829                                fold_convert (ssizetype, indicator),
1830                                ssize_int (0));
1831   tree addr_base = fold_build_pointer_plus (DR_BASE_ADDRESS (d.dr),
1832                                             DR_OFFSET (d.dr));
1833   addr_base = fold_build_pointer_plus (addr_base, DR_INIT (d.dr));
1834   tree seg_len
1835     = fold_convert (sizetype, rewrite_to_non_trapping_overflow (d.seg_len));
1836
1837   tree min_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
1838                                 seg_len, size_zero_node);
1839   tree max_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
1840                                 size_zero_node, seg_len);
1841   max_reach = fold_build2 (PLUS_EXPR, sizetype, max_reach,
1842                            size_int (d.access_size - align));
1843
1844   *seg_min_out = fold_build_pointer_plus (addr_base, min_reach);
1845   *seg_max_out = fold_build_pointer_plus (addr_base, max_reach);
1846 }
1847
1848 /* Given two data references and segment lengths described by DR_A and DR_B,
1849    create expression checking if the two addresses ranges intersect with
1850    each other:
1851
1852      ((DR_A_addr_0 + DR_A_segment_length_0) <= DR_B_addr_0)
1853      || (DR_B_addr_0 + DER_B_segment_length_0) <= DR_A_addr_0))  */
1854
1855 static void
1856 create_intersect_range_checks (struct loop *loop, tree *cond_expr,
1857                                const dr_with_seg_len& dr_a,
1858                                const dr_with_seg_len& dr_b)
1859 {
1860   *cond_expr = NULL_TREE;
1861   if (create_intersect_range_checks_index (loop, cond_expr, dr_a, dr_b))
1862     return;
1863
1864   unsigned HOST_WIDE_INT min_align;
1865   tree_code cmp_code;
1866   if (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST
1867       && TREE_CODE (DR_STEP (dr_b.dr)) == INTEGER_CST)
1868     {
1869       /* In this case adding access_size to seg_len is likely to give
1870          a simple X * step, where X is either the number of scalar
1871          iterations or the vectorization factor.  We're better off
1872          keeping that, rather than subtracting an alignment from it.
1873
1874          In this case the maximum values are exclusive and so there is
1875          no alias if the maximum of one segment equals the minimum
1876          of another.  */
1877       min_align = 0;
1878       cmp_code = LE_EXPR;
1879     }
1880   else
1881     {
1882       /* Calculate the minimum alignment shared by all four pointers,
1883          then arrange for this alignment to be subtracted from the
1884          exclusive maximum values to get inclusive maximum values.
1885          This "- min_align" is cumulative with a "+ access_size"
1886          in the calculation of the maximum values.  In the best
1887          (and common) case, the two cancel each other out, leaving
1888          us with an inclusive bound based only on seg_len.  In the
1889          worst case we're simply adding a smaller number than before.
1890
1891          Because the maximum values are inclusive, there is an alias
1892          if the maximum value of one segment is equal to the minimum
1893          value of the other.  */
1894       min_align = MIN (dr_a.align, dr_b.align);
1895       cmp_code = LT_EXPR;
1896     }
1897
1898   tree seg_a_min, seg_a_max, seg_b_min, seg_b_max;
1899   get_segment_min_max (dr_a, &seg_a_min, &seg_a_max, min_align);
1900   get_segment_min_max (dr_b, &seg_b_min, &seg_b_max, min_align);
1901
1902   *cond_expr
1903     = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1904         fold_build2 (cmp_code, boolean_type_node, seg_a_max, seg_b_min),
1905         fold_build2 (cmp_code, boolean_type_node, seg_b_max, seg_a_min));
1906 }
1907
1908 /* Create a conditional expression that represents the run-time checks for
1909    overlapping of address ranges represented by a list of data references
1910    pairs passed in ALIAS_PAIRS.  Data references are in LOOP.  The returned
1911    COND_EXPR is the conditional expression to be used in the if statement
1912    that controls which version of the loop gets executed at runtime.  */
1913
1914 void
1915 create_runtime_alias_checks (struct loop *loop,
1916                              vec<dr_with_seg_len_pair_t> *alias_pairs,
1917                              tree * cond_expr)
1918 {
1919   tree part_cond_expr;
1920
1921   for (size_t i = 0, s = alias_pairs->length (); i < s; ++i)
1922     {
1923       const dr_with_seg_len& dr_a = (*alias_pairs)[i].first;
1924       const dr_with_seg_len& dr_b = (*alias_pairs)[i].second;
1925
1926       if (dump_enabled_p ())
1927         {
1928           dump_printf (MSG_NOTE, "create runtime check for data references ");
1929           dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a.dr));
1930           dump_printf (MSG_NOTE, " and ");
1931           dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b.dr));
1932           dump_printf (MSG_NOTE, "\n");
1933         }
1934
1935       /* Create condition expression for each pair data references.  */
1936       create_intersect_range_checks (loop, &part_cond_expr, dr_a, dr_b);
1937       if (*cond_expr)
1938         *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1939                                   *cond_expr, part_cond_expr);
1940       else
1941         *cond_expr = part_cond_expr;
1942     }
1943 }
1944
1945 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
1946    expressions.  */
1947 static bool
1948 dr_equal_offsets_p1 (tree offset1, tree offset2)
1949 {
1950   bool res;
1951
1952   STRIP_NOPS (offset1);
1953   STRIP_NOPS (offset2);
1954
1955   if (offset1 == offset2)
1956     return true;
1957
1958   if (TREE_CODE (offset1) != TREE_CODE (offset2)
1959       || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
1960     return false;
1961
1962   res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
1963                              TREE_OPERAND (offset2, 0));
1964
1965   if (!res || !BINARY_CLASS_P (offset1))
1966     return res;
1967
1968   res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
1969                              TREE_OPERAND (offset2, 1));
1970
1971   return res;
1972 }
1973
1974 /* Check if DRA and DRB have equal offsets.  */
1975 bool
1976 dr_equal_offsets_p (struct data_reference *dra,
1977                     struct data_reference *drb)
1978 {
1979   tree offset1, offset2;
1980
1981   offset1 = DR_OFFSET (dra);
1982   offset2 = DR_OFFSET (drb);
1983
1984   return dr_equal_offsets_p1 (offset1, offset2);
1985 }
1986
1987 /* Returns true if FNA == FNB.  */
1988
1989 static bool
1990 affine_function_equal_p (affine_fn fna, affine_fn fnb)
1991 {
1992   unsigned i, n = fna.length ();
1993
1994   if (n != fnb.length ())
1995     return false;
1996
1997   for (i = 0; i < n; i++)
1998     if (!operand_equal_p (fna[i], fnb[i], 0))
1999       return false;
2000
2001   return true;
2002 }
2003
2004 /* If all the functions in CF are the same, returns one of them,
2005    otherwise returns NULL.  */
2006
2007 static affine_fn
2008 common_affine_function (conflict_function *cf)
2009 {
2010   unsigned i;
2011   affine_fn comm;
2012
2013   if (!CF_NONTRIVIAL_P (cf))
2014     return affine_fn ();
2015
2016   comm = cf->fns[0];
2017
2018   for (i = 1; i < cf->n; i++)
2019     if (!affine_function_equal_p (comm, cf->fns[i]))
2020       return affine_fn ();
2021
2022   return comm;
2023 }
2024
2025 /* Returns the base of the affine function FN.  */
2026
2027 static tree
2028 affine_function_base (affine_fn fn)
2029 {
2030   return fn[0];
2031 }
2032
2033 /* Returns true if FN is a constant.  */
2034
2035 static bool
2036 affine_function_constant_p (affine_fn fn)
2037 {
2038   unsigned i;
2039   tree coef;
2040
2041   for (i = 1; fn.iterate (i, &coef); i++)
2042     if (!integer_zerop (coef))
2043       return false;
2044
2045   return true;
2046 }
2047
2048 /* Returns true if FN is the zero constant function.  */
2049
2050 static bool
2051 affine_function_zero_p (affine_fn fn)
2052 {
2053   return (integer_zerop (affine_function_base (fn))
2054           && affine_function_constant_p (fn));
2055 }
2056
2057 /* Returns a signed integer type with the largest precision from TA
2058    and TB.  */
2059
2060 static tree
2061 signed_type_for_types (tree ta, tree tb)
2062 {
2063   if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
2064     return signed_type_for (ta);
2065   else
2066     return signed_type_for (tb);
2067 }
2068
2069 /* Applies operation OP on affine functions FNA and FNB, and returns the
2070    result.  */
2071
2072 static affine_fn
2073 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
2074 {
2075   unsigned i, n, m;
2076   affine_fn ret;
2077   tree coef;
2078
2079   if (fnb.length () > fna.length ())
2080     {
2081       n = fna.length ();
2082       m = fnb.length ();
2083     }
2084   else
2085     {
2086       n = fnb.length ();
2087       m = fna.length ();
2088     }
2089
2090   ret.create (m);
2091   for (i = 0; i < n; i++)
2092     {
2093       tree type = signed_type_for_types (TREE_TYPE (fna[i]),
2094                                          TREE_TYPE (fnb[i]));
2095       ret.quick_push (fold_build2 (op, type, fna[i], fnb[i]));
2096     }
2097
2098   for (; fna.iterate (i, &coef); i++)
2099     ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2100                                  coef, integer_zero_node));
2101   for (; fnb.iterate (i, &coef); i++)
2102     ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2103                                  integer_zero_node, coef));
2104
2105   return ret;
2106 }
2107
2108 /* Returns the sum of affine functions FNA and FNB.  */
2109
2110 static affine_fn
2111 affine_fn_plus (affine_fn fna, affine_fn fnb)
2112 {
2113   return affine_fn_op (PLUS_EXPR, fna, fnb);
2114 }
2115
2116 /* Returns the difference of affine functions FNA and FNB.  */
2117
2118 static affine_fn
2119 affine_fn_minus (affine_fn fna, affine_fn fnb)
2120 {
2121   return affine_fn_op (MINUS_EXPR, fna, fnb);
2122 }
2123
2124 /* Frees affine function FN.  */
2125
2126 static void
2127 affine_fn_free (affine_fn fn)
2128 {
2129   fn.release ();
2130 }
2131
2132 /* Determine for each subscript in the data dependence relation DDR
2133    the distance.  */
2134
2135 static void
2136 compute_subscript_distance (struct data_dependence_relation *ddr)
2137 {
2138   conflict_function *cf_a, *cf_b;
2139   affine_fn fn_a, fn_b, diff;
2140
2141   if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
2142     {
2143       unsigned int i;
2144
2145       for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2146         {
2147           struct subscript *subscript;
2148
2149           subscript = DDR_SUBSCRIPT (ddr, i);
2150           cf_a = SUB_CONFLICTS_IN_A (subscript);
2151           cf_b = SUB_CONFLICTS_IN_B (subscript);
2152
2153           fn_a = common_affine_function (cf_a);
2154           fn_b = common_affine_function (cf_b);
2155           if (!fn_a.exists () || !fn_b.exists ())
2156             {
2157               SUB_DISTANCE (subscript) = chrec_dont_know;
2158               return;
2159             }
2160           diff = affine_fn_minus (fn_a, fn_b);
2161
2162           if (affine_function_constant_p (diff))
2163             SUB_DISTANCE (subscript) = affine_function_base (diff);
2164           else
2165             SUB_DISTANCE (subscript) = chrec_dont_know;
2166
2167           affine_fn_free (diff);
2168         }
2169     }
2170 }
2171
2172 /* Returns the conflict function for "unknown".  */
2173
2174 static conflict_function *
2175 conflict_fn_not_known (void)
2176 {
2177   conflict_function *fn = XCNEW (conflict_function);
2178   fn->n = NOT_KNOWN;
2179
2180   return fn;
2181 }
2182
2183 /* Returns the conflict function for "independent".  */
2184
2185 static conflict_function *
2186 conflict_fn_no_dependence (void)
2187 {
2188   conflict_function *fn = XCNEW (conflict_function);
2189   fn->n = NO_DEPENDENCE;
2190
2191   return fn;
2192 }
2193
2194 /* Returns true if the address of OBJ is invariant in LOOP.  */
2195
2196 static bool
2197 object_address_invariant_in_loop_p (const struct loop *loop, const_tree obj)
2198 {
2199   while (handled_component_p (obj))
2200     {
2201       if (TREE_CODE (obj) == ARRAY_REF)
2202         {
2203           for (int i = 1; i < 4; ++i)
2204             if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, i),
2205                                                         loop->num))
2206               return false;
2207         }
2208       else if (TREE_CODE (obj) == COMPONENT_REF)
2209         {
2210           if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
2211                                                       loop->num))
2212             return false;
2213         }
2214       obj = TREE_OPERAND (obj, 0);
2215     }
2216
2217   if (!INDIRECT_REF_P (obj)
2218       && TREE_CODE (obj) != MEM_REF)
2219     return true;
2220
2221   return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
2222                                                   loop->num);
2223 }
2224
2225 /* Returns false if we can prove that data references A and B do not alias,
2226    true otherwise.  If LOOP_NEST is false no cross-iteration aliases are
2227    considered.  */
2228
2229 bool
2230 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
2231                 bool loop_nest)
2232 {
2233   tree addr_a = DR_BASE_OBJECT (a);
2234   tree addr_b = DR_BASE_OBJECT (b);
2235
2236   /* If we are not processing a loop nest but scalar code we
2237      do not need to care about possible cross-iteration dependences
2238      and thus can process the full original reference.  Do so,
2239      similar to how loop invariant motion applies extra offset-based
2240      disambiguation.  */
2241   if (!loop_nest)
2242     {
2243       aff_tree off1, off2;
2244       poly_widest_int size1, size2;
2245       get_inner_reference_aff (DR_REF (a), &off1, &size1);
2246       get_inner_reference_aff (DR_REF (b), &off2, &size2);
2247       aff_combination_scale (&off1, -1);
2248       aff_combination_add (&off2, &off1);
2249       if (aff_comb_cannot_overlap_p (&off2, size1, size2))
2250         return false;
2251     }
2252
2253   if ((TREE_CODE (addr_a) == MEM_REF || TREE_CODE (addr_a) == TARGET_MEM_REF)
2254       && (TREE_CODE (addr_b) == MEM_REF || TREE_CODE (addr_b) == TARGET_MEM_REF)
2255       && MR_DEPENDENCE_CLIQUE (addr_a) == MR_DEPENDENCE_CLIQUE (addr_b)
2256       && MR_DEPENDENCE_BASE (addr_a) != MR_DEPENDENCE_BASE (addr_b))
2257     return false;
2258
2259   /* If we had an evolution in a pointer-based MEM_REF BASE_OBJECT we
2260      do not know the size of the base-object.  So we cannot do any
2261      offset/overlap based analysis but have to rely on points-to
2262      information only.  */
2263   if (TREE_CODE (addr_a) == MEM_REF
2264       && (DR_UNCONSTRAINED_BASE (a)
2265           || TREE_CODE (TREE_OPERAND (addr_a, 0)) == SSA_NAME))
2266     {
2267       /* For true dependences we can apply TBAA.  */
2268       if (flag_strict_aliasing
2269           && DR_IS_WRITE (a) && DR_IS_READ (b)
2270           && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
2271                                      get_alias_set (DR_REF (b))))
2272         return false;
2273       if (TREE_CODE (addr_b) == MEM_REF)
2274         return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
2275                                        TREE_OPERAND (addr_b, 0));
2276       else
2277         return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
2278                                        build_fold_addr_expr (addr_b));
2279     }
2280   else if (TREE_CODE (addr_b) == MEM_REF
2281            && (DR_UNCONSTRAINED_BASE (b)
2282                || TREE_CODE (TREE_OPERAND (addr_b, 0)) == SSA_NAME))
2283     {
2284       /* For true dependences we can apply TBAA.  */
2285       if (flag_strict_aliasing
2286           && DR_IS_WRITE (a) && DR_IS_READ (b)
2287           && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
2288                                      get_alias_set (DR_REF (b))))
2289         return false;
2290       if (TREE_CODE (addr_a) == MEM_REF)
2291         return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
2292                                        TREE_OPERAND (addr_b, 0));
2293       else
2294         return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
2295                                        TREE_OPERAND (addr_b, 0));
2296     }
2297
2298   /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
2299      that is being subsetted in the loop nest.  */
2300   if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
2301     return refs_output_dependent_p (addr_a, addr_b);
2302   else if (DR_IS_READ (a) && DR_IS_WRITE (b))
2303     return refs_anti_dependent_p (addr_a, addr_b);
2304   return refs_may_alias_p (addr_a, addr_b);
2305 }
2306
2307 /* REF_A and REF_B both satisfy access_fn_component_p.  Return true
2308    if it is meaningful to compare their associated access functions
2309    when checking for dependencies.  */
2310
2311 static bool
2312 access_fn_components_comparable_p (tree ref_a, tree ref_b)
2313 {
2314   /* Allow pairs of component refs from the following sets:
2315
2316        { REALPART_EXPR, IMAGPART_EXPR }
2317        { COMPONENT_REF }
2318        { ARRAY_REF }.  */
2319   tree_code code_a = TREE_CODE (ref_a);
2320   tree_code code_b = TREE_CODE (ref_b);
2321   if (code_a == IMAGPART_EXPR)
2322     code_a = REALPART_EXPR;
2323   if (code_b == IMAGPART_EXPR)
2324     code_b = REALPART_EXPR;
2325   if (code_a != code_b)
2326     return false;
2327
2328   if (TREE_CODE (ref_a) == COMPONENT_REF)
2329     /* ??? We cannot simply use the type of operand #0 of the refs here as
2330        the Fortran compiler smuggles type punning into COMPONENT_REFs.
2331        Use the DECL_CONTEXT of the FIELD_DECLs instead.  */
2332     return (DECL_CONTEXT (TREE_OPERAND (ref_a, 1))
2333             == DECL_CONTEXT (TREE_OPERAND (ref_b, 1)));
2334
2335   return types_compatible_p (TREE_TYPE (TREE_OPERAND (ref_a, 0)),
2336                              TREE_TYPE (TREE_OPERAND (ref_b, 0)));
2337 }
2338
2339 /* Initialize a data dependence relation between data accesses A and
2340    B.  NB_LOOPS is the number of loops surrounding the references: the
2341    size of the classic distance/direction vectors.  */
2342
2343 struct data_dependence_relation *
2344 initialize_data_dependence_relation (struct data_reference *a,
2345                                      struct data_reference *b,
2346                                      vec<loop_p> loop_nest)
2347 {
2348   struct data_dependence_relation *res;
2349   unsigned int i;
2350
2351   res = XCNEW (struct data_dependence_relation);
2352   DDR_A (res) = a;
2353   DDR_B (res) = b;
2354   DDR_LOOP_NEST (res).create (0);
2355   DDR_SUBSCRIPTS (res).create (0);
2356   DDR_DIR_VECTS (res).create (0);
2357   DDR_DIST_VECTS (res).create (0);
2358
2359   if (a == NULL || b == NULL)
2360     {
2361       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
2362       return res;
2363     }
2364
2365   /* If the data references do not alias, then they are independent.  */
2366   if (!dr_may_alias_p (a, b, loop_nest.exists ()))
2367     {
2368       DDR_ARE_DEPENDENT (res) = chrec_known;
2369       return res;
2370     }
2371
2372   unsigned int num_dimensions_a = DR_NUM_DIMENSIONS (a);
2373   unsigned int num_dimensions_b = DR_NUM_DIMENSIONS (b);
2374   if (num_dimensions_a == 0 || num_dimensions_b == 0)
2375     {
2376       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
2377       return res;
2378     }
2379
2380   /* For unconstrained bases, the root (highest-indexed) subscript
2381      describes a variation in the base of the original DR_REF rather
2382      than a component access.  We have no type that accurately describes
2383      the new DR_BASE_OBJECT (whose TREE_TYPE describes the type *after*
2384      applying this subscript) so limit the search to the last real
2385      component access.
2386
2387      E.g. for:
2388
2389         void
2390         f (int a[][8], int b[][8])
2391         {
2392           for (int i = 0; i < 8; ++i)
2393             a[i * 2][0] = b[i][0];
2394         }
2395
2396      the a and b accesses have a single ARRAY_REF component reference [0]
2397      but have two subscripts.  */
2398   if (DR_UNCONSTRAINED_BASE (a))
2399     num_dimensions_a -= 1;
2400   if (DR_UNCONSTRAINED_BASE (b))
2401     num_dimensions_b -= 1;
2402
2403   /* These structures describe sequences of component references in
2404      DR_REF (A) and DR_REF (B).  Each component reference is tied to a
2405      specific access function.  */
2406   struct {
2407     /* The sequence starts at DR_ACCESS_FN (A, START_A) of A and
2408        DR_ACCESS_FN (B, START_B) of B (inclusive) and extends to higher
2409        indices.  In C notation, these are the indices of the rightmost
2410        component references; e.g. for a sequence .b.c.d, the start
2411        index is for .d.  */
2412     unsigned int start_a;
2413     unsigned int start_b;
2414
2415     /* The sequence contains LENGTH consecutive access functions from
2416        each DR.  */
2417     unsigned int length;
2418
2419     /* The enclosing objects for the A and B sequences respectively,
2420        i.e. the objects to which DR_ACCESS_FN (A, START_A + LENGTH - 1)
2421        and DR_ACCESS_FN (B, START_B + LENGTH - 1) are applied.  */
2422     tree object_a;
2423     tree object_b;
2424   } full_seq = {}, struct_seq = {};
2425
2426   /* Before each iteration of the loop:
2427
2428      - REF_A is what you get after applying DR_ACCESS_FN (A, INDEX_A) and
2429      - REF_B is what you get after applying DR_ACCESS_FN (B, INDEX_B).  */
2430   unsigned int index_a = 0;
2431   unsigned int index_b = 0;
2432   tree ref_a = DR_REF (a);
2433   tree ref_b = DR_REF (b);
2434
2435   /* Now walk the component references from the final DR_REFs back up to
2436      the enclosing base objects.  Each component reference corresponds
2437      to one access function in the DR, with access function 0 being for
2438      the final DR_REF and the highest-indexed access function being the
2439      one that is applied to the base of the DR.
2440
2441      Look for a sequence of component references whose access functions
2442      are comparable (see access_fn_components_comparable_p).  If more
2443      than one such sequence exists, pick the one nearest the base
2444      (which is the leftmost sequence in C notation).  Store this sequence
2445      in FULL_SEQ.
2446
2447      For example, if we have:
2448
2449         struct foo { struct bar s; ... } (*a)[10], (*b)[10];
2450
2451         A: a[0][i].s.c.d
2452         B: __real b[0][i].s.e[i].f
2453
2454      (where d is the same type as the real component of f) then the access
2455      functions would be:
2456
2457                          0   1   2   3
2458         A:              .d  .c  .s [i]
2459
2460                  0   1   2   3   4   5
2461         B:  __real  .f [i]  .e  .s [i]
2462
2463      The A0/B2 column isn't comparable, since .d is a COMPONENT_REF
2464      and [i] is an ARRAY_REF.  However, the A1/B3 column contains two
2465      COMPONENT_REF accesses for struct bar, so is comparable.  Likewise
2466      the A2/B4 column contains two COMPONENT_REF accesses for struct foo,
2467      so is comparable.  The A3/B5 column contains two ARRAY_REFs that
2468      index foo[10] arrays, so is again comparable.  The sequence is
2469      therefore:
2470
2471         A: [1, 3]  (i.e. [i].s.c)
2472         B: [3, 5]  (i.e. [i].s.e)
2473
2474      Also look for sequences of component references whose access
2475      functions are comparable and whose enclosing objects have the same
2476      RECORD_TYPE.  Store this sequence in STRUCT_SEQ.  In the above
2477      example, STRUCT_SEQ would be:
2478
2479         A: [1, 2]  (i.e. s.c)
2480         B: [3, 4]  (i.e. s.e)  */
2481   while (index_a < num_dimensions_a && index_b < num_dimensions_b)
2482     {
2483       /* REF_A and REF_B must be one of the component access types
2484          allowed by dr_analyze_indices.  */
2485       gcc_checking_assert (access_fn_component_p (ref_a));
2486       gcc_checking_assert (access_fn_component_p (ref_b));
2487
2488       /* Get the immediately-enclosing objects for REF_A and REF_B,
2489          i.e. the references *before* applying DR_ACCESS_FN (A, INDEX_A)
2490          and DR_ACCESS_FN (B, INDEX_B).  */
2491       tree object_a = TREE_OPERAND (ref_a, 0);
2492       tree object_b = TREE_OPERAND (ref_b, 0);
2493
2494       tree type_a = TREE_TYPE (object_a);
2495       tree type_b = TREE_TYPE (object_b);
2496       if (access_fn_components_comparable_p (ref_a, ref_b))
2497         {
2498           /* This pair of component accesses is comparable for dependence
2499              analysis, so we can include DR_ACCESS_FN (A, INDEX_A) and
2500              DR_ACCESS_FN (B, INDEX_B) in the sequence.  */
2501           if (full_seq.start_a + full_seq.length != index_a
2502               || full_seq.start_b + full_seq.length != index_b)
2503             {
2504               /* The accesses don't extend the current sequence,
2505                  so start a new one here.  */
2506               full_seq.start_a = index_a;
2507               full_seq.start_b = index_b;
2508               full_seq.length = 0;
2509             }
2510
2511           /* Add this pair of references to the sequence.  */
2512           full_seq.length += 1;
2513           full_seq.object_a = object_a;
2514           full_seq.object_b = object_b;
2515
2516           /* If the enclosing objects are structures (and thus have the
2517              same RECORD_TYPE), record the new sequence in STRUCT_SEQ.  */
2518           if (TREE_CODE (type_a) == RECORD_TYPE)
2519             struct_seq = full_seq;
2520
2521           /* Move to the next containing reference for both A and B.  */
2522           ref_a = object_a;
2523           ref_b = object_b;
2524           index_a += 1;
2525           index_b += 1;
2526           continue;
2527         }
2528
2529       /* Try to approach equal type sizes.  */
2530       if (!COMPLETE_TYPE_P (type_a)
2531           || !COMPLETE_TYPE_P (type_b)
2532           || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_a))
2533           || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_b)))
2534         break;
2535
2536       unsigned HOST_WIDE_INT size_a = tree_to_uhwi (TYPE_SIZE_UNIT (type_a));
2537       unsigned HOST_WIDE_INT size_b = tree_to_uhwi (TYPE_SIZE_UNIT (type_b));
2538       if (size_a <= size_b)
2539         {
2540           index_a += 1;
2541           ref_a = object_a;
2542         }
2543       if (size_b <= size_a)
2544         {
2545           index_b += 1;
2546           ref_b = object_b;
2547         }
2548     }
2549
2550   /* See whether FULL_SEQ ends at the base and whether the two bases
2551      are equal.  We do not care about TBAA or alignment info so we can
2552      use OEP_ADDRESS_OF to avoid false negatives.  */
2553   tree base_a = DR_BASE_OBJECT (a);
2554   tree base_b = DR_BASE_OBJECT (b);
2555   bool same_base_p = (full_seq.start_a + full_seq.length == num_dimensions_a
2556                       && full_seq.start_b + full_seq.length == num_dimensions_b
2557                       && DR_UNCONSTRAINED_BASE (a) == DR_UNCONSTRAINED_BASE (b)
2558                       && operand_equal_p (base_a, base_b, OEP_ADDRESS_OF)
2559                       && types_compatible_p (TREE_TYPE (base_a),
2560                                              TREE_TYPE (base_b))
2561                       && (!loop_nest.exists ()
2562                           || (object_address_invariant_in_loop_p
2563                               (loop_nest[0], base_a))));
2564
2565   /* If the bases are the same, we can include the base variation too.
2566      E.g. the b accesses in:
2567
2568        for (int i = 0; i < n; ++i)
2569          b[i + 4][0] = b[i][0];
2570
2571      have a definite dependence distance of 4, while for:
2572
2573        for (int i = 0; i < n; ++i)
2574          a[i + 4][0] = b[i][0];
2575
2576      the dependence distance depends on the gap between a and b.
2577
2578      If the bases are different then we can only rely on the sequence
2579      rooted at a structure access, since arrays are allowed to overlap
2580      arbitrarily and change shape arbitrarily.  E.g. we treat this as
2581      valid code:
2582
2583        int a[256];
2584        ...
2585        ((int (*)[4][3]) &a[1])[i][0] += ((int (*)[4][3]) &a[2])[i][0];
2586
2587      where two lvalues with the same int[4][3] type overlap, and where
2588      both lvalues are distinct from the object's declared type.  */
2589   if (same_base_p)
2590     {
2591       if (DR_UNCONSTRAINED_BASE (a))
2592         full_seq.length += 1;
2593     }
2594   else
2595     full_seq = struct_seq;
2596
2597   /* Punt if we didn't find a suitable sequence.  */
2598   if (full_seq.length == 0)
2599     {
2600       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
2601       return res;
2602     }
2603
2604   if (!same_base_p)
2605     {
2606       /* Partial overlap is possible for different bases when strict aliasing
2607          is not in effect.  It's also possible if either base involves a union
2608          access; e.g. for:
2609
2610            struct s1 { int a[2]; };
2611            struct s2 { struct s1 b; int c; };
2612            struct s3 { int d; struct s1 e; };
2613            union u { struct s2 f; struct s3 g; } *p, *q;
2614
2615          the s1 at "p->f.b" (base "p->f") partially overlaps the s1 at
2616          "p->g.e" (base "p->g") and might partially overlap the s1 at
2617          "q->g.e" (base "q->g").  */
2618       if (!flag_strict_aliasing
2619           || ref_contains_union_access_p (full_seq.object_a)
2620           || ref_contains_union_access_p (full_seq.object_b))
2621         {
2622           DDR_ARE_DEPENDENT (res) = chrec_dont_know;
2623           return res;
2624         }
2625
2626       DDR_COULD_BE_INDEPENDENT_P (res) = true;
2627       if (!loop_nest.exists ()
2628           || (object_address_invariant_in_loop_p (loop_nest[0],
2629                                                   full_seq.object_a)
2630               && object_address_invariant_in_loop_p (loop_nest[0],
2631                                                      full_seq.object_b)))
2632         {
2633           DDR_OBJECT_A (res) = full_seq.object_a;
2634           DDR_OBJECT_B (res) = full_seq.object_b;
2635         }
2636     }
2637
2638   DDR_AFFINE_P (res) = true;
2639   DDR_ARE_DEPENDENT (res) = NULL_TREE;
2640   DDR_SUBSCRIPTS (res).create (full_seq.length);
2641   DDR_LOOP_NEST (res) = loop_nest;
2642   DDR_INNER_LOOP (res) = 0;
2643   DDR_SELF_REFERENCE (res) = false;
2644
2645   for (i = 0; i < full_seq.length; ++i)
2646     {
2647       struct subscript *subscript;
2648
2649       subscript = XNEW (struct subscript);
2650       SUB_ACCESS_FN (subscript, 0) = DR_ACCESS_FN (a, full_seq.start_a + i);
2651       SUB_ACCESS_FN (subscript, 1) = DR_ACCESS_FN (b, full_seq.start_b + i);
2652       SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
2653       SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
2654       SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
2655       SUB_DISTANCE (subscript) = chrec_dont_know;
2656       DDR_SUBSCRIPTS (res).safe_push (subscript);
2657     }
2658
2659   return res;
2660 }
2661
2662 /* Frees memory used by the conflict function F.  */
2663
2664 static void
2665 free_conflict_function (conflict_function *f)
2666 {
2667   unsigned i;
2668
2669   if (CF_NONTRIVIAL_P (f))
2670     {
2671       for (i = 0; i < f->n; i++)
2672         affine_fn_free (f->fns[i]);
2673     }
2674   free (f);
2675 }
2676
2677 /* Frees memory used by SUBSCRIPTS.  */
2678
2679 static void
2680 free_subscripts (vec<subscript_p> subscripts)
2681 {
2682   unsigned i;
2683   subscript_p s;
2684
2685   FOR_EACH_VEC_ELT (subscripts, i, s)
2686     {
2687       free_conflict_function (s->conflicting_iterations_in_a);
2688       free_conflict_function (s->conflicting_iterations_in_b);
2689       free (s);
2690     }
2691   subscripts.release ();
2692 }
2693
2694 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
2695    description.  */
2696
2697 static inline void
2698 finalize_ddr_dependent (struct data_dependence_relation *ddr,
2699                         tree chrec)
2700 {
2701   DDR_ARE_DEPENDENT (ddr) = chrec;
2702   free_subscripts (DDR_SUBSCRIPTS (ddr));
2703   DDR_SUBSCRIPTS (ddr).create (0);
2704 }
2705
2706 /* The dependence relation DDR cannot be represented by a distance
2707    vector.  */
2708
2709 static inline void
2710 non_affine_dependence_relation (struct data_dependence_relation *ddr)
2711 {
2712   if (dump_file && (dump_flags & TDF_DETAILS))
2713     fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
2714
2715   DDR_AFFINE_P (ddr) = false;
2716 }
2717
2718 \f
2719
2720 /* This section contains the classic Banerjee tests.  */
2721
2722 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
2723    variables, i.e., if the ZIV (Zero Index Variable) test is true.  */
2724
2725 static inline bool
2726 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
2727 {
2728   return (evolution_function_is_constant_p (chrec_a)
2729           && evolution_function_is_constant_p (chrec_b));
2730 }
2731
2732 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
2733    variable, i.e., if the SIV (Single Index Variable) test is true.  */
2734
2735 static bool
2736 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
2737 {
2738   if ((evolution_function_is_constant_p (chrec_a)
2739        && evolution_function_is_univariate_p (chrec_b))
2740       || (evolution_function_is_constant_p (chrec_b)
2741           && evolution_function_is_univariate_p (chrec_a)))
2742     return true;
2743
2744   if (evolution_function_is_univariate_p (chrec_a)
2745       && evolution_function_is_univariate_p (chrec_b))
2746     {
2747       switch (TREE_CODE (chrec_a))
2748         {
2749         case POLYNOMIAL_CHREC:
2750           switch (TREE_CODE (chrec_b))
2751             {
2752             case POLYNOMIAL_CHREC:
2753               if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
2754                 return false;
2755               /* FALLTHRU */
2756
2757             default:
2758               return true;
2759             }
2760
2761         default:
2762           return true;
2763         }
2764     }
2765
2766   return false;
2767 }
2768
2769 /* Creates a conflict function with N dimensions.  The affine functions
2770    in each dimension follow.  */
2771
2772 static conflict_function *
2773 conflict_fn (unsigned n, ...)
2774 {
2775   unsigned i;
2776   conflict_function *ret = XCNEW (conflict_function);
2777   va_list ap;
2778
2779   gcc_assert (n > 0 && n <= MAX_DIM);
2780   va_start (ap, n);
2781
2782   ret->n = n;
2783   for (i = 0; i < n; i++)
2784     ret->fns[i] = va_arg (ap, affine_fn);
2785   va_end (ap);
2786
2787   return ret;
2788 }
2789
2790 /* Returns constant affine function with value CST.  */
2791
2792 static affine_fn
2793 affine_fn_cst (tree cst)
2794 {
2795   affine_fn fn;
2796   fn.create (1);
2797   fn.quick_push (cst);
2798   return fn;
2799 }
2800
2801 /* Returns affine function with single variable, CST + COEF * x_DIM.  */
2802
2803 static affine_fn
2804 affine_fn_univar (tree cst, unsigned dim, tree coef)
2805 {
2806   affine_fn fn;
2807   fn.create (dim + 1);
2808   unsigned i;
2809
2810   gcc_assert (dim > 0);
2811   fn.quick_push (cst);
2812   for (i = 1; i < dim; i++)
2813     fn.quick_push (integer_zero_node);
2814   fn.quick_push (coef);
2815   return fn;
2816 }
2817
2818 /* Analyze a ZIV (Zero Index Variable) subscript.  *OVERLAPS_A and
2819    *OVERLAPS_B are initialized to the functions that describe the
2820    relation between the elements accessed twice by CHREC_A and
2821    CHREC_B.  For k >= 0, the following property is verified:
2822
2823    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
2824
2825 static void
2826 analyze_ziv_subscript (tree chrec_a,
2827                        tree chrec_b,
2828                        conflict_function **overlaps_a,
2829                        conflict_function **overlaps_b,
2830                        tree *last_conflicts)
2831 {
2832   tree type, difference;
2833   dependence_stats.num_ziv++;
2834
2835   if (dump_file && (dump_flags & TDF_DETAILS))
2836     fprintf (dump_file, "(analyze_ziv_subscript \n");
2837
2838   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
2839   chrec_a = chrec_convert (type, chrec_a, NULL);
2840   chrec_b = chrec_convert (type, chrec_b, NULL);
2841   difference = chrec_fold_minus (type, chrec_a, chrec_b);
2842
2843   switch (TREE_CODE (difference))
2844     {
2845     case INTEGER_CST:
2846       if (integer_zerop (difference))
2847         {
2848           /* The difference is equal to zero: the accessed index
2849              overlaps for each iteration in the loop.  */
2850           *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2851           *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2852           *last_conflicts = chrec_dont_know;
2853           dependence_stats.num_ziv_dependent++;
2854         }
2855       else
2856         {
2857           /* The accesses do not overlap.  */
2858           *overlaps_a = conflict_fn_no_dependence ();
2859           *overlaps_b = conflict_fn_no_dependence ();
2860           *last_conflicts = integer_zero_node;
2861           dependence_stats.num_ziv_independent++;
2862         }
2863       break;
2864
2865     default:
2866       /* We're not sure whether the indexes overlap.  For the moment,
2867          conservatively answer "don't know".  */
2868       if (dump_file && (dump_flags & TDF_DETAILS))
2869         fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
2870
2871       *overlaps_a = conflict_fn_not_known ();
2872       *overlaps_b = conflict_fn_not_known ();
2873       *last_conflicts = chrec_dont_know;
2874       dependence_stats.num_ziv_unimplemented++;
2875       break;
2876     }
2877
2878   if (dump_file && (dump_flags & TDF_DETAILS))
2879     fprintf (dump_file, ")\n");
2880 }
2881
2882 /* Similar to max_stmt_executions_int, but returns the bound as a tree,
2883    and only if it fits to the int type.  If this is not the case, or the
2884    bound  on the number of iterations of LOOP could not be derived, returns
2885    chrec_dont_know.  */
2886
2887 static tree
2888 max_stmt_executions_tree (struct loop *loop)
2889 {
2890   widest_int nit;
2891
2892   if (!max_stmt_executions (loop, &nit))
2893     return chrec_dont_know;
2894
2895   if (!wi::fits_to_tree_p (nit, unsigned_type_node))
2896     return chrec_dont_know;
2897
2898   return wide_int_to_tree (unsigned_type_node, nit);
2899 }
2900
2901 /* Determine whether the CHREC is always positive/negative.  If the expression
2902    cannot be statically analyzed, return false, otherwise set the answer into
2903    VALUE.  */
2904
2905 static bool
2906 chrec_is_positive (tree chrec, bool *value)
2907 {
2908   bool value0, value1, value2;
2909   tree end_value, nb_iter;
2910
2911   switch (TREE_CODE (chrec))
2912     {
2913     case POLYNOMIAL_CHREC:
2914       if (!chrec_is_positive (CHREC_LEFT (chrec), &value0)
2915           || !chrec_is_positive (CHREC_RIGHT (chrec), &value1))
2916         return false;
2917
2918       /* FIXME -- overflows.  */
2919       if (value0 == value1)
2920         {
2921           *value = value0;
2922           return true;
2923         }
2924
2925       /* Otherwise the chrec is under the form: "{-197, +, 2}_1",
2926          and the proof consists in showing that the sign never
2927          changes during the execution of the loop, from 0 to
2928          loop->nb_iterations.  */
2929       if (!evolution_function_is_affine_p (chrec))
2930         return false;
2931
2932       nb_iter = number_of_latch_executions (get_chrec_loop (chrec));
2933       if (chrec_contains_undetermined (nb_iter))
2934         return false;
2935
2936 #if 0
2937       /* TODO -- If the test is after the exit, we may decrease the number of
2938          iterations by one.  */
2939       if (after_exit)
2940         nb_iter = chrec_fold_minus (type, nb_iter, build_int_cst (type, 1));
2941 #endif
2942
2943       end_value = chrec_apply (CHREC_VARIABLE (chrec), chrec, nb_iter);
2944
2945       if (!chrec_is_positive (end_value, &value2))
2946         return false;
2947
2948       *value = value0;
2949       return value0 == value1;
2950
2951     case INTEGER_CST:
2952       switch (tree_int_cst_sgn (chrec))
2953         {
2954         case -1:
2955           *value = false;
2956           break;
2957         case 1:
2958           *value = true;
2959           break;
2960         default:
2961           return false;
2962         }
2963       return true;
2964
2965     default:
2966       return false;
2967     }
2968 }
2969
2970
2971 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
2972    constant, and CHREC_B is an affine function.  *OVERLAPS_A and
2973    *OVERLAPS_B are initialized to the functions that describe the
2974    relation between the elements accessed twice by CHREC_A and
2975    CHREC_B.  For k >= 0, the following property is verified:
2976
2977    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
2978
2979 static void
2980 analyze_siv_subscript_cst_affine (tree chrec_a,
2981                                   tree chrec_b,
2982                                   conflict_function **overlaps_a,
2983                                   conflict_function **overlaps_b,
2984                                   tree *last_conflicts)
2985 {
2986   bool value0, value1, value2;
2987   tree type, difference, tmp;
2988
2989   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
2990   chrec_a = chrec_convert (type, chrec_a, NULL);
2991   chrec_b = chrec_convert (type, chrec_b, NULL);
2992   difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
2993
2994   /* Special case overlap in the first iteration.  */
2995   if (integer_zerop (difference))
2996     {
2997       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2998       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2999       *last_conflicts = integer_one_node;
3000       return;
3001     }
3002
3003   if (!chrec_is_positive (initial_condition (difference), &value0))
3004     {
3005       if (dump_file && (dump_flags & TDF_DETAILS))
3006         fprintf (dump_file, "siv test failed: chrec is not positive.\n");
3007
3008       dependence_stats.num_siv_unimplemented++;
3009       *overlaps_a = conflict_fn_not_known ();
3010       *overlaps_b = conflict_fn_not_known ();
3011       *last_conflicts = chrec_dont_know;
3012       return;
3013     }
3014   else
3015     {
3016       if (value0 == false)
3017         {
3018           if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
3019             {
3020               if (dump_file && (dump_flags & TDF_DETAILS))
3021                 fprintf (dump_file, "siv test failed: chrec not positive.\n");
3022
3023               *overlaps_a = conflict_fn_not_known ();
3024               *overlaps_b = conflict_fn_not_known ();
3025               *last_conflicts = chrec_dont_know;
3026               dependence_stats.num_siv_unimplemented++;
3027               return;
3028             }
3029           else
3030             {
3031               if (value1 == true)
3032                 {
3033                   /* Example:
3034                      chrec_a = 12
3035                      chrec_b = {10, +, 1}
3036                   */
3037
3038                   if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
3039                     {
3040                       HOST_WIDE_INT numiter;
3041                       struct loop *loop = get_chrec_loop (chrec_b);
3042
3043                       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3044                       tmp = fold_build2 (EXACT_DIV_EXPR, type,
3045                                          fold_build1 (ABS_EXPR, type, difference),
3046                                          CHREC_RIGHT (chrec_b));
3047                       *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
3048                       *last_conflicts = integer_one_node;
3049
3050
3051                       /* Perform weak-zero siv test to see if overlap is
3052                          outside the loop bounds.  */
3053                       numiter = max_stmt_executions_int (loop);
3054
3055                       if (numiter >= 0
3056                           && compare_tree_int (tmp, numiter) > 0)
3057                         {
3058                           free_conflict_function (*overlaps_a);
3059                           free_conflict_function (*overlaps_b);
3060                           *overlaps_a = conflict_fn_no_dependence ();
3061                           *overlaps_b = conflict_fn_no_dependence ();
3062                           *last_conflicts = integer_zero_node;
3063                           dependence_stats.num_siv_independent++;
3064                           return;
3065                         }
3066                       dependence_stats.num_siv_dependent++;
3067                       return;
3068                     }
3069
3070                   /* When the step does not divide the difference, there are
3071                      no overlaps.  */
3072                   else
3073                     {
3074                       *overlaps_a = conflict_fn_no_dependence ();
3075                       *overlaps_b = conflict_fn_no_dependence ();
3076                       *last_conflicts = integer_zero_node;
3077                       dependence_stats.num_siv_independent++;
3078                       return;
3079                     }
3080                 }
3081
3082               else
3083                 {
3084                   /* Example:
3085                      chrec_a = 12
3086                      chrec_b = {10, +, -1}
3087
3088                      In this case, chrec_a will not overlap with chrec_b.  */
3089                   *overlaps_a = conflict_fn_no_dependence ();
3090                   *overlaps_b = conflict_fn_no_dependence ();
3091                   *last_conflicts = integer_zero_node;
3092                   dependence_stats.num_siv_independent++;
3093                   return;
3094                 }
3095             }
3096         }
3097       else
3098         {
3099           if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
3100             {
3101               if (dump_file && (dump_flags & TDF_DETAILS))
3102                 fprintf (dump_file, "siv test failed: chrec not positive.\n");
3103
3104               *overlaps_a = conflict_fn_not_known ();
3105               *overlaps_b = conflict_fn_not_known ();
3106               *last_conflicts = chrec_dont_know;
3107               dependence_stats.num_siv_unimplemented++;
3108               return;
3109             }
3110           else
3111             {
3112               if (value2 == false)
3113                 {
3114                   /* Example:
3115                      chrec_a = 3
3116                      chrec_b = {10, +, -1}
3117                   */
3118                   if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
3119                     {
3120                       HOST_WIDE_INT numiter;
3121                       struct loop *loop = get_chrec_loop (chrec_b);
3122
3123                       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3124                       tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
3125                                          CHREC_RIGHT (chrec_b));
3126                       *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
3127                       *last_conflicts = integer_one_node;
3128
3129                       /* Perform weak-zero siv test to see if overlap is
3130                          outside the loop bounds.  */
3131                       numiter = max_stmt_executions_int (loop);
3132
3133                       if (numiter >= 0
3134                           && compare_tree_int (tmp, numiter) > 0)
3135                         {
3136                           free_conflict_function (*overlaps_a);
3137                           free_conflict_function (*overlaps_b);
3138                           *overlaps_a = conflict_fn_no_dependence ();
3139                           *overlaps_b = conflict_fn_no_dependence ();
3140                           *last_conflicts = integer_zero_node;
3141                           dependence_stats.num_siv_independent++;
3142                           return;
3143                         }
3144                       dependence_stats.num_siv_dependent++;
3145                       return;
3146                     }
3147
3148                   /* When the step does not divide the difference, there
3149                      are no overlaps.  */
3150                   else
3151                     {
3152                       *overlaps_a = conflict_fn_no_dependence ();
3153                       *overlaps_b = conflict_fn_no_dependence ();
3154                       *last_conflicts = integer_zero_node;
3155                       dependence_stats.num_siv_independent++;
3156                       return;
3157                     }
3158                 }
3159               else
3160                 {
3161                   /* Example:
3162                      chrec_a = 3
3163                      chrec_b = {4, +, 1}
3164
3165                      In this case, chrec_a will not overlap with chrec_b.  */
3166                   *overlaps_a = conflict_fn_no_dependence ();
3167                   *overlaps_b = conflict_fn_no_dependence ();
3168                   *last_conflicts = integer_zero_node;
3169                   dependence_stats.num_siv_independent++;
3170                   return;
3171                 }
3172             }
3173         }
3174     }
3175 }
3176
3177 /* Helper recursive function for initializing the matrix A.  Returns
3178    the initial value of CHREC.  */
3179
3180 static tree
3181 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
3182 {
3183   gcc_assert (chrec);
3184
3185   switch (TREE_CODE (chrec))
3186     {
3187     case POLYNOMIAL_CHREC:
3188       A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
3189       return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
3190
3191     case PLUS_EXPR:
3192     case MULT_EXPR:
3193     case MINUS_EXPR:
3194       {
3195         tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
3196         tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
3197
3198         return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
3199       }
3200
3201     CASE_CONVERT:
3202       {
3203         tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
3204         return chrec_convert (chrec_type (chrec), op, NULL);
3205       }
3206
3207     case BIT_NOT_EXPR:
3208       {
3209         /* Handle ~X as -1 - X.  */
3210         tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
3211         return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
3212                               build_int_cst (TREE_TYPE (chrec), -1), op);
3213       }
3214
3215     case INTEGER_CST:
3216       return chrec;
3217
3218     default:
3219       gcc_unreachable ();
3220       return NULL_TREE;
3221     }
3222 }
3223
3224 #define FLOOR_DIV(x,y) ((x) / (y))
3225
3226 /* Solves the special case of the Diophantine equation:
3227    | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
3228
3229    Computes the descriptions OVERLAPS_A and OVERLAPS_B.  NITER is the
3230    number of iterations that loops X and Y run.  The overlaps will be
3231    constructed as evolutions in dimension DIM.  */
3232
3233 static void
3234 compute_overlap_steps_for_affine_univar (HOST_WIDE_INT niter,
3235                                          HOST_WIDE_INT step_a,
3236                                          HOST_WIDE_INT step_b,
3237                                          affine_fn *overlaps_a,
3238                                          affine_fn *overlaps_b,
3239                                          tree *last_conflicts, int dim)
3240 {
3241   if (((step_a > 0 && step_b > 0)
3242        || (step_a < 0 && step_b < 0)))
3243     {
3244       HOST_WIDE_INT step_overlaps_a, step_overlaps_b;
3245       HOST_WIDE_INT gcd_steps_a_b, last_conflict, tau2;
3246
3247       gcd_steps_a_b = gcd (step_a, step_b);
3248       step_overlaps_a = step_b / gcd_steps_a_b;
3249       step_overlaps_b = step_a / gcd_steps_a_b;
3250
3251       if (niter > 0)
3252         {
3253           tau2 = FLOOR_DIV (niter, step_overlaps_a);
3254           tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
3255           last_conflict = tau2;
3256           *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
3257         }
3258       else
3259         *last_conflicts = chrec_dont_know;
3260
3261       *overlaps_a = affine_fn_univar (integer_zero_node, dim,
3262                                       build_int_cst (NULL_TREE,
3263                                                      step_overlaps_a));
3264       *overlaps_b = affine_fn_univar (integer_zero_node, dim,
3265                                       build_int_cst (NULL_TREE,
3266                                                      step_overlaps_b));
3267     }
3268
3269   else
3270     {
3271       *overlaps_a = affine_fn_cst (integer_zero_node);
3272       *overlaps_b = affine_fn_cst (integer_zero_node);
3273       *last_conflicts = integer_zero_node;
3274     }
3275 }
3276
3277 /* Solves the special case of a Diophantine equation where CHREC_A is
3278    an affine bivariate function, and CHREC_B is an affine univariate
3279    function.  For example,
3280
3281    | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
3282
3283    has the following overlapping functions:
3284
3285    | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
3286    | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
3287    | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
3288
3289    FORNOW: This is a specialized implementation for a case occurring in
3290    a common benchmark.  Implement the general algorithm.  */
3291
3292 static void
3293 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
3294                                       conflict_function **overlaps_a,
3295                                       conflict_function **overlaps_b,
3296                                       tree *last_conflicts)
3297 {
3298   bool xz_p, yz_p, xyz_p;
3299   HOST_WIDE_INT step_x, step_y, step_z;
3300   HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
3301   affine_fn overlaps_a_xz, overlaps_b_xz;
3302   affine_fn overlaps_a_yz, overlaps_b_yz;
3303   affine_fn overlaps_a_xyz, overlaps_b_xyz;
3304   affine_fn ova1, ova2, ovb;
3305   tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
3306
3307   step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
3308   step_y = int_cst_value (CHREC_RIGHT (chrec_a));
3309   step_z = int_cst_value (CHREC_RIGHT (chrec_b));
3310
3311   niter_x = max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)));
3312   niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a));
3313   niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b));
3314
3315   if (niter_x < 0 || niter_y < 0 || niter_z < 0)
3316     {
3317       if (dump_file && (dump_flags & TDF_DETAILS))
3318         fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
3319
3320       *overlaps_a = conflict_fn_not_known ();
3321       *overlaps_b = conflict_fn_not_known ();
3322       *last_conflicts = chrec_dont_know;
3323       return;
3324     }
3325
3326   niter = MIN (niter_x, niter_z);
3327   compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
3328                                            &overlaps_a_xz,
3329                                            &overlaps_b_xz,
3330                                            &last_conflicts_xz, 1);
3331   niter = MIN (niter_y, niter_z);
3332   compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
3333                                            &overlaps_a_yz,
3334                                            &overlaps_b_yz,
3335                                            &last_conflicts_yz, 2);
3336   niter = MIN (niter_x, niter_z);
3337   niter = MIN (niter_y, niter);
3338   compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
3339                                            &overlaps_a_xyz,
3340                                            &overlaps_b_xyz,
3341                                            &last_conflicts_xyz, 3);
3342
3343   xz_p = !integer_zerop (last_conflicts_xz);
3344   yz_p = !integer_zerop (last_conflicts_yz);
3345   xyz_p = !integer_zerop (last_conflicts_xyz);
3346
3347   if (xz_p || yz_p || xyz_p)
3348     {
3349       ova1 = affine_fn_cst (integer_zero_node);
3350       ova2 = affine_fn_cst (integer_zero_node);
3351       ovb = affine_fn_cst (integer_zero_node);
3352       if (xz_p)
3353         {
3354           affine_fn t0 = ova1;
3355           affine_fn t2 = ovb;
3356
3357           ova1 = affine_fn_plus (ova1, overlaps_a_xz);
3358           ovb = affine_fn_plus (ovb, overlaps_b_xz);
3359           affine_fn_free (t0);
3360           affine_fn_free (t2);
3361           *last_conflicts = last_conflicts_xz;
3362         }
3363       if (yz_p)
3364         {
3365           affine_fn t0 = ova2;
3366           affine_fn t2 = ovb;
3367
3368           ova2 = affine_fn_plus (ova2, overlaps_a_yz);
3369           ovb = affine_fn_plus (ovb, overlaps_b_yz);
3370           affine_fn_free (t0);
3371           affine_fn_free (t2);
3372           *last_conflicts = last_conflicts_yz;
3373         }
3374       if (xyz_p)
3375         {
3376           affine_fn t0 = ova1;
3377           affine_fn t2 = ova2;
3378           affine_fn t4 = ovb;
3379
3380           ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
3381           ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
3382           ovb = affine_fn_plus (ovb, overlaps_b_xyz);
3383           affine_fn_free (t0);
3384           affine_fn_free (t2);
3385           affine_fn_free (t4);
3386           *last_conflicts = last_conflicts_xyz;
3387         }
3388       *overlaps_a = conflict_fn (2, ova1, ova2);
3389       *overlaps_b = conflict_fn (1, ovb);
3390     }
3391   else
3392     {
3393       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3394       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3395       *last_conflicts = integer_zero_node;
3396     }
3397
3398   affine_fn_free (overlaps_a_xz);
3399   affine_fn_free (overlaps_b_xz);
3400   affine_fn_free (overlaps_a_yz);
3401   affine_fn_free (overlaps_b_yz);
3402   affine_fn_free (overlaps_a_xyz);
3403   affine_fn_free (overlaps_b_xyz);
3404 }
3405
3406 /* Copy the elements of vector VEC1 with length SIZE to VEC2.  */
3407
3408 static void
3409 lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
3410                     int size)
3411 {
3412   memcpy (vec2, vec1, size * sizeof (*vec1));
3413 }
3414
3415 /* Copy the elements of M x N matrix MAT1 to MAT2.  */
3416
3417 static void
3418 lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
3419                     int m, int n)
3420 {
3421   int i;
3422
3423   for (i = 0; i < m; i++)
3424     lambda_vector_copy (mat1[i], mat2[i], n);
3425 }
3426
3427 /* Store the N x N identity matrix in MAT.  */
3428
3429 static void
3430 lambda_matrix_id (lambda_matrix mat, int size)
3431 {
3432   int i, j;
3433
3434   for (i = 0; i < size; i++)
3435     for (j = 0; j < size; j++)
3436       mat[i][j] = (i == j) ? 1 : 0;
3437 }
3438
3439 /* Return the first nonzero element of vector VEC1 between START and N.
3440    We must have START <= N.   Returns N if VEC1 is the zero vector.  */
3441
3442 static int
3443 lambda_vector_first_nz (lambda_vector vec1, int n, int start)
3444 {
3445   int j = start;
3446   while (j < n && vec1[j] == 0)
3447     j++;
3448   return j;
3449 }
3450
3451 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
3452    R2 = R2 + CONST1 * R1.  */
3453
3454 static void
3455 lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2, int const1)
3456 {
3457   int i;
3458
3459   if (const1 == 0)
3460     return;
3461
3462   for (i = 0; i < n; i++)
3463     mat[r2][i] += const1 * mat[r1][i];
3464 }
3465
3466 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
3467    and store the result in VEC2.  */
3468
3469 static void
3470 lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
3471                           int size, int const1)
3472 {
3473   int i;
3474
3475   if (const1 == 0)
3476     lambda_vector_clear (vec2, size);
3477   else
3478     for (i = 0; i < size; i++)
3479       vec2[i] = const1 * vec1[i];
3480 }
3481
3482 /* Negate vector VEC1 with length SIZE and store it in VEC2.  */
3483
3484 static void
3485 lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
3486                       int size)
3487 {
3488   lambda_vector_mult_const (vec1, vec2, size, -1);
3489 }
3490
3491 /* Negate row R1 of matrix MAT which has N columns.  */
3492
3493 static void
3494 lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
3495 {
3496   lambda_vector_negate (mat[r1], mat[r1], n);
3497 }
3498
3499 /* Return true if two vectors are equal.  */
3500
3501 static bool
3502 lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
3503 {
3504   int i;
3505   for (i = 0; i < size; i++)
3506     if (vec1[i] != vec2[i])
3507       return false;
3508   return true;
3509 }
3510
3511 /* Given an M x N integer matrix A, this function determines an M x
3512    M unimodular matrix U, and an M x N echelon matrix S such that
3513    "U.A = S".  This decomposition is also known as "right Hermite".
3514
3515    Ref: Algorithm 2.1 page 33 in "Loop Transformations for
3516    Restructuring Compilers" Utpal Banerjee.  */
3517
3518 static void
3519 lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
3520                              lambda_matrix S, lambda_matrix U)
3521 {
3522   int i, j, i0 = 0;
3523
3524   lambda_matrix_copy (A, S, m, n);
3525   lambda_matrix_id (U, m);
3526
3527   for (j = 0; j < n; j++)
3528     {
3529       if (lambda_vector_first_nz (S[j], m, i0) < m)
3530         {
3531           ++i0;
3532           for (i = m - 1; i >= i0; i--)
3533             {
3534               while (S[i][j] != 0)
3535                 {
3536                   int sigma, factor, a, b;
3537
3538                   a = S[i-1][j];
3539                   b = S[i][j];
3540                   sigma = (a * b < 0) ? -1: 1;
3541                   a = abs (a);
3542                   b = abs (b);
3543                   factor = sigma * (a / b);
3544
3545                   lambda_matrix_row_add (S, n, i, i-1, -factor);
3546                   std::swap (S[i], S[i-1]);
3547
3548                   lambda_matrix_row_add (U, m, i, i-1, -factor);
3549                   std::swap (U[i], U[i-1]);
3550                 }
3551             }
3552         }
3553     }
3554 }
3555
3556 /* Determines the overlapping elements due to accesses CHREC_A and
3557    CHREC_B, that are affine functions.  This function cannot handle
3558    symbolic evolution functions, ie. when initial conditions are
3559    parameters, because it uses lambda matrices of integers.  */
3560
3561 static void
3562 analyze_subscript_affine_affine (tree chrec_a,
3563                                  tree chrec_b,
3564                                  conflict_function **overlaps_a,
3565                                  conflict_function **overlaps_b,
3566                                  tree *last_conflicts)
3567 {
3568   unsigned nb_vars_a, nb_vars_b, dim;
3569   HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta;
3570   lambda_matrix A, U, S;
3571   struct obstack scratch_obstack;
3572
3573   if (eq_evolutions_p (chrec_a, chrec_b))
3574     {
3575       /* The accessed index overlaps for each iteration in the
3576          loop.  */
3577       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3578       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3579       *last_conflicts = chrec_dont_know;
3580       return;
3581     }
3582   if (dump_file && (dump_flags & TDF_DETAILS))
3583     fprintf (dump_file, "(analyze_subscript_affine_affine \n");
3584
3585   /* For determining the initial intersection, we have to solve a
3586      Diophantine equation.  This is the most time consuming part.
3587
3588      For answering to the question: "Is there a dependence?" we have
3589      to prove that there exists a solution to the Diophantine
3590      equation, and that the solution is in the iteration domain,
3591      i.e. the solution is positive or zero, and that the solution
3592      happens before the upper bound loop.nb_iterations.  Otherwise
3593      there is no dependence.  This function outputs a description of
3594      the iterations that hold the intersections.  */
3595
3596   nb_vars_a = nb_vars_in_chrec (chrec_a);
3597   nb_vars_b = nb_vars_in_chrec (chrec_b);
3598
3599   gcc_obstack_init (&scratch_obstack);
3600
3601   dim = nb_vars_a + nb_vars_b;
3602   U = lambda_matrix_new (dim, dim, &scratch_obstack);
3603   A = lambda_matrix_new (dim, 1, &scratch_obstack);
3604   S = lambda_matrix_new (dim, 1, &scratch_obstack);
3605
3606   init_a = int_cst_value (initialize_matrix_A (A, chrec_a, 0, 1));
3607   init_b = int_cst_value (initialize_matrix_A (A, chrec_b, nb_vars_a, -1));
3608   gamma = init_b - init_a;
3609
3610   /* Don't do all the hard work of solving the Diophantine equation
3611      when we already know the solution: for example,
3612      | {3, +, 1}_1
3613      | {3, +, 4}_2
3614      | gamma = 3 - 3 = 0.
3615      Then the first overlap occurs during the first iterations:
3616      | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
3617   */
3618   if (gamma == 0)
3619     {
3620       if (nb_vars_a == 1 && nb_vars_b == 1)
3621         {
3622           HOST_WIDE_INT step_a, step_b;
3623           HOST_WIDE_INT niter, niter_a, niter_b;
3624           affine_fn ova, ovb;
3625
3626           niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a));
3627           niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b));
3628           niter = MIN (niter_a, niter_b);
3629           step_a = int_cst_value (CHREC_RIGHT (chrec_a));
3630           step_b = int_cst_value (CHREC_RIGHT (chrec_b));
3631
3632           compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
3633                                                    &ova, &ovb,
3634                                                    last_conflicts, 1);
3635           *overlaps_a = conflict_fn (1, ova);
3636           *overlaps_b = conflict_fn (1, ovb);
3637         }
3638
3639       else if (nb_vars_a == 2 && nb_vars_b == 1)
3640         compute_overlap_steps_for_affine_1_2
3641           (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
3642
3643       else if (nb_vars_a == 1 && nb_vars_b == 2)
3644         compute_overlap_steps_for_affine_1_2
3645           (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
3646
3647       else
3648         {
3649           if (dump_file && (dump_flags & TDF_DETAILS))
3650             fprintf (dump_file, "affine-affine test failed: too many variables.\n");
3651           *overlaps_a = conflict_fn_not_known ();
3652           *overlaps_b = conflict_fn_not_known ();
3653           *last_conflicts = chrec_dont_know;
3654         }
3655       goto end_analyze_subs_aa;
3656     }
3657
3658   /* U.A = S */
3659   lambda_matrix_right_hermite (A, dim, 1, S, U);
3660
3661   if (S[0][0] < 0)
3662     {
3663       S[0][0] *= -1;
3664       lambda_matrix_row_negate (U, dim, 0);
3665     }
3666   gcd_alpha_beta = S[0][0];
3667
3668   /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
3669      but that is a quite strange case.  Instead of ICEing, answer
3670      don't know.  */
3671   if (gcd_alpha_beta == 0)
3672     {
3673       *overlaps_a = conflict_fn_not_known ();
3674       *overlaps_b = conflict_fn_not_known ();
3675       *last_conflicts = chrec_dont_know;
3676       goto end_analyze_subs_aa;
3677     }
3678
3679   /* The classic "gcd-test".  */
3680   if (!int_divides_p (gcd_alpha_beta, gamma))
3681     {
3682       /* The "gcd-test" has determined that there is no integer
3683          solution, i.e. there is no dependence.  */
3684       *overlaps_a = conflict_fn_no_dependence ();
3685       *overlaps_b = conflict_fn_no_dependence ();
3686       *last_conflicts = integer_zero_node;
3687     }
3688
3689   /* Both access functions are univariate.  This includes SIV and MIV cases.  */
3690   else if (nb_vars_a == 1 && nb_vars_b == 1)
3691     {
3692       /* Both functions should have the same evolution sign.  */
3693       if (((A[0][0] > 0 && -A[1][0] > 0)
3694            || (A[0][0] < 0 && -A[1][0] < 0)))
3695         {
3696           /* The solutions are given by:
3697              |
3698              | [GAMMA/GCD_ALPHA_BETA  t].[u11 u12]  = [x0]
3699              |                           [u21 u22]    [y0]
3700
3701              For a given integer t.  Using the following variables,
3702
3703              | i0 = u11 * gamma / gcd_alpha_beta
3704              | j0 = u12 * gamma / gcd_alpha_beta
3705              | i1 = u21
3706              | j1 = u22
3707
3708              the solutions are:
3709
3710              | x0 = i0 + i1 * t,
3711              | y0 = j0 + j1 * t.  */
3712           HOST_WIDE_INT i0, j0, i1, j1;
3713
3714           i0 = U[0][0] * gamma / gcd_alpha_beta;
3715           j0 = U[0][1] * gamma / gcd_alpha_beta;
3716           i1 = U[1][0];
3717           j1 = U[1][1];
3718
3719           if ((i1 == 0 && i0 < 0)
3720               || (j1 == 0 && j0 < 0))
3721             {
3722               /* There is no solution.
3723                  FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
3724                  falls in here, but for the moment we don't look at the
3725                  upper bound of the iteration domain.  */
3726               *overlaps_a = conflict_fn_no_dependence ();
3727               *overlaps_b = conflict_fn_no_dependence ();
3728               *last_conflicts = integer_zero_node;
3729               goto end_analyze_subs_aa;
3730             }
3731
3732           if (i1 > 0 && j1 > 0)
3733             {
3734               HOST_WIDE_INT niter_a
3735                 = max_stmt_executions_int (get_chrec_loop (chrec_a));
3736               HOST_WIDE_INT niter_b
3737                 = max_stmt_executions_int (get_chrec_loop (chrec_b));
3738               HOST_WIDE_INT niter = MIN (niter_a, niter_b);
3739
3740               /* (X0, Y0) is a solution of the Diophantine equation:
3741                  "chrec_a (X0) = chrec_b (Y0)".  */
3742               HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
3743                                         CEIL (-j0, j1));
3744               HOST_WIDE_INT x0 = i1 * tau1 + i0;
3745               HOST_WIDE_INT y0 = j1 * tau1 + j0;
3746
3747               /* (X1, Y1) is the smallest positive solution of the eq
3748                  "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
3749                  first conflict occurs.  */
3750               HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
3751               HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
3752               HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
3753
3754               if (niter > 0)
3755                 {
3756                   HOST_WIDE_INT tau2 = MIN (FLOOR_DIV (niter_a - i0, i1),
3757                                             FLOOR_DIV (niter_b - j0, j1));
3758                   HOST_WIDE_INT last_conflict = tau2 - (x1 - i0)/i1;
3759
3760                   /* If the overlap occurs outside of the bounds of the
3761                      loop, there is no dependence.  */
3762                   if (x1 >= niter_a || y1 >= niter_b)
3763                     {
3764                       *overlaps_a = conflict_fn_no_dependence ();
3765                       *overlaps_b = conflict_fn_no_dependence ();
3766                       *last_conflicts = integer_zero_node;
3767                       goto end_analyze_subs_aa;
3768                     }
3769                   else
3770                     *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
3771                 }
3772               else
3773                 *last_conflicts = chrec_dont_know;
3774
3775               *overlaps_a
3776                 = conflict_fn (1,
3777                                affine_fn_univar (build_int_cst (NULL_TREE, x1),
3778                                                  1,
3779                                                  build_int_cst (NULL_TREE, i1)));
3780               *overlaps_b
3781                 = conflict_fn (1,
3782                                affine_fn_univar (build_int_cst (NULL_TREE, y1),
3783                                                  1,
3784                                                  build_int_cst (NULL_TREE, j1)));
3785             }
3786           else
3787             {
3788               /* FIXME: For the moment, the upper bound of the
3789                  iteration domain for i and j is not checked.  */
3790               if (dump_file && (dump_flags & TDF_DETAILS))
3791                 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
3792               *overlaps_a = conflict_fn_not_known ();
3793               *overlaps_b = conflict_fn_not_known ();
3794               *last_conflicts = chrec_dont_know;
3795             }
3796         }
3797       else
3798         {
3799           if (dump_file && (dump_flags & TDF_DETAILS))
3800             fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
3801           *overlaps_a = conflict_fn_not_known ();
3802           *overlaps_b = conflict_fn_not_known ();
3803           *last_conflicts = chrec_dont_know;
3804         }
3805     }
3806   else
3807     {
3808       if (dump_file && (dump_flags & TDF_DETAILS))
3809         fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
3810       *overlaps_a = conflict_fn_not_known ();
3811       *overlaps_b = conflict_fn_not_known ();
3812       *last_conflicts = chrec_dont_know;
3813     }
3814
3815 end_analyze_subs_aa:
3816   obstack_free (&scratch_obstack, NULL);
3817   if (dump_file && (dump_flags & TDF_DETAILS))
3818     {
3819       fprintf (dump_file, "  (overlaps_a = ");
3820       dump_conflict_function (dump_file, *overlaps_a);
3821       fprintf (dump_file, ")\n  (overlaps_b = ");
3822       dump_conflict_function (dump_file, *overlaps_b);
3823       fprintf (dump_file, "))\n");
3824     }
3825 }
3826
3827 /* Returns true when analyze_subscript_affine_affine can be used for
3828    determining the dependence relation between chrec_a and chrec_b,
3829    that contain symbols.  This function modifies chrec_a and chrec_b
3830    such that the analysis result is the same, and such that they don't
3831    contain symbols, and then can safely be passed to the analyzer.
3832
3833    Example: The analysis of the following tuples of evolutions produce
3834    the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
3835    vs. {0, +, 1}_1
3836
3837    {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
3838    {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
3839 */
3840
3841 static bool
3842 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
3843 {
3844   tree diff, type, left_a, left_b, right_b;
3845
3846   if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
3847       || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
3848     /* FIXME: For the moment not handled.  Might be refined later.  */
3849     return false;
3850
3851   type = chrec_type (*chrec_a);
3852   left_a = CHREC_LEFT (*chrec_a);
3853   left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
3854   diff = chrec_fold_minus (type, left_a, left_b);
3855
3856   if (!evolution_function_is_constant_p (diff))
3857     return false;
3858
3859   if (dump_file && (dump_flags & TDF_DETAILS))
3860     fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
3861
3862   *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
3863                                      diff, CHREC_RIGHT (*chrec_a));
3864   right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
3865   *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
3866                                      build_int_cst (type, 0),
3867                                      right_b);
3868   return true;
3869 }
3870
3871 /* Analyze a SIV (Single Index Variable) subscript.  *OVERLAPS_A and
3872    *OVERLAPS_B are initialized to the functions that describe the
3873    relation between the elements accessed twice by CHREC_A and
3874    CHREC_B.  For k >= 0, the following property is verified:
3875
3876    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
3877
3878 static void
3879 analyze_siv_subscript (tree chrec_a,
3880                        tree chrec_b,
3881                        conflict_function **overlaps_a,
3882                        conflict_function **overlaps_b,
3883                        tree *last_conflicts,
3884                        int loop_nest_num)
3885 {
3886   dependence_stats.num_siv++;
3887
3888   if (dump_file && (dump_flags & TDF_DETAILS))
3889     fprintf (dump_file, "(analyze_siv_subscript \n");
3890
3891   if (evolution_function_is_constant_p (chrec_a)
3892       && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
3893     analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
3894                                       overlaps_a, overlaps_b, last_conflicts);
3895
3896   else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
3897            && evolution_function_is_constant_p (chrec_b))
3898     analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
3899                                       overlaps_b, overlaps_a, last_conflicts);
3900
3901   else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
3902            && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
3903     {
3904       if (!chrec_contains_symbols (chrec_a)
3905           && !chrec_contains_symbols (chrec_b))
3906         {
3907           analyze_subscript_affine_affine (chrec_a, chrec_b,
3908                                            overlaps_a, overlaps_b,
3909                                            last_conflicts);
3910
3911           if (CF_NOT_KNOWN_P (*overlaps_a)
3912               || CF_NOT_KNOWN_P (*overlaps_b))
3913             dependence_stats.num_siv_unimplemented++;
3914           else if (CF_NO_DEPENDENCE_P (*overlaps_a)
3915                    || CF_NO_DEPENDENCE_P (*overlaps_b))
3916             dependence_stats.num_siv_independent++;
3917           else
3918             dependence_stats.num_siv_dependent++;
3919         }
3920       else if (can_use_analyze_subscript_affine_affine (&chrec_a,
3921                                                         &chrec_b))
3922         {
3923           analyze_subscript_affine_affine (chrec_a, chrec_b,
3924                                            overlaps_a, overlaps_b,
3925                                            last_conflicts);
3926
3927           if (CF_NOT_KNOWN_P (*overlaps_a)
3928               || CF_NOT_KNOWN_P (*overlaps_b))
3929             dependence_stats.num_siv_unimplemented++;
3930           else if (CF_NO_DEPENDENCE_P (*overlaps_a)
3931                    || CF_NO_DEPENDENCE_P (*overlaps_b))
3932             dependence_stats.num_siv_independent++;
3933           else
3934             dependence_stats.num_siv_dependent++;
3935         }
3936       else
3937         goto siv_subscript_dontknow;
3938     }
3939
3940   else
3941     {
3942     siv_subscript_dontknow:;
3943       if (dump_file && (dump_flags & TDF_DETAILS))
3944         fprintf (dump_file, "  siv test failed: unimplemented");
3945       *overlaps_a = conflict_fn_not_known ();
3946       *overlaps_b = conflict_fn_not_known ();
3947       *last_conflicts = chrec_dont_know;
3948       dependence_stats.num_siv_unimplemented++;
3949     }
3950
3951   if (dump_file && (dump_flags & TDF_DETAILS))
3952     fprintf (dump_file, ")\n");
3953 }
3954
3955 /* Returns false if we can prove that the greatest common divisor of the steps
3956    of CHREC does not divide CST, false otherwise.  */
3957
3958 static bool
3959 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
3960 {
3961   HOST_WIDE_INT cd = 0, val;
3962   tree step;
3963
3964   if (!tree_fits_shwi_p (cst))
3965     return true;
3966   val = tree_to_shwi (cst);
3967
3968   while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
3969     {
3970       step = CHREC_RIGHT (chrec);
3971       if (!tree_fits_shwi_p (step))
3972         return true;
3973       cd = gcd (cd, tree_to_shwi (step));
3974       chrec = CHREC_LEFT (chrec);
3975     }
3976
3977   return val % cd == 0;
3978 }
3979
3980 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
3981    LOOP_NEST.  *OVERLAPS_A and *OVERLAPS_B are initialized to the
3982    functions that describe the relation between the elements accessed
3983    twice by CHREC_A and CHREC_B.  For k >= 0, the following property
3984    is verified:
3985
3986    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
3987
3988 static void
3989 analyze_miv_subscript (tree chrec_a,
3990                        tree chrec_b,
3991                        conflict_function **overlaps_a,
3992                        conflict_function **overlaps_b,
3993                        tree *last_conflicts,
3994                        struct loop *loop_nest)
3995 {
3996   tree type, difference;
3997
3998   dependence_stats.num_miv++;
3999   if (dump_file && (dump_flags & TDF_DETAILS))
4000     fprintf (dump_file, "(analyze_miv_subscript \n");
4001
4002   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
4003   chrec_a = chrec_convert (type, chrec_a, NULL);
4004   chrec_b = chrec_convert (type, chrec_b, NULL);
4005   difference = chrec_fold_minus (type, chrec_a, chrec_b);
4006
4007   if (eq_evolutions_p (chrec_a, chrec_b))
4008     {
4009       /* Access functions are the same: all the elements are accessed
4010          in the same order.  */
4011       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4012       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4013       *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
4014       dependence_stats.num_miv_dependent++;
4015     }
4016
4017   else if (evolution_function_is_constant_p (difference)
4018            && evolution_function_is_affine_multivariate_p (chrec_a,
4019                                                            loop_nest->num)
4020            && !gcd_of_steps_may_divide_p (chrec_a, difference))
4021     {
4022       /* testsuite/.../ssa-chrec-33.c
4023          {{21, +, 2}_1, +, -2}_2  vs.  {{20, +, 2}_1, +, -2}_2
4024
4025          The difference is 1, and all the evolution steps are multiples
4026          of 2, consequently there are no overlapping elements.  */
4027       *overlaps_a = conflict_fn_no_dependence ();
4028       *overlaps_b = conflict_fn_no_dependence ();
4029       *last_conflicts = integer_zero_node;
4030       dependence_stats.num_miv_independent++;
4031     }
4032
4033   else if (evolution_function_is_affine_multivariate_p (chrec_a, loop_nest->num)
4034            && !chrec_contains_symbols (chrec_a)
4035            && evolution_function_is_affine_multivariate_p (chrec_b, loop_nest->num)
4036            && !chrec_contains_symbols (chrec_b))
4037     {
4038       /* testsuite/.../ssa-chrec-35.c
4039          {0, +, 1}_2  vs.  {0, +, 1}_3
4040          the overlapping elements are respectively located at iterations:
4041          {0, +, 1}_x and {0, +, 1}_x,
4042          in other words, we have the equality:
4043          {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
4044
4045          Other examples:
4046          {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
4047          {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
4048
4049          {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
4050          {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
4051       */
4052       analyze_subscript_affine_affine (chrec_a, chrec_b,
4053                                        overlaps_a, overlaps_b, last_conflicts);
4054
4055       if (CF_NOT_KNOWN_P (*overlaps_a)
4056           || CF_NOT_KNOWN_P (*overlaps_b))
4057         dependence_stats.num_miv_unimplemented++;
4058       else if (CF_NO_DEPENDENCE_P (*overlaps_a)
4059                || CF_NO_DEPENDENCE_P (*overlaps_b))
4060         dependence_stats.num_miv_independent++;
4061       else
4062         dependence_stats.num_miv_dependent++;
4063     }
4064
4065   else
4066     {
4067       /* When the analysis is too difficult, answer "don't know".  */
4068       if (dump_file && (dump_flags & TDF_DETAILS))
4069         fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
4070
4071       *overlaps_a = conflict_fn_not_known ();
4072       *overlaps_b = conflict_fn_not_known ();
4073       *last_conflicts = chrec_dont_know;
4074       dependence_stats.num_miv_unimplemented++;
4075     }
4076
4077   if (dump_file && (dump_flags & TDF_DETAILS))
4078     fprintf (dump_file, ")\n");
4079 }
4080
4081 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
4082    with respect to LOOP_NEST.  OVERLAP_ITERATIONS_A and
4083    OVERLAP_ITERATIONS_B are initialized with two functions that
4084    describe the iterations that contain conflicting elements.
4085
4086    Remark: For an integer k >= 0, the following equality is true:
4087
4088    CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
4089 */
4090
4091 static void
4092 analyze_overlapping_iterations (tree chrec_a,
4093                                 tree chrec_b,
4094                                 conflict_function **overlap_iterations_a,
4095                                 conflict_function **overlap_iterations_b,
4096                                 tree *last_conflicts, struct loop *loop_nest)
4097 {
4098   unsigned int lnn = loop_nest->num;
4099
4100   dependence_stats.num_subscript_tests++;
4101
4102   if (dump_file && (dump_flags & TDF_DETAILS))
4103     {
4104       fprintf (dump_file, "(analyze_overlapping_iterations \n");
4105       fprintf (dump_file, "  (chrec_a = ");
4106       print_generic_expr (dump_file, chrec_a);
4107       fprintf (dump_file, ")\n  (chrec_b = ");
4108       print_generic_expr (dump_file, chrec_b);
4109       fprintf (dump_file, ")\n");
4110     }
4111
4112   if (chrec_a == NULL_TREE
4113       || chrec_b == NULL_TREE
4114       || chrec_contains_undetermined (chrec_a)
4115       || chrec_contains_undetermined (chrec_b))
4116     {
4117       dependence_stats.num_subscript_undetermined++;
4118
4119       *overlap_iterations_a = conflict_fn_not_known ();
4120       *overlap_iterations_b = conflict_fn_not_known ();
4121     }
4122
4123   /* If they are the same chrec, and are affine, they overlap
4124      on every iteration.  */
4125   else if (eq_evolutions_p (chrec_a, chrec_b)
4126            && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
4127                || operand_equal_p (chrec_a, chrec_b, 0)))
4128     {
4129       dependence_stats.num_same_subscript_function++;
4130       *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4131       *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4132       *last_conflicts = chrec_dont_know;
4133     }
4134
4135   /* If they aren't the same, and aren't affine, we can't do anything
4136      yet.  */
4137   else if ((chrec_contains_symbols (chrec_a)
4138             || chrec_contains_symbols (chrec_b))
4139            && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
4140                || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
4141     {
4142       dependence_stats.num_subscript_undetermined++;
4143       *overlap_iterations_a = conflict_fn_not_known ();
4144       *overlap_iterations_b = conflict_fn_not_known ();
4145     }
4146
4147   else if (ziv_subscript_p (chrec_a, chrec_b))
4148     analyze_ziv_subscript (chrec_a, chrec_b,
4149                            overlap_iterations_a, overlap_iterations_b,
4150                            last_conflicts);
4151
4152   else if (siv_subscript_p (chrec_a, chrec_b))
4153     analyze_siv_subscript (chrec_a, chrec_b,
4154                            overlap_iterations_a, overlap_iterations_b,
4155                            last_conflicts, lnn);
4156
4157   else
4158     analyze_miv_subscript (chrec_a, chrec_b,
4159                            overlap_iterations_a, overlap_iterations_b,
4160                            last_conflicts, loop_nest);
4161
4162   if (dump_file && (dump_flags & TDF_DETAILS))
4163     {
4164       fprintf (dump_file, "  (overlap_iterations_a = ");
4165       dump_conflict_function (dump_file, *overlap_iterations_a);
4166       fprintf (dump_file, ")\n  (overlap_iterations_b = ");
4167       dump_conflict_function (dump_file, *overlap_iterations_b);
4168       fprintf (dump_file, "))\n");
4169     }
4170 }
4171
4172 /* Helper function for uniquely inserting distance vectors.  */
4173
4174 static void
4175 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
4176 {
4177   unsigned i;
4178   lambda_vector v;
4179
4180   FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, v)
4181     if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
4182       return;
4183
4184   DDR_DIST_VECTS (ddr).safe_push (dist_v);
4185 }
4186
4187 /* Helper function for uniquely inserting direction vectors.  */
4188
4189 static void
4190 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
4191 {
4192   unsigned i;
4193   lambda_vector v;
4194
4195   FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), i, v)
4196     if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
4197       return;
4198
4199   DDR_DIR_VECTS (ddr).safe_push (dir_v);
4200 }
4201
4202 /* Add a distance of 1 on all the loops outer than INDEX.  If we
4203    haven't yet determined a distance for this outer loop, push a new
4204    distance vector composed of the previous distance, and a distance
4205    of 1 for this outer loop.  Example:
4206
4207    | loop_1
4208    |   loop_2
4209    |     A[10]
4210    |   endloop_2
4211    | endloop_1
4212
4213    Saved vectors are of the form (dist_in_1, dist_in_2).  First, we
4214    save (0, 1), then we have to save (1, 0).  */
4215
4216 static void
4217 add_outer_distances (struct data_dependence_relation *ddr,
4218                      lambda_vector dist_v, int index)
4219 {
4220   /* For each outer loop where init_v is not set, the accesses are
4221      in dependence of distance 1 in the loop.  */
4222   while (--index >= 0)
4223     {
4224       lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4225       lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
4226       save_v[index] = 1;
4227       save_dist_v (ddr, save_v);
4228     }
4229 }
4230
4231 /* Return false when fail to represent the data dependence as a
4232    distance vector.  A_INDEX is the index of the first reference
4233    (0 for DDR_A, 1 for DDR_B) and B_INDEX is the index of the
4234    second reference.  INIT_B is set to true when a component has been
4235    added to the distance vector DIST_V.  INDEX_CARRY is then set to
4236    the index in DIST_V that carries the dependence.  */
4237
4238 static bool
4239 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
4240                              unsigned int a_index, unsigned int b_index,
4241                              lambda_vector dist_v, bool *init_b,
4242                              int *index_carry)
4243 {
4244   unsigned i;
4245   lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4246
4247   for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
4248     {
4249       tree access_fn_a, access_fn_b;
4250       struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
4251
4252       if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
4253         {
4254           non_affine_dependence_relation (ddr);
4255           return false;
4256         }
4257
4258       access_fn_a = SUB_ACCESS_FN (subscript, a_index);
4259       access_fn_b = SUB_ACCESS_FN (subscript, b_index);
4260
4261       if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
4262           && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
4263         {
4264           HOST_WIDE_INT dist;
4265           int index;
4266           int var_a = CHREC_VARIABLE (access_fn_a);
4267           int var_b = CHREC_VARIABLE (access_fn_b);
4268
4269           if (var_a != var_b
4270               || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
4271             {
4272               non_affine_dependence_relation (ddr);
4273               return false;
4274             }
4275
4276           dist = int_cst_value (SUB_DISTANCE (subscript));
4277           index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
4278           *index_carry = MIN (index, *index_carry);
4279
4280           /* This is the subscript coupling test.  If we have already
4281              recorded a distance for this loop (a distance coming from
4282              another subscript), it should be the same.  For example,
4283              in the following code, there is no dependence:
4284
4285              | loop i = 0, N, 1
4286              |   T[i+1][i] = ...
4287              |   ... = T[i][i]
4288              | endloop
4289           */
4290           if (init_v[index] != 0 && dist_v[index] != dist)
4291             {
4292               finalize_ddr_dependent (ddr, chrec_known);
4293               return false;
4294             }
4295
4296           dist_v[index] = dist;
4297           init_v[index] = 1;
4298           *init_b = true;
4299         }
4300       else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
4301         {
4302           /* This can be for example an affine vs. constant dependence
4303              (T[i] vs. T[3]) that is not an affine dependence and is
4304              not representable as a distance vector.  */
4305           non_affine_dependence_relation (ddr);
4306           return false;
4307         }
4308     }
4309
4310   return true;
4311 }
4312
4313 /* Return true when the DDR contains only constant access functions.  */
4314
4315 static bool
4316 constant_access_functions (const struct data_dependence_relation *ddr)
4317 {
4318   unsigned i;
4319   subscript *sub;
4320
4321   FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
4322     if (!evolution_function_is_constant_p (SUB_ACCESS_FN (sub, 0))
4323         || !evolution_function_is_constant_p (SUB_ACCESS_FN (sub, 1)))
4324       return false;
4325
4326   return true;
4327 }
4328
4329 /* Helper function for the case where DDR_A and DDR_B are the same
4330    multivariate access function with a constant step.  For an example
4331    see pr34635-1.c.  */
4332
4333 static void
4334 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
4335 {
4336   int x_1, x_2;
4337   tree c_1 = CHREC_LEFT (c_2);
4338   tree c_0 = CHREC_LEFT (c_1);
4339   lambda_vector dist_v;
4340   HOST_WIDE_INT v1, v2, cd;
4341
4342   /* Polynomials with more than 2 variables are not handled yet.  When
4343      the evolution steps are parameters, it is not possible to
4344      represent the dependence using classical distance vectors.  */
4345   if (TREE_CODE (c_0) != INTEGER_CST
4346       || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
4347       || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
4348     {
4349       DDR_AFFINE_P (ddr) = false;
4350       return;
4351     }
4352
4353   x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
4354   x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
4355
4356   /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2).  */
4357   dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4358   v1 = int_cst_value (CHREC_RIGHT (c_1));
4359   v2 = int_cst_value (CHREC_RIGHT (c_2));
4360   cd = gcd (v1, v2);
4361   v1 /= cd;
4362   v2 /= cd;
4363
4364   if (v2 < 0)
4365     {
4366       v2 = -v2;
4367       v1 = -v1;
4368     }
4369
4370   dist_v[x_1] = v2;
4371   dist_v[x_2] = -v1;
4372   save_dist_v (ddr, dist_v);
4373
4374   add_outer_distances (ddr, dist_v, x_1);
4375 }
4376
4377 /* Helper function for the case where DDR_A and DDR_B are the same
4378    access functions.  */
4379
4380 static void
4381 add_other_self_distances (struct data_dependence_relation *ddr)
4382 {
4383   lambda_vector dist_v;
4384   unsigned i;
4385   int index_carry = DDR_NB_LOOPS (ddr);
4386   subscript *sub;
4387
4388   FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
4389     {
4390       tree access_fun = SUB_ACCESS_FN (sub, 0);
4391
4392       if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
4393         {
4394           if (!evolution_function_is_univariate_p (access_fun))
4395             {
4396               if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
4397                 {
4398                   DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
4399                   return;
4400                 }
4401
4402               access_fun = SUB_ACCESS_FN (DDR_SUBSCRIPT (ddr, 0), 0);
4403
4404               if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
4405                 add_multivariate_self_dist (ddr, access_fun);
4406               else
4407                 /* The evolution step is not constant: it varies in
4408                    the outer loop, so this cannot be represented by a
4409                    distance vector.  For example in pr34635.c the
4410                    evolution is {0, +, {0, +, 4}_1}_2.  */
4411                 DDR_AFFINE_P (ddr) = false;
4412
4413               return;
4414             }
4415
4416           index_carry = MIN (index_carry,
4417                              index_in_loop_nest (CHREC_VARIABLE (access_fun),
4418                                                  DDR_LOOP_NEST (ddr)));
4419         }
4420     }
4421
4422   dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4423   add_outer_distances (ddr, dist_v, index_carry);
4424 }
4425
4426 static void
4427 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
4428 {
4429   lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4430
4431   dist_v[DDR_INNER_LOOP (ddr)] = 1;
4432   save_dist_v (ddr, dist_v);
4433 }
4434
4435 /* Adds a unit distance vector to DDR when there is a 0 overlap.  This
4436    is the case for example when access functions are the same and
4437    equal to a constant, as in:
4438
4439    | loop_1
4440    |   A[3] = ...
4441    |   ... = A[3]
4442    | endloop_1
4443
4444    in which case the distance vectors are (0) and (1).  */
4445
4446 static void
4447 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
4448 {
4449   unsigned i, j;
4450
4451   for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
4452     {
4453       subscript_p sub = DDR_SUBSCRIPT (ddr, i);
4454       conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
4455       conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
4456
4457       for (j = 0; j < ca->n; j++)
4458         if (affine_function_zero_p (ca->fns[j]))
4459           {
4460             insert_innermost_unit_dist_vector (ddr);
4461             return;
4462           }
4463
4464       for (j = 0; j < cb->n; j++)
4465         if (affine_function_zero_p (cb->fns[j]))
4466           {
4467             insert_innermost_unit_dist_vector (ddr);
4468             return;
4469           }
4470     }
4471 }
4472
4473 /* Return true when the DDR contains two data references that have the
4474    same access functions.  */
4475
4476 static inline bool
4477 same_access_functions (const struct data_dependence_relation *ddr)
4478 {
4479   unsigned i;
4480   subscript *sub;
4481
4482   FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
4483     if (!eq_evolutions_p (SUB_ACCESS_FN (sub, 0),
4484                           SUB_ACCESS_FN (sub, 1)))
4485       return false;
4486
4487   return true;
4488 }
4489
4490 /* Compute the classic per loop distance vector.  DDR is the data
4491    dependence relation to build a vector from.  Return false when fail
4492    to represent the data dependence as a distance vector.  */
4493
4494 static bool
4495 build_classic_dist_vector (struct data_dependence_relation *ddr,
4496                            struct loop *loop_nest)
4497 {
4498   bool init_b = false;
4499   int index_carry = DDR_NB_LOOPS (ddr);
4500   lambda_vector dist_v;
4501
4502   if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
4503     return false;
4504
4505   if (same_access_functions (ddr))
4506     {
4507       /* Save the 0 vector.  */
4508       dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4509       save_dist_v (ddr, dist_v);
4510
4511       if (constant_access_functions (ddr))
4512         add_distance_for_zero_overlaps (ddr);
4513
4514       if (DDR_NB_LOOPS (ddr) > 1)
4515         add_other_self_distances (ddr);
4516
4517       return true;
4518     }
4519
4520   dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4521   if (!build_classic_dist_vector_1 (ddr, 0, 1, dist_v, &init_b, &index_carry))
4522     return false;
4523
4524   /* Save the distance vector if we initialized one.  */
4525   if (init_b)
4526     {
4527       /* Verify a basic constraint: classic distance vectors should
4528          always be lexicographically positive.
4529
4530          Data references are collected in the order of execution of
4531          the program, thus for the following loop
4532
4533          | for (i = 1; i < 100; i++)
4534          |   for (j = 1; j < 100; j++)
4535          |     {
4536          |       t = T[j+1][i-1];  // A
4537          |       T[j][i] = t + 2;  // B
4538          |     }
4539
4540          references are collected following the direction of the wind:
4541          A then B.  The data dependence tests are performed also
4542          following this order, such that we're looking at the distance
4543          separating the elements accessed by A from the elements later
4544          accessed by B.  But in this example, the distance returned by
4545          test_dep (A, B) is lexicographically negative (-1, 1), that
4546          means that the access A occurs later than B with respect to
4547          the outer loop, ie. we're actually looking upwind.  In this
4548          case we solve test_dep (B, A) looking downwind to the
4549          lexicographically positive solution, that returns the
4550          distance vector (1, -1).  */
4551       if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
4552         {
4553           lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4554           if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
4555             return false;
4556           compute_subscript_distance (ddr);
4557           if (!build_classic_dist_vector_1 (ddr, 1, 0, save_v, &init_b,
4558                                             &index_carry))
4559             return false;
4560           save_dist_v (ddr, save_v);
4561           DDR_REVERSED_P (ddr) = true;
4562
4563           /* In this case there is a dependence forward for all the
4564              outer loops:
4565
4566              | for (k = 1; k < 100; k++)
4567              |  for (i = 1; i < 100; i++)
4568              |   for (j = 1; j < 100; j++)
4569              |     {
4570              |       t = T[j+1][i-1];  // A
4571              |       T[j][i] = t + 2;  // B
4572              |     }
4573
4574              the vectors are:
4575              (0,  1, -1)
4576              (1,  1, -1)
4577              (1, -1,  1)
4578           */
4579           if (DDR_NB_LOOPS (ddr) > 1)
4580             {
4581               add_outer_distances (ddr, save_v, index_carry);
4582               add_outer_distances (ddr, dist_v, index_carry);
4583             }
4584         }
4585       else
4586         {
4587           lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4588           lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
4589
4590           if (DDR_NB_LOOPS (ddr) > 1)
4591             {
4592               lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4593
4594               if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
4595                 return false;
4596               compute_subscript_distance (ddr);
4597               if (!build_classic_dist_vector_1 (ddr, 1, 0, opposite_v, &init_b,
4598                                                 &index_carry))
4599                 return false;
4600
4601               save_dist_v (ddr, save_v);
4602               add_outer_distances (ddr, dist_v, index_carry);
4603               add_outer_distances (ddr, opposite_v, index_carry);
4604             }
4605           else
4606             save_dist_v (ddr, save_v);
4607         }
4608     }
4609   else
4610     {
4611       /* There is a distance of 1 on all the outer loops: Example:
4612          there is a dependence of distance 1 on loop_1 for the array A.
4613
4614          | loop_1
4615          |   A[5] = ...
4616          | endloop
4617       */
4618       add_outer_distances (ddr, dist_v,
4619                            lambda_vector_first_nz (dist_v,
4620                                                    DDR_NB_LOOPS (ddr), 0));
4621     }
4622
4623   if (dump_file && (dump_flags & TDF_DETAILS))
4624     {
4625       unsigned i;
4626
4627       fprintf (dump_file, "(build_classic_dist_vector\n");
4628       for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
4629         {
4630           fprintf (dump_file, "  dist_vector = (");
4631           print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
4632                                DDR_NB_LOOPS (ddr));
4633           fprintf (dump_file, "  )\n");
4634         }
4635       fprintf (dump_file, ")\n");
4636     }
4637
4638   return true;
4639 }
4640
4641 /* Return the direction for a given distance.
4642    FIXME: Computing dir this way is suboptimal, since dir can catch
4643    cases that dist is unable to represent.  */
4644
4645 static inline enum data_dependence_direction
4646 dir_from_dist (int dist)
4647 {
4648   if (dist > 0)
4649     return dir_positive;
4650   else if (dist < 0)
4651     return dir_negative;
4652   else
4653     return dir_equal;
4654 }
4655
4656 /* Compute the classic per loop direction vector.  DDR is the data
4657    dependence relation to build a vector from.  */
4658
4659 static void
4660 build_classic_dir_vector (struct data_dependence_relation *ddr)
4661 {
4662   unsigned i, j;
4663   lambda_vector dist_v;
4664
4665   FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
4666     {
4667       lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4668
4669       for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
4670         dir_v[j] = dir_from_dist (dist_v[j]);
4671
4672       save_dir_v (ddr, dir_v);
4673     }
4674 }
4675
4676 /* Helper function.  Returns true when there is a dependence between the
4677    data references.  A_INDEX is the index of the first reference (0 for
4678    DDR_A, 1 for DDR_B) and B_INDEX is the index of the second reference.  */
4679
4680 static bool
4681 subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
4682                                unsigned int a_index, unsigned int b_index,
4683                                struct loop *loop_nest)
4684 {
4685   unsigned int i;
4686   tree last_conflicts;
4687   struct subscript *subscript;
4688   tree res = NULL_TREE;
4689
4690   for (i = 0; DDR_SUBSCRIPTS (ddr).iterate (i, &subscript); i++)
4691     {
4692       conflict_function *overlaps_a, *overlaps_b;
4693
4694       analyze_overlapping_iterations (SUB_ACCESS_FN (subscript, a_index),
4695                                       SUB_ACCESS_FN (subscript, b_index),
4696                                       &overlaps_a, &overlaps_b,
4697                                       &last_conflicts, loop_nest);
4698
4699       if (SUB_CONFLICTS_IN_A (subscript))
4700         free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
4701       if (SUB_CONFLICTS_IN_B (subscript))
4702         free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
4703
4704       SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
4705       SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
4706       SUB_LAST_CONFLICT (subscript) = last_conflicts;
4707
4708       /* If there is any undetermined conflict function we have to
4709          give a conservative answer in case we cannot prove that
4710          no dependence exists when analyzing another subscript.  */
4711       if (CF_NOT_KNOWN_P (overlaps_a)
4712           || CF_NOT_KNOWN_P (overlaps_b))
4713         {
4714           res = chrec_dont_know;
4715           continue;
4716         }
4717
4718       /* When there is a subscript with no dependence we can stop.  */
4719       else if (CF_NO_DEPENDENCE_P (overlaps_a)
4720                || CF_NO_DEPENDENCE_P (overlaps_b))
4721         {
4722           res = chrec_known;
4723           break;
4724         }
4725     }
4726
4727   if (res == NULL_TREE)
4728     return true;
4729
4730   if (res == chrec_known)
4731     dependence_stats.num_dependence_independent++;
4732   else
4733     dependence_stats.num_dependence_undetermined++;
4734   finalize_ddr_dependent (ddr, res);
4735   return false;
4736 }
4737
4738 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR.  */
4739
4740 static void
4741 subscript_dependence_tester (struct data_dependence_relation *ddr,
4742                              struct loop *loop_nest)
4743 {
4744   if (subscript_dependence_tester_1 (ddr, 0, 1, loop_nest))
4745     dependence_stats.num_dependence_dependent++;
4746
4747   compute_subscript_distance (ddr);
4748   if (build_classic_dist_vector (ddr, loop_nest))
4749     build_classic_dir_vector (ddr);
4750 }
4751
4752 /* Returns true when all the access functions of A are affine or
4753    constant with respect to LOOP_NEST.  */
4754
4755 static bool
4756 access_functions_are_affine_or_constant_p (const struct data_reference *a,
4757                                            const struct loop *loop_nest)
4758 {
4759   unsigned int i;
4760   vec<tree> fns = DR_ACCESS_FNS (a);
4761   tree t;
4762
4763   FOR_EACH_VEC_ELT (fns, i, t)
4764     if (!evolution_function_is_invariant_p (t, loop_nest->num)
4765         && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
4766       return false;
4767
4768   return true;
4769 }
4770
4771 /* This computes the affine dependence relation between A and B with
4772    respect to LOOP_NEST.  CHREC_KNOWN is used for representing the
4773    independence between two accesses, while CHREC_DONT_KNOW is used
4774    for representing the unknown relation.
4775
4776    Note that it is possible to stop the computation of the dependence
4777    relation the first time we detect a CHREC_KNOWN element for a given
4778    subscript.  */
4779
4780 void
4781 compute_affine_dependence (struct data_dependence_relation *ddr,
4782                            struct loop *loop_nest)
4783 {
4784   struct data_reference *dra = DDR_A (ddr);
4785   struct data_reference *drb = DDR_B (ddr);
4786
4787   if (dump_file && (dump_flags & TDF_DETAILS))
4788     {
4789       fprintf (dump_file, "(compute_affine_dependence\n");
4790       fprintf (dump_file, "  stmt_a: ");
4791       print_gimple_stmt (dump_file, DR_STMT (dra), 0, TDF_SLIM);
4792       fprintf (dump_file, "  stmt_b: ");
4793       print_gimple_stmt (dump_file, DR_STMT (drb), 0, TDF_SLIM);
4794     }
4795
4796   /* Analyze only when the dependence relation is not yet known.  */
4797   if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4798     {
4799       dependence_stats.num_dependence_tests++;
4800
4801       if (access_functions_are_affine_or_constant_p (dra, loop_nest)
4802           && access_functions_are_affine_or_constant_p (drb, loop_nest))
4803         subscript_dependence_tester (ddr, loop_nest);
4804
4805       /* As a last case, if the dependence cannot be determined, or if
4806          the dependence is considered too difficult to determine, answer
4807          "don't know".  */
4808       else
4809         {
4810           dependence_stats.num_dependence_undetermined++;
4811
4812           if (dump_file && (dump_flags & TDF_DETAILS))
4813             {
4814               fprintf (dump_file, "Data ref a:\n");
4815               dump_data_reference (dump_file, dra);
4816               fprintf (dump_file, "Data ref b:\n");
4817               dump_data_reference (dump_file, drb);
4818               fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
4819             }
4820           finalize_ddr_dependent (ddr, chrec_dont_know);
4821         }
4822     }
4823
4824   if (dump_file && (dump_flags & TDF_DETAILS))
4825     {
4826       if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4827         fprintf (dump_file, ") -> no dependence\n");
4828       else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
4829         fprintf (dump_file, ") -> dependence analysis failed\n");
4830       else
4831         fprintf (dump_file, ")\n");
4832     }
4833 }
4834
4835 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
4836    the data references in DATAREFS, in the LOOP_NEST.  When
4837    COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
4838    relations.  Return true when successful, i.e. data references number
4839    is small enough to be handled.  */
4840
4841 bool
4842 compute_all_dependences (vec<data_reference_p> datarefs,
4843                          vec<ddr_p> *dependence_relations,
4844                          vec<loop_p> loop_nest,
4845                          bool compute_self_and_rr)
4846 {
4847   struct data_dependence_relation *ddr;
4848   struct data_reference *a, *b;
4849   unsigned int i, j;
4850
4851   if ((int) datarefs.length ()
4852       > PARAM_VALUE (PARAM_LOOP_MAX_DATAREFS_FOR_DATADEPS))
4853     {
4854       struct data_dependence_relation *ddr;
4855
4856       /* Insert a single relation into dependence_relations:
4857          chrec_dont_know.  */
4858       ddr = initialize_data_dependence_relation (NULL, NULL, loop_nest);
4859       dependence_relations->safe_push (ddr);
4860       return false;
4861     }
4862
4863   FOR_EACH_VEC_ELT (datarefs, i, a)
4864     for (j = i + 1; datarefs.iterate (j, &b); j++)
4865       if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
4866         {
4867           ddr = initialize_data_dependence_relation (a, b, loop_nest);
4868           dependence_relations->safe_push (ddr);
4869           if (loop_nest.exists ())
4870             compute_affine_dependence (ddr, loop_nest[0]);
4871         }
4872
4873   if (compute_self_and_rr)
4874     FOR_EACH_VEC_ELT (datarefs, i, a)
4875       {
4876         ddr = initialize_data_dependence_relation (a, a, loop_nest);
4877         dependence_relations->safe_push (ddr);
4878         if (loop_nest.exists ())
4879           compute_affine_dependence (ddr, loop_nest[0]);
4880       }
4881
4882   return true;
4883 }
4884
4885 /* Describes a location of a memory reference.  */
4886
4887 struct data_ref_loc
4888 {
4889   /* The memory reference.  */
4890   tree ref;
4891
4892   /* True if the memory reference is read.  */
4893   bool is_read;
4894
4895   /* True if the data reference is conditional within the containing
4896      statement, i.e. if it might not occur even when the statement
4897      is executed and runs to completion.  */
4898   bool is_conditional_in_stmt;
4899 };
4900
4901
4902 /* Stores the locations of memory references in STMT to REFERENCES.  Returns
4903    true if STMT clobbers memory, false otherwise.  */
4904
4905 static bool
4906 get_references_in_stmt (gimple *stmt, vec<data_ref_loc, va_heap> *references)
4907 {
4908   bool clobbers_memory = false;
4909   data_ref_loc ref;
4910   tree op0, op1;
4911   enum gimple_code stmt_code = gimple_code (stmt);
4912
4913   /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
4914      As we cannot model data-references to not spelled out
4915      accesses give up if they may occur.  */
4916   if (stmt_code == GIMPLE_CALL
4917       && !(gimple_call_flags (stmt) & ECF_CONST))
4918     {
4919       /* Allow IFN_GOMP_SIMD_LANE in their own loops.  */
4920       if (gimple_call_internal_p (stmt))
4921         switch (gimple_call_internal_fn (stmt))
4922           {
4923           case IFN_GOMP_SIMD_LANE:
4924             {
4925               struct loop *loop = gimple_bb (stmt)->loop_father;
4926               tree uid = gimple_call_arg (stmt, 0);
4927               gcc_assert (TREE_CODE (uid) == SSA_NAME);
4928               if (loop == NULL
4929                   || loop->simduid != SSA_NAME_VAR (uid))
4930                 clobbers_memory = true;
4931               break;
4932             }
4933           case IFN_MASK_LOAD:
4934           case IFN_MASK_STORE:
4935             break;
4936           default:
4937             clobbers_memory = true;
4938             break;
4939           }
4940       else
4941         clobbers_memory = true;
4942     }
4943   else if (stmt_code == GIMPLE_ASM
4944            && (gimple_asm_volatile_p (as_a <gasm *> (stmt))
4945                || gimple_vuse (stmt)))
4946     clobbers_memory = true;
4947
4948   if (!gimple_vuse (stmt))
4949     return clobbers_memory;
4950
4951   if (stmt_code == GIMPLE_ASSIGN)
4952     {
4953       tree base;
4954       op0 = gimple_assign_lhs (stmt);
4955       op1 = gimple_assign_rhs1 (stmt);
4956
4957       if (DECL_P (op1)
4958           || (REFERENCE_CLASS_P (op1)
4959               && (base = get_base_address (op1))
4960               && TREE_CODE (base) != SSA_NAME
4961               && !is_gimple_min_invariant (base)))
4962         {
4963           ref.ref = op1;
4964           ref.is_read = true;
4965           ref.is_conditional_in_stmt = false;
4966           references->safe_push (ref);
4967         }
4968     }
4969   else if (stmt_code == GIMPLE_CALL)
4970     {
4971       unsigned i, n;
4972       tree ptr, type;
4973       unsigned int align;
4974
4975       ref.is_read = false;
4976       if (gimple_call_internal_p (stmt))
4977         switch (gimple_call_internal_fn (stmt))
4978           {
4979           case IFN_MASK_LOAD:
4980             if (gimple_call_lhs (stmt) == NULL_TREE)
4981               break;
4982             ref.is_read = true;
4983             /* FALLTHRU */
4984           case IFN_MASK_STORE:
4985             ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)), 0);
4986             align = tree_to_shwi (gimple_call_arg (stmt, 1));
4987             if (ref.is_read)
4988               type = TREE_TYPE (gimple_call_lhs (stmt));
4989             else
4990               type = TREE_TYPE (gimple_call_arg (stmt, 3));
4991             if (TYPE_ALIGN (type) != align)
4992               type = build_aligned_type (type, align);
4993             ref.is_conditional_in_stmt = true;
4994             ref.ref = fold_build2 (MEM_REF, type, gimple_call_arg (stmt, 0),
4995                                    ptr);
4996             references->safe_push (ref);
4997             return false;
4998           default:
4999             break;
5000           }
5001
5002       op0 = gimple_call_lhs (stmt);
5003       n = gimple_call_num_args (stmt);
5004       for (i = 0; i < n; i++)
5005         {
5006           op1 = gimple_call_arg (stmt, i);
5007
5008           if (DECL_P (op1)
5009               || (REFERENCE_CLASS_P (op1) && get_base_address (op1)))
5010             {
5011               ref.ref = op1;
5012               ref.is_read = true;
5013               ref.is_conditional_in_stmt = false;
5014               references->safe_push (ref);
5015             }
5016         }
5017     }
5018   else
5019     return clobbers_memory;
5020
5021   if (op0
5022       && (DECL_P (op0)
5023           || (REFERENCE_CLASS_P (op0) && get_base_address (op0))))
5024     {
5025       ref.ref = op0;
5026       ref.is_read = false;
5027       ref.is_conditional_in_stmt = false;
5028       references->safe_push (ref);
5029     }
5030   return clobbers_memory;
5031 }
5032
5033
5034 /* Returns true if the loop-nest has any data reference.  */
5035
5036 bool
5037 loop_nest_has_data_refs (loop_p loop)
5038 {
5039   basic_block *bbs = get_loop_body (loop);
5040   auto_vec<data_ref_loc, 3> references;
5041
5042   for (unsigned i = 0; i < loop->num_nodes; i++)
5043     {
5044       basic_block bb = bbs[i];
5045       gimple_stmt_iterator bsi;
5046
5047       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5048         {
5049           gimple *stmt = gsi_stmt (bsi);
5050           get_references_in_stmt (stmt, &references);
5051           if (references.length ())
5052             {
5053               free (bbs);
5054               return true;
5055             }
5056         }
5057     }
5058   free (bbs);
5059   return false;
5060 }
5061
5062 /* Stores the data references in STMT to DATAREFS.  If there is an unanalyzable
5063    reference, returns false, otherwise returns true.  NEST is the outermost
5064    loop of the loop nest in which the references should be analyzed.  */
5065
5066 bool
5067 find_data_references_in_stmt (struct loop *nest, gimple *stmt,
5068                               vec<data_reference_p> *datarefs)
5069 {
5070   unsigned i;
5071   auto_vec<data_ref_loc, 2> references;
5072   data_ref_loc *ref;
5073   bool ret = true;
5074   data_reference_p dr;
5075
5076   if (get_references_in_stmt (stmt, &references))
5077     return false;
5078
5079   FOR_EACH_VEC_ELT (references, i, ref)
5080     {
5081       dr = create_data_ref (nest ? loop_preheader_edge (nest) : NULL,
5082                             loop_containing_stmt (stmt), ref->ref,
5083                             stmt, ref->is_read, ref->is_conditional_in_stmt);
5084       gcc_assert (dr != NULL);
5085       datarefs->safe_push (dr);
5086     }
5087
5088   return ret;
5089 }
5090
5091 /* Stores the data references in STMT to DATAREFS.  If there is an
5092    unanalyzable reference, returns false, otherwise returns true.
5093    NEST is the outermost loop of the loop nest in which the references
5094    should be instantiated, LOOP is the loop in which the references
5095    should be analyzed.  */
5096
5097 bool
5098 graphite_find_data_references_in_stmt (edge nest, loop_p loop, gimple *stmt,
5099                                        vec<data_reference_p> *datarefs)
5100 {
5101   unsigned i;
5102   auto_vec<data_ref_loc, 2> references;
5103   data_ref_loc *ref;
5104   bool ret = true;
5105   data_reference_p dr;
5106
5107   if (get_references_in_stmt (stmt, &references))
5108     return false;
5109
5110   FOR_EACH_VEC_ELT (references, i, ref)
5111     {
5112       dr = create_data_ref (nest, loop, ref->ref, stmt, ref->is_read,
5113                             ref->is_conditional_in_stmt);
5114       gcc_assert (dr != NULL);
5115       datarefs->safe_push (dr);
5116     }
5117
5118   return ret;
5119 }
5120
5121 /* Search the data references in LOOP, and record the information into
5122    DATAREFS.  Returns chrec_dont_know when failing to analyze a
5123    difficult case, returns NULL_TREE otherwise.  */
5124
5125 tree
5126 find_data_references_in_bb (struct loop *loop, basic_block bb,
5127                             vec<data_reference_p> *datarefs)
5128 {
5129   gimple_stmt_iterator bsi;
5130
5131   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5132     {
5133       gimple *stmt = gsi_stmt (bsi);
5134
5135       if (!find_data_references_in_stmt (loop, stmt, datarefs))
5136         {
5137           struct data_reference *res;
5138           res = XCNEW (struct data_reference);
5139           datarefs->safe_push (res);
5140
5141           return chrec_dont_know;
5142         }
5143     }
5144
5145   return NULL_TREE;
5146 }
5147
5148 /* Search the data references in LOOP, and record the information into
5149    DATAREFS.  Returns chrec_dont_know when failing to analyze a
5150    difficult case, returns NULL_TREE otherwise.
5151
5152    TODO: This function should be made smarter so that it can handle address
5153    arithmetic as if they were array accesses, etc.  */
5154
5155 tree
5156 find_data_references_in_loop (struct loop *loop,
5157                               vec<data_reference_p> *datarefs)
5158 {
5159   basic_block bb, *bbs;
5160   unsigned int i;
5161
5162   bbs = get_loop_body_in_dom_order (loop);
5163
5164   for (i = 0; i < loop->num_nodes; i++)
5165     {
5166       bb = bbs[i];
5167
5168       if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
5169         {
5170           free (bbs);
5171           return chrec_dont_know;
5172         }
5173     }
5174   free (bbs);
5175
5176   return NULL_TREE;
5177 }
5178
5179 /* Return the alignment in bytes that DRB is guaranteed to have at all
5180    times.  */
5181
5182 unsigned int
5183 dr_alignment (innermost_loop_behavior *drb)
5184 {
5185   /* Get the alignment of BASE_ADDRESS + INIT.  */
5186   unsigned int alignment = drb->base_alignment;
5187   unsigned int misalignment = (drb->base_misalignment
5188                                + TREE_INT_CST_LOW (drb->init));
5189   if (misalignment != 0)
5190     alignment = MIN (alignment, misalignment & -misalignment);
5191
5192   /* Cap it to the alignment of OFFSET.  */
5193   if (!integer_zerop (drb->offset))
5194     alignment = MIN (alignment, drb->offset_alignment);
5195
5196   /* Cap it to the alignment of STEP.  */
5197   if (!integer_zerop (drb->step))
5198     alignment = MIN (alignment, drb->step_alignment);
5199
5200   return alignment;
5201 }
5202
5203 /* Recursive helper function.  */
5204
5205 static bool
5206 find_loop_nest_1 (struct loop *loop, vec<loop_p> *loop_nest)
5207 {
5208   /* Inner loops of the nest should not contain siblings.  Example:
5209      when there are two consecutive loops,
5210
5211      | loop_0
5212      |   loop_1
5213      |     A[{0, +, 1}_1]
5214      |   endloop_1
5215      |   loop_2
5216      |     A[{0, +, 1}_2]
5217      |   endloop_2
5218      | endloop_0
5219
5220      the dependence relation cannot be captured by the distance
5221      abstraction.  */
5222   if (loop->next)
5223     return false;
5224
5225   loop_nest->safe_push (loop);
5226   if (loop->inner)
5227     return find_loop_nest_1 (loop->inner, loop_nest);
5228   return true;
5229 }
5230
5231 /* Return false when the LOOP is not well nested.  Otherwise return
5232    true and insert in LOOP_NEST the loops of the nest.  LOOP_NEST will
5233    contain the loops from the outermost to the innermost, as they will
5234    appear in the classic distance vector.  */
5235
5236 bool
5237 find_loop_nest (struct loop *loop, vec<loop_p> *loop_nest)
5238 {
5239   loop_nest->safe_push (loop);
5240   if (loop->inner)
5241     return find_loop_nest_1 (loop->inner, loop_nest);
5242   return true;
5243 }
5244
5245 /* Returns true when the data dependences have been computed, false otherwise.
5246    Given a loop nest LOOP, the following vectors are returned:
5247    DATAREFS is initialized to all the array elements contained in this loop,
5248    DEPENDENCE_RELATIONS contains the relations between the data references.
5249    Compute read-read and self relations if
5250    COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE.  */
5251
5252 bool
5253 compute_data_dependences_for_loop (struct loop *loop,
5254                                    bool compute_self_and_read_read_dependences,
5255                                    vec<loop_p> *loop_nest,
5256                                    vec<data_reference_p> *datarefs,
5257                                    vec<ddr_p> *dependence_relations)
5258 {
5259   bool res = true;
5260
5261   memset (&dependence_stats, 0, sizeof (dependence_stats));
5262
5263   /* If the loop nest is not well formed, or one of the data references
5264      is not computable, give up without spending time to compute other
5265      dependences.  */
5266   if (!loop
5267       || !find_loop_nest (loop, loop_nest)
5268       || find_data_references_in_loop (loop, datarefs) == chrec_dont_know
5269       || !compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
5270                                    compute_self_and_read_read_dependences))
5271     res = false;
5272
5273   if (dump_file && (dump_flags & TDF_STATS))
5274     {
5275       fprintf (dump_file, "Dependence tester statistics:\n");
5276
5277       fprintf (dump_file, "Number of dependence tests: %d\n",
5278                dependence_stats.num_dependence_tests);
5279       fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
5280                dependence_stats.num_dependence_dependent);
5281       fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
5282                dependence_stats.num_dependence_independent);
5283       fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
5284                dependence_stats.num_dependence_undetermined);
5285
5286       fprintf (dump_file, "Number of subscript tests: %d\n",
5287                dependence_stats.num_subscript_tests);
5288       fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
5289                dependence_stats.num_subscript_undetermined);
5290       fprintf (dump_file, "Number of same subscript function: %d\n",
5291                dependence_stats.num_same_subscript_function);
5292
5293       fprintf (dump_file, "Number of ziv tests: %d\n",
5294                dependence_stats.num_ziv);
5295       fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
5296                dependence_stats.num_ziv_dependent);
5297       fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
5298                dependence_stats.num_ziv_independent);
5299       fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
5300                dependence_stats.num_ziv_unimplemented);
5301
5302       fprintf (dump_file, "Number of siv tests: %d\n",
5303                dependence_stats.num_siv);
5304       fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
5305                dependence_stats.num_siv_dependent);
5306       fprintf (dump_file, "Number of siv tests returning independent: %d\n",
5307                dependence_stats.num_siv_independent);
5308       fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
5309                dependence_stats.num_siv_unimplemented);
5310
5311       fprintf (dump_file, "Number of miv tests: %d\n",
5312                dependence_stats.num_miv);
5313       fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
5314                dependence_stats.num_miv_dependent);
5315       fprintf (dump_file, "Number of miv tests returning independent: %d\n",
5316                dependence_stats.num_miv_independent);
5317       fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
5318                dependence_stats.num_miv_unimplemented);
5319     }
5320
5321   return res;
5322 }
5323
5324 /* Free the memory used by a data dependence relation DDR.  */
5325
5326 void
5327 free_dependence_relation (struct data_dependence_relation *ddr)
5328 {
5329   if (ddr == NULL)
5330     return;
5331
5332   if (DDR_SUBSCRIPTS (ddr).exists ())
5333     free_subscripts (DDR_SUBSCRIPTS (ddr));
5334   DDR_DIST_VECTS (ddr).release ();
5335   DDR_DIR_VECTS (ddr).release ();
5336
5337   free (ddr);
5338 }
5339
5340 /* Free the memory used by the data dependence relations from
5341    DEPENDENCE_RELATIONS.  */
5342
5343 void
5344 free_dependence_relations (vec<ddr_p> dependence_relations)
5345 {
5346   unsigned int i;
5347   struct data_dependence_relation *ddr;
5348
5349   FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
5350     if (ddr)
5351       free_dependence_relation (ddr);
5352
5353   dependence_relations.release ();
5354 }
5355
5356 /* Free the memory used by the data references from DATAREFS.  */
5357
5358 void
5359 free_data_refs (vec<data_reference_p> datarefs)
5360 {
5361   unsigned int i;
5362   struct data_reference *dr;
5363
5364   FOR_EACH_VEC_ELT (datarefs, i, dr)
5365     free_data_ref (dr);
5366   datarefs.release ();
5367 }
5368
5369 /* Common routine implementing both dr_direction_indicator and
5370    dr_zero_step_indicator.  Return USEFUL_MIN if the indicator is known
5371    to be >= USEFUL_MIN and -1 if the indicator is known to be negative.
5372    Return the step as the indicator otherwise.  */
5373
5374 static tree
5375 dr_step_indicator (struct data_reference *dr, int useful_min)
5376 {
5377   tree step = DR_STEP (dr);
5378   STRIP_NOPS (step);
5379   /* Look for cases where the step is scaled by a positive constant
5380      integer, which will often be the access size.  If the multiplication
5381      doesn't change the sign (due to overflow effects) then we can
5382      test the unscaled value instead.  */
5383   if (TREE_CODE (step) == MULT_EXPR
5384       && TREE_CODE (TREE_OPERAND (step, 1)) == INTEGER_CST
5385       && tree_int_cst_sgn (TREE_OPERAND (step, 1)) > 0)
5386     {
5387       tree factor = TREE_OPERAND (step, 1);
5388       step = TREE_OPERAND (step, 0);
5389
5390       /* Strip widening and truncating conversions as well as nops.  */
5391       if (CONVERT_EXPR_P (step)
5392           && INTEGRAL_TYPE_P (TREE_TYPE (TREE_OPERAND (step, 0))))
5393         step = TREE_OPERAND (step, 0);
5394       tree type = TREE_TYPE (step);
5395
5396       /* Get the range of step values that would not cause overflow.  */
5397       widest_int minv = (wi::to_widest (TYPE_MIN_VALUE (ssizetype))
5398                          / wi::to_widest (factor));
5399       widest_int maxv = (wi::to_widest (TYPE_MAX_VALUE (ssizetype))
5400                          / wi::to_widest (factor));
5401
5402       /* Get the range of values that the unconverted step actually has.  */
5403       wide_int step_min, step_max;
5404       if (TREE_CODE (step) != SSA_NAME
5405           || get_range_info (step, &step_min, &step_max) != VR_RANGE)
5406         {
5407           step_min = wi::to_wide (TYPE_MIN_VALUE (type));
5408           step_max = wi::to_wide (TYPE_MAX_VALUE (type));
5409         }
5410
5411       /* Check whether the unconverted step has an acceptable range.  */
5412       signop sgn = TYPE_SIGN (type);
5413       if (wi::les_p (minv, widest_int::from (step_min, sgn))
5414           && wi::ges_p (maxv, widest_int::from (step_max, sgn)))
5415         {
5416           if (wi::ge_p (step_min, useful_min, sgn))
5417             return ssize_int (useful_min);
5418           else if (wi::lt_p (step_max, 0, sgn))
5419             return ssize_int (-1);
5420           else
5421             return fold_convert (ssizetype, step);
5422         }
5423     }
5424   return DR_STEP (dr);
5425 }
5426
5427 /* Return a value that is negative iff DR has a negative step.  */
5428
5429 tree
5430 dr_direction_indicator (struct data_reference *dr)
5431 {
5432   return dr_step_indicator (dr, 0);
5433 }
5434
5435 /* Return a value that is zero iff DR has a zero step.  */
5436
5437 tree
5438 dr_zero_step_indicator (struct data_reference *dr)
5439 {
5440   return dr_step_indicator (dr, 1);
5441 }
5442
5443 /* Return true if DR is known to have a nonnegative (but possibly zero)
5444    step.  */
5445
5446 bool
5447 dr_known_forward_stride_p (struct data_reference *dr)
5448 {
5449   tree indicator = dr_direction_indicator (dr);
5450   tree neg_step_val = fold_binary (LT_EXPR, boolean_type_node,
5451                                    fold_convert (ssizetype, indicator),
5452                                    ssize_int (0));
5453   return neg_step_val && integer_zerop (neg_step_val);
5454 }