gcc/tree-data-ref.c

   1 /* Data references and dependences detectors.
   2    Copyright (C) 2003-2018 Free Software Foundation, Inc.
   3    Contributed by Sebastian Pop <pop@cri.ensmp.fr>
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 /* This pass walks a given loop structure searching for array
  22    references.  The information about the array accesses is recorded
  23    in DATA_REFERENCE structures.
  24
  25    The basic test for determining the dependences is:
   26    given two access functions chrec1 and chrec2 to the same array, and
   27    two vectors x and y from the iteration domain, the same element of
  28    the array is accessed twice at iterations x and y if and only if:
  29    |             chrec1 (x) == chrec2 (y).
  30
  31    The goals of this analysis are:
  32
  33    - to determine the independence: the relation between two
  34      independent accesses is qualified with the chrec_known (this
  35      information allows a loop parallelization),
  36
  37    - when two data references access the same data, to qualify the
  38      dependence relation with classic dependence representations:
  39
  40        - distance vectors
  41        - direction vectors
  42        - loop carried level dependence
  43        - polyhedron dependence
  44      or with the chains of recurrences based representation,
  45
  46    - to define a knowledge base for storing the data dependence
  47      information,
  48
  49    - to define an interface to access this data.
  50
  51
  52    Definitions:
  53
  54    - subscript: given two array accesses a subscript is the tuple
  55    composed of the access functions for a given dimension.  Example:
  56    Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
  57    (f1, g1), (f2, g2), (f3, g3).
  58
  59    - Diophantine equation: an equation whose coefficients and
  60    solutions are integer constants, for example the equation
  61    |   3*x + 2*y = 1
  62    has an integer solution x = 1 and y = -1.
  63
  64    References:
  65
  66    - "Advanced Compilation for High Performance Computing" by Randy
  67    Allen and Ken Kennedy.
  68    http://citeseer.ist.psu.edu/goff91practical.html
  69
  70    - "Loop Transformations for Restructuring Compilers - The Foundations"
  71    by Utpal Banerjee.
  72
  73
  74 */
  75
  76 #include "config.h"
  77 #include "system.h"
  78 #include "coretypes.h"
  79 #include "backend.h"
  80 #include "rtl.h"
  81 #include "tree.h"
  82 #include "gimple.h"
  83 #include "gimple-pretty-print.h"
  84 #include "alias.h"
  85 #include "fold-const.h"
  86 #include "expr.h"
  87 #include "gimple-iterator.h"
  88 #include "tree-ssa-loop-niter.h"
  89 #include "tree-ssa-loop.h"
  90 #include "tree-ssa.h"
  91 #include "cfgloop.h"
  92 #include "tree-data-ref.h"
  93 #include "tree-scalar-evolution.h"
  94 #include "dumpfile.h"
  95 #include "tree-affine.h"
  96 #include "params.h"
  97 #include "builtins.h"
  98 #include "stringpool.h"
  99 #include "tree-vrp.h"
 100 #include "tree-ssanames.h"
 101 #include "tree-eh.h"
 102
 103 static struct datadep_stats
 104 {
 105   int num_dependence_tests;
 106   int num_dependence_dependent;
 107   int num_dependence_independent;
 108   int num_dependence_undetermined;
 109
 110   int num_subscript_tests;
 111   int num_subscript_undetermined;
 112   int num_same_subscript_function;
 113
 114   int num_ziv;
 115   int num_ziv_independent;
 116   int num_ziv_dependent;
 117   int num_ziv_unimplemented;
 118
 119   int num_siv;
 120   int num_siv_independent;
 121   int num_siv_dependent;
 122   int num_siv_unimplemented;
 123
 124   int num_miv;
 125   int num_miv_independent;
 126   int num_miv_dependent;
 127   int num_miv_unimplemented;
 128 } dependence_stats;
 129
 130 static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
 131                                            unsigned int, unsigned int,
 132                                            struct loop *);
 133 /* Returns true iff A divides B.  */
 134
 135 static inline bool
 136 tree_fold_divides_p (const_tree a, const_tree b)
 137 {
 138   gcc_assert (TREE_CODE (a) == INTEGER_CST);
 139   gcc_assert (TREE_CODE (b) == INTEGER_CST);
 140   return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
 141 }
 142
 143 /* Returns true iff A divides B.  */
 144
 145 static inline bool
 146 int_divides_p (int a, int b)
 147 {
 148   return ((b % a) == 0);
 149 }
 150
 151 /* Return true if reference REF contains a union access.  */
 152
 153 static bool
 154 ref_contains_union_access_p (tree ref)
 155 {
 156   while (handled_component_p (ref))
 157     {
 158       ref = TREE_OPERAND (ref, 0);
 159       if (TREE_CODE (TREE_TYPE (ref)) == UNION_TYPE
 160           || TREE_CODE (TREE_TYPE (ref)) == QUAL_UNION_TYPE)
 161         return true;
 162     }
 163   return false;
 164 }
 165
 166 \f
 167
 168 /* Dump into FILE all the data references from DATAREFS.  */
 169
 170 static void
 171 dump_data_references (FILE *file, vec<data_reference_p> datarefs)
 172 {
 173   unsigned int i;
 174   struct data_reference *dr;
 175
 176   FOR_EACH_VEC_ELT (datarefs, i, dr)
 177     dump_data_reference (file, dr);
 178 }
 179
 180 /* Unified dump into FILE all the data references from DATAREFS.  */
 181
 182 DEBUG_FUNCTION void
 183 debug (vec<data_reference_p> &ref)
 184 {
 185   dump_data_references (stderr, ref);
 186 }
 187
 188 DEBUG_FUNCTION void
 189 debug (vec<data_reference_p> *ptr)
 190 {
 191   if (ptr)
 192     debug (*ptr);
 193   else
 194     fprintf (stderr, "<nil>\n");
 195 }
 196
 197
 198 /* Dump into STDERR all the data references from DATAREFS.  */
 199
 200 DEBUG_FUNCTION void
 201 debug_data_references (vec<data_reference_p> datarefs)
 202 {
 203   dump_data_references (stderr, datarefs);
 204 }
 205
 206 /* Print to STDERR the data_reference DR.  */
 207
 208 DEBUG_FUNCTION void
 209 debug_data_reference (struct data_reference *dr)
 210 {
 211   dump_data_reference (stderr, dr);
 212 }
 213
 214 /* Dump function for a DATA_REFERENCE structure.  */
 215
 216 void
 217 dump_data_reference (FILE *outf,
 218                      struct data_reference *dr)
 219 {
 220   unsigned int i;
 221
 222   fprintf (outf, "#(Data Ref: \n");
 223   fprintf (outf, "#  bb: %d \n", gimple_bb (DR_STMT (dr))->index);
 224   fprintf (outf, "#  stmt: ");
 225   print_gimple_stmt (outf, DR_STMT (dr), 0);
 226   fprintf (outf, "#  ref: ");
 227   print_generic_stmt (outf, DR_REF (dr));
 228   fprintf (outf, "#  base_object: ");
 229   print_generic_stmt (outf, DR_BASE_OBJECT (dr));
 230
 231   for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
 232     {
 233       fprintf (outf, "#  Access function %d: ", i);
 234       print_generic_stmt (outf, DR_ACCESS_FN (dr, i));
 235     }
 236   fprintf (outf, "#)\n");
 237 }
 238
 239 /* Unified dump function for a DATA_REFERENCE structure.  */
 240
 241 DEBUG_FUNCTION void
 242 debug (data_reference &ref)
 243 {
 244   dump_data_reference (stderr, &ref);
 245 }
 246
 247 DEBUG_FUNCTION void
 248 debug (data_reference *ptr)
 249 {
 250   if (ptr)
 251     debug (*ptr);
 252   else
 253     fprintf (stderr, "<nil>\n");
 254 }
 255
 256
 257 /* Dumps the affine function described by FN to the file OUTF.  */
 258
 259 DEBUG_FUNCTION void
 260 dump_affine_function (FILE *outf, affine_fn fn)
 261 {
 262   unsigned i;
 263   tree coef;
 264
 265   print_generic_expr (outf, fn[0], TDF_SLIM);
 266   for (i = 1; fn.iterate (i, &coef); i++)
 267     {
 268       fprintf (outf, " + ");
 269       print_generic_expr (outf, coef, TDF_SLIM);
 270       fprintf (outf, " * x_%u", i);
 271     }
 272 }
 273
 274 /* Dumps the conflict function CF to the file OUTF.  */
 275
 276 DEBUG_FUNCTION void
 277 dump_conflict_function (FILE *outf, conflict_function *cf)
 278 {
 279   unsigned i;
 280
 281   if (cf->n == NO_DEPENDENCE)
 282     fprintf (outf, "no dependence");
 283   else if (cf->n == NOT_KNOWN)
 284     fprintf (outf, "not known");
 285   else
 286     {
 287       for (i = 0; i < cf->n; i++)
 288         {
 289           if (i != 0)
 290             fprintf (outf, " ");
 291           fprintf (outf, "[");
 292           dump_affine_function (outf, cf->fns[i]);
 293           fprintf (outf, "]");
 294         }
 295     }
 296 }
 297
 298 /* Dump function for a SUBSCRIPT structure.  */
 299
 300 DEBUG_FUNCTION void
 301 dump_subscript (FILE *outf, struct subscript *subscript)
 302 {
 303   conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
 304
 305   fprintf (outf, "\n (subscript \n");
 306   fprintf (outf, "  iterations_that_access_an_element_twice_in_A: ");
 307   dump_conflict_function (outf, cf);
 308   if (CF_NONTRIVIAL_P (cf))
 309     {
 310       tree last_iteration = SUB_LAST_CONFLICT (subscript);
 311       fprintf (outf, "\n  last_conflict: ");
 312       print_generic_expr (outf, last_iteration);
 313     }
 314
 315   cf = SUB_CONFLICTS_IN_B (subscript);
 316   fprintf (outf, "\n  iterations_that_access_an_element_twice_in_B: ");
 317   dump_conflict_function (outf, cf);
 318   if (CF_NONTRIVIAL_P (cf))
 319     {
 320       tree last_iteration = SUB_LAST_CONFLICT (subscript);
 321       fprintf (outf, "\n  last_conflict: ");
 322       print_generic_expr (outf, last_iteration);
 323     }
 324
 325   fprintf (outf, "\n  (Subscript distance: ");
 326   print_generic_expr (outf, SUB_DISTANCE (subscript));
 327   fprintf (outf, " ))\n");
 328 }
 329
 330 /* Print the classic direction vector DIRV to OUTF.  */
 331
 332 DEBUG_FUNCTION void
 333 print_direction_vector (FILE *outf,
 334                         lambda_vector dirv,
 335                         int length)
 336 {
 337   int eq;
 338
 339   for (eq = 0; eq < length; eq++)
 340     {
 341       enum data_dependence_direction dir = ((enum data_dependence_direction)
 342                                             dirv[eq]);
 343
 344       switch (dir)
 345         {
 346         case dir_positive:
 347           fprintf (outf, "    +");
 348           break;
 349         case dir_negative:
 350           fprintf (outf, "    -");
 351           break;
 352         case dir_equal:
 353           fprintf (outf, "    =");
 354           break;
 355         case dir_positive_or_equal:
 356           fprintf (outf, "   +=");
 357           break;
 358         case dir_positive_or_negative:
 359           fprintf (outf, "   +-");
 360           break;
 361         case dir_negative_or_equal:
 362           fprintf (outf, "   -=");
 363           break;
 364         case dir_star:
 365           fprintf (outf, "    *");
 366           break;
 367         default:
 368           fprintf (outf, "indep");
 369           break;
 370         }
 371     }
 372   fprintf (outf, "\n");
 373 }
 374
 375 /* Print a vector of direction vectors.  */
 376
 377 DEBUG_FUNCTION void
 378 print_dir_vectors (FILE *outf, vec<lambda_vector> dir_vects,
 379                    int length)
 380 {
 381   unsigned j;
 382   lambda_vector v;
 383
 384   FOR_EACH_VEC_ELT (dir_vects, j, v)
 385     print_direction_vector (outf, v, length);
 386 }
 387
 388 /* Print out a vector VEC of length N to OUTFILE.  */
 389
 390 DEBUG_FUNCTION void
 391 print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
 392 {
 393   int i;
 394
 395   for (i = 0; i < n; i++)
 396     fprintf (outfile, "%3d ", vector[i]);
 397   fprintf (outfile, "\n");
 398 }
 399
 400 /* Print a vector of distance vectors.  */
 401
 402 DEBUG_FUNCTION void
 403 print_dist_vectors (FILE *outf, vec<lambda_vector> dist_vects,
 404                     int length)
 405 {
 406   unsigned j;
 407   lambda_vector v;
 408
 409   FOR_EACH_VEC_ELT (dist_vects, j, v)
 410     print_lambda_vector (outf, v, length);
 411 }
 412
 413 /* Dump function for a DATA_DEPENDENCE_RELATION structure.  */
 414
 415 DEBUG_FUNCTION void
 416 dump_data_dependence_relation (FILE *outf,
 417                                struct data_dependence_relation *ddr)
 418 {
 419   struct data_reference *dra, *drb;
 420
 421   fprintf (outf, "(Data Dep: \n");
 422
 423   if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
 424     {
 425       if (ddr)
 426         {
 427           dra = DDR_A (ddr);
 428           drb = DDR_B (ddr);
 429           if (dra)
 430             dump_data_reference (outf, dra);
 431           else
 432             fprintf (outf, "    (nil)\n");
 433           if (drb)
 434             dump_data_reference (outf, drb);
 435           else
 436             fprintf (outf, "    (nil)\n");
 437         }
 438       fprintf (outf, "    (don't know)\n)\n");
 439       return;
 440     }
 441
 442   dra = DDR_A (ddr);
 443   drb = DDR_B (ddr);
 444   dump_data_reference (outf, dra);
 445   dump_data_reference (outf, drb);
 446
 447   if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
 448     fprintf (outf, "    (no dependence)\n");
 449
 450   else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
 451     {
 452       unsigned int i;
 453       struct loop *loopi;
 454
 455       subscript *sub;
 456       FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
 457         {
 458           fprintf (outf, "  access_fn_A: ");
 459           print_generic_stmt (outf, SUB_ACCESS_FN (sub, 0));
 460           fprintf (outf, "  access_fn_B: ");
 461           print_generic_stmt (outf, SUB_ACCESS_FN (sub, 1));
 462           dump_subscript (outf, sub);
 463         }
 464
 465       fprintf (outf, "  inner loop index: %d\n", DDR_INNER_LOOP (ddr));
 466       fprintf (outf, "  loop nest: (");
 467       FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr), i, loopi)
 468         fprintf (outf, "%d ", loopi->num);
 469       fprintf (outf, ")\n");
 470
 471       for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
 472         {
 473           fprintf (outf, "  distance_vector: ");
 474           print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
 475                                DDR_NB_LOOPS (ddr));
 476         }
 477
 478       for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
 479         {
 480           fprintf (outf, "  direction_vector: ");
 481           print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
 482                                   DDR_NB_LOOPS (ddr));
 483         }
 484     }
 485
 486   fprintf (outf, ")\n");
 487 }
 488
 489 /* Debug version.  */
 490
 491 DEBUG_FUNCTION void
 492 debug_data_dependence_relation (struct data_dependence_relation *ddr)
 493 {
 494   dump_data_dependence_relation (stderr, ddr);
 495 }
 496
 497 /* Dump into FILE all the dependence relations from DDRS.  */
 498
 499 DEBUG_FUNCTION void
 500 dump_data_dependence_relations (FILE *file,
 501                                 vec<ddr_p> ddrs)
 502 {
 503   unsigned int i;
 504   struct data_dependence_relation *ddr;
 505
 506   FOR_EACH_VEC_ELT (ddrs, i, ddr)
 507     dump_data_dependence_relation (file, ddr);
 508 }
 509
 510 DEBUG_FUNCTION void
 511 debug (vec<ddr_p> &ref)
 512 {
 513   dump_data_dependence_relations (stderr, ref);
 514 }
 515
 516 DEBUG_FUNCTION void
 517 debug (vec<ddr_p> *ptr)
 518 {
 519   if (ptr)
 520     debug (*ptr);
 521   else
 522     fprintf (stderr, "<nil>\n");
 523 }
 524
 525
 526 /* Dump to STDERR all the dependence relations from DDRS.  */
 527
 528 DEBUG_FUNCTION void
 529 debug_data_dependence_relations (vec<ddr_p> ddrs)
 530 {
 531   dump_data_dependence_relations (stderr, ddrs);
 532 }
 533
 534 /* Dumps the distance and direction vectors in FILE.  DDRS contains
 535    the dependence relations, and VECT_SIZE is the size of the
 536    dependence vectors, or in other words the number of loops in the
 537    considered nest.  */
 538
 539 DEBUG_FUNCTION void
 540 dump_dist_dir_vectors (FILE *file, vec<ddr_p> ddrs)
 541 {
 542   unsigned int i, j;
 543   struct data_dependence_relation *ddr;
 544   lambda_vector v;
 545
 546   FOR_EACH_VEC_ELT (ddrs, i, ddr)
 547     if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
 548       {
 549         FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), j, v)
 550           {
 551             fprintf (file, "DISTANCE_V (");
 552             print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
 553             fprintf (file, ")\n");
 554           }
 555
 556         FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), j, v)
 557           {
 558             fprintf (file, "DIRECTION_V (");
 559             print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
 560             fprintf (file, ")\n");
 561           }
 562       }
 563
 564   fprintf (file, "\n\n");
 565 }
 566
 567 /* Dumps the data dependence relations DDRS in FILE.  */
 568
 569 DEBUG_FUNCTION void
 570 dump_ddrs (FILE *file, vec<ddr_p> ddrs)
 571 {
 572   unsigned int i;
 573   struct data_dependence_relation *ddr;
 574
 575   FOR_EACH_VEC_ELT (ddrs, i, ddr)
 576     dump_data_dependence_relation (file, ddr);
 577
 578   fprintf (file, "\n\n");
 579 }
 580
 581 DEBUG_FUNCTION void
 582 debug_ddrs (vec<ddr_p> ddrs)
 583 {
 584   dump_ddrs (stderr, ddrs);
 585 }
 586
  587 /* Helper function for split_constant_offset.  Expresses OP0 CODE OP1
  588    (the type of the result is TYPE) as VAR + OFF, where OFF is a
  589    constant of type ssizetype (possibly zero), and returns true.  If the
  590    expression cannot be split this way, *VAR and *OFF are left as
  591    NULL_TREE and false is returned.  */
  592
  593 static bool
  594 split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
  595                          tree *var, tree *off)
  596 {
  597   tree var0, var1;
  598   tree off0, off1;
        /* OCODE is the operation used to combine the constant offsets; it
           differs from CODE only for POINTER_PLUS_EXPR.  */
  599   enum tree_code ocode = code;
  600
        /* Start from the failure state so that every "return false" below
           leaves both outputs as NULL_TREE.  */
  601   *var = NULL_TREE;
  602   *off = NULL_TREE;
  603
  604   switch (code)
  605     {
  606     case INTEGER_CST:
            /* The whole constant becomes the offset; the variable part is
               zero of TYPE.  */
  607       *var = build_int_cst (type, 0);
  608       *off = fold_convert (ssizetype, op0);
  609       return true;
  610
  611     case POINTER_PLUS_EXPR:
            /* The offsets are combined with PLUS_EXPR, while the variable
               part below is still rebuilt with the original CODE.  */
  612       ocode = PLUS_EXPR;
  613       /* FALLTHROUGH */
  614     case PLUS_EXPR:
  615     case MINUS_EXPR:
            /* Split both operands and recombine the variable parts and
               the offsets separately.  */
  616       split_constant_offset (op0, &var0, &off0);
  617       split_constant_offset (op1, &var1, &off1);
  618       *var = fold_build2 (code, type, var0, var1);
  619       *off = size_binop (ocode, off0, off1);
  620       return true;
  621
  622     case MULT_EXPR:
            /* Only a multiplication by an INTEGER_CST is handled; both the
               variable part and the offset of OP0 are scaled by OP1.  */
  623       if (TREE_CODE (op1) != INTEGER_CST)
  624         return false;
  625
  626       split_constant_offset (op0, &var0, &off0);
  627       *var = fold_build2 (MULT_EXPR, type, var0, op1);
  628       *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
  629       return true;
  630
  631     case ADDR_EXPR:
  632       {
  633         tree base, poffset;
  634         poly_int64 pbitsize, pbitpos, pbytepos;
  635         machine_mode pmode;
  636         int punsignedp, preversep, pvolatilep;
  637
              /* Decompose the object whose address is taken.  */
  638         op0 = TREE_OPERAND (op0, 0);
  639         base
  640           = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset, &pmode,
  641                                  &punsignedp, &preversep, &pvolatilep);
  642
              /* Fail unless the bit position is a whole number of
                 BITS_PER_UNIT units; PBYTEPOS receives the quotient.  */
  643         if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
  644           return false;
  645         base = build_fold_addr_expr (base);
  646         off0 = ssize_int (pbytepos);
  647
              /* A variable offset is split in turn: its constant part is
                 added to OFF0 and the remainder is folded into BASE.  */
  648         if (poffset)
  649           {
  650             split_constant_offset (poffset, &poffset, &off1);
  651             off0 = size_binop (PLUS_EXPR, off0, off1);
  652             if (POINTER_TYPE_P (TREE_TYPE (base)))
  653               base = fold_build_pointer_plus (base, poffset);
  654             else
  655               base = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base,
  656                                   fold_convert (TREE_TYPE (base), poffset));
  657           }
  658
  659         var0 = fold_convert (type, base);
  660
  661         /* If variable length types are involved, punt, otherwise casts
  662            might be converted into ARRAY_REFs in gimplify_conversion.
  663            To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
  664            possibly no longer appears in current GIMPLE, might resurface.
  665            This perhaps could run
  666            if (CONVERT_EXPR_P (var0))
  667              {
  668                gimplify_conversion (&var0);
  669                // Attempt to fill in any within var0 found ARRAY_REF's
  670                // element size from corresponding op embedded ARRAY_REF,
  671                // if unsuccessful, just punt.
  672              }  */
              /* Strip all pointer levels from TYPE before checking the size
                 of the pointed-to type.  */
  673         while (POINTER_TYPE_P (type))
  674           type = TREE_TYPE (type);
  675         if (int_size_in_bytes (type) < 0)
  676           return false;
  677
  678         *var = var0;
  679         *off = off0;
  680         return true;
  681       }
  682
  683     case SSA_NAME:
  684       {
              /* Do not look through names that occur in abnormal PHIs.  */
  685         if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op0))
  686           return false;
  687
  688         gimple *def_stmt = SSA_NAME_DEF_STMT (op0);
  689         enum tree_code subcode;
  690
              /* Only a defining GIMPLE_ASSIGN can be looked through.  */
  691         if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
  692           return false;
  693
              /* Re-run the split on the operands and code of the defining
                 assignment, keeping the caller's TYPE.  */
  694         var0 = gimple_assign_rhs1 (def_stmt);
  695         subcode = gimple_assign_rhs_code (def_stmt);
  696         var1 = gimple_assign_rhs2 (def_stmt);
  697
  698         return split_constant_offset_1 (type, var0, subcode, var1, var, off);
  699       }
  700     CASE_CONVERT:
  701       {
  702         /* We must not introduce undefined overflow, and we must not change the value.
  703            Hence we're okay if the inner type doesn't overflow to start with
  704            (pointer or signed), the outer type also is an integer or pointer
  705            and the outer precision is at least as large as the inner.  */
  706         tree itype = TREE_TYPE (op0);
  707         if ((POINTER_TYPE_P (itype)
  708              || (INTEGRAL_TYPE_P (itype) && !TYPE_OVERFLOW_TRAPS (itype)))
  709             && TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
  710             && (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
  711           {
  712             if (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_WRAPS (itype))
  713               {
  714                 /* Split the unconverted operand and try to prove that
  715                    wrapping isn't a problem.  */
  716                 tree tmp_var, tmp_off;
  717                 split_constant_offset (op0, &tmp_var, &tmp_off);
  718
  719                 /* See whether we have an SSA_NAME whose range is known
  720                    to be [A, B].  */
  721                 if (TREE_CODE (tmp_var) != SSA_NAME)
  722                   return false;
  723                 wide_int var_min, var_max;
  724                 if (get_range_info (tmp_var, &var_min, &var_max) != VR_RANGE)
  725                   return false;
  726
  727                 /* See whether the range of OP0 (i.e. TMP_VAR + TMP_OFF)
  728                    is known to be [A + TMP_OFF, B + TMP_OFF], with all
  729                    operations done in ITYPE.  The addition must overflow
  730                    at both ends of the range or at neither.  */
  731                 bool overflow[2];
  732                 signop sgn = TYPE_SIGN (itype);
  733                 unsigned int prec = TYPE_PRECISION (itype);
  734                 wide_int woff = wi::to_wide (tmp_off, prec);
  735                 wide_int op0_min = wi::add (var_min, woff, sgn, &overflow[0]);
                      /* Only the overflow flag of the upper-bound addition is
                         needed; its sum is discarded.  */
  736                 wi::add (var_max, woff, sgn, &overflow[1]);
  737                 if (overflow[0] != overflow[1])
  738                   return false;
  739
  740                 /* Calculate (ssizetype) OP0 - (ssizetype) TMP_VAR.  */
  741                 widest_int diff = (widest_int::from (op0_min, sgn)
  742                                    - widest_int::from (var_min, sgn));
  743                 var0 = tmp_var;
  744                 *off = wide_int_to_tree (ssizetype, diff);
  745               }
  746             else
                    /* No wrapping inner type: split the operand directly,
                       writing the offset straight into *OFF.  */
  747               split_constant_offset (op0, &var0, off);
  748             *var = fold_convert (type, var0);
  749             return true;
  750           }
  751         return false;
  752       }
  753
  754     default:
  755       return false;
  756     }
  757 }
 758
 759 /* Expresses EXP as VAR + OFF, where off is a constant.  The type of OFF
 760    will be ssizetype.  */
 761
 762 void
 763 split_constant_offset (tree exp, tree *var, tree *off)
 764 {
 765   tree type = TREE_TYPE (exp), op0, op1, e, o;
 766   enum tree_code code;
 767
 768   *var = exp;
 769   *off = ssize_int (0);
 770
 771   if (tree_is_chrec (exp)
 772       || get_gimple_rhs_class (TREE_CODE (exp)) == GIMPLE_TERNARY_RHS)
 773     return;
 774
 775   code = TREE_CODE (exp);
 776   extract_ops_from_tree (exp, &code, &op0, &op1);
 777   if (split_constant_offset_1 (type, op0, code, op1, &e, &o))
 778     {
 779       *var = e;
 780       *off = o;
 781     }
 782 }
 783
 784 /* Returns the address ADDR of an object in a canonical shape (without nop
 785    casts, and with type of pointer to the object).  */
 786
 787 static tree
 788 canonicalize_base_object_address (tree addr)
 789 {
 790   tree orig = addr;
 791
 792   STRIP_NOPS (addr);
 793
 794   /* The base address may be obtained by casting from integer, in that case
 795      keep the cast.  */
 796   if (!POINTER_TYPE_P (TREE_TYPE (addr)))
 797     return orig;
 798
 799   if (TREE_CODE (addr) != ADDR_EXPR)
 800     return addr;
 801
 802   return build_fold_addr_expr (TREE_OPERAND (addr, 0));
 803 }
 804
  805 /* Analyze the behavior of memory reference REF.  There are two modes:
  806
  807    - BB analysis.  In this case we simply split the address into base,
  808      init and offset components, without reference to any containing loop.
  809      The resulting base and offset are general expressions and they can
  810      vary arbitrarily from one iteration of the containing loop to the next.
  811      The step is always zero.
  812
  813    - loop analysis.  In this case we analyze the reference both wrt LOOP
  814      and on the basis that the reference occurs (is "used") in LOOP;
  815      see the comment above analyze_scalar_evolution_in_loop for more
  816      information about this distinction.  The base, init, offset and
  817      step fields are all invariant in LOOP.
  818
  819    Perform BB analysis if LOOP is null, or if LOOP is the function's
  820    dummy outermost loop.  In other cases perform loop analysis.
  821
  822    Return true if the analysis succeeded and store the results in DRB if so.
  823    BB analysis can only fail for bitfield or reversed-storage accesses.  */
  824
  825 bool
  826 dr_analyze_innermost (innermost_loop_behavior *drb, tree ref,
  827                       struct loop *loop)
  828 {
  829   poly_int64 pbitsize, pbitpos;
  830   tree base, poffset;
  831   machine_mode pmode;
  832   int punsignedp, preversep, pvolatilep;
  833   affine_iv base_iv, offset_iv;
  834   tree init, dinit, step;
        /* Loop analysis needs a non-null LOOP whose number is nonzero;
           anything else selects BB analysis.  */
  835   bool in_loop = (loop && loop->num);
  836
  837   if (dump_file && (dump_flags & TDF_DETAILS))
  838     fprintf (dump_file, "analyze_innermost: ");
  839
        /* Decompose REF into BASE, a bit position and an optional variable
           offset POFFSET.  */
  840   base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset, &pmode,
  841                               &punsignedp, &preversep, &pvolatilep);
  842   gcc_assert (base != NULL_TREE);
  843
        /* Fail unless the bit position is a whole number of BITS_PER_UNIT
           units; PBYTEPOS receives the quotient.  */
  844   poly_int64 pbytepos;
  845   if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
  846     {
  847       if (dump_file && (dump_flags & TDF_DETAILS))
  848         fprintf (dump_file, "failed: bit offset alignment.\n");
  849       return false;
  850     }
  851
  852   if (preversep)
  853     {
  854       if (dump_file && (dump_flags & TDF_DETAILS))
  855         fprintf (dump_file, "failed: reverse storage order.\n");
  856       return false;
  857     }
  858
  859   /* Calculate the alignment and misalignment for the inner reference.  */
  860   unsigned int HOST_WIDE_INT bit_base_misalignment;
  861   unsigned int bit_base_alignment;
  862   get_object_alignment_1 (base, &bit_base_alignment, &bit_base_misalignment);
  863
  864   /* There are no bitfield references remaining in BASE, so the values
  865      we got back must be whole bytes.  */
  866   gcc_assert (bit_base_alignment % BITS_PER_UNIT == 0
  867               && bit_base_misalignment % BITS_PER_UNIT == 0);
  868   unsigned int base_alignment = bit_base_alignment / BITS_PER_UNIT;
  869   poly_int64 base_misalignment = bit_base_misalignment / BITS_PER_UNIT;
  870
        /* From here on BASE is an address: the pointer operand of a
           MEM_REF, or the address of the base object otherwise.  */
  871   if (TREE_CODE (base) == MEM_REF)
  872     {
  873       if (!integer_zerop (TREE_OPERAND (base, 1)))
  874         {
  875           /* Subtract MOFF from the base and add it to POFFSET instead.
  876              Adjust the misalignment to reflect the amount we subtracted.  */
  877           poly_offset_int moff = mem_ref_offset (base);
  878           base_misalignment -= moff.force_shwi ();
  879           tree mofft = wide_int_to_tree (sizetype, moff);
  880           if (!poffset)
  881             poffset = mofft;
  882           else
  883             poffset = size_binop (PLUS_EXPR, poffset, mofft);
  884         }
  885       base = TREE_OPERAND (base, 0);
  886     }
  887   else
  888     base = build_fold_addr_expr (base);
  889
  890   if (in_loop)
  891     {
  892       if (!simple_iv (loop, loop, base, &base_iv, true))
  893         {
  894           if (dump_file && (dump_flags & TDF_DETAILS))
  895             fprintf (dump_file, "failed: evolution of base is not affine.\n");
  896           return false;
  897         }
  898     }
  899   else
  900     {
            /* BB analysis: use the base as-is with a zero step.  */
  901       base_iv.base = base;
  902       base_iv.step = ssize_int (0);
  903       base_iv.no_overflow = true;
  904     }
  905
  906   if (!poffset)
  907     {
            /* No variable offset at all.  */
  908       offset_iv.base = ssize_int (0);
  909       offset_iv.step = ssize_int (0);
  910     }
  911   else
  912     {
  913       if (!in_loop)
  914         {
                /* BB analysis: use the offset as-is with a zero step.  */
  915           offset_iv.base = poffset;
  916           offset_iv.step = ssize_int (0);
  917         }
  918       else if (!simple_iv (loop, loop, poffset, &offset_iv, true))
  919         {
  920           if (dump_file && (dump_flags & TDF_DETAILS))
  921             fprintf (dump_file, "failed: evolution of offset is not affine.\n");
  922           return false;
  923         }
  924     }
  925
        /* INIT starts as the constant byte position of the reference.  */
  926   init = ssize_int (pbytepos);
  927
  928   /* Subtract any constant component from the base and add it to INIT instead.
  929      Adjust the misalignment to reflect the amount we subtracted.  */
  930   split_constant_offset (base_iv.base, &base_iv.base, &dinit);
  931   init = size_binop (PLUS_EXPR, init, dinit);
  932   base_misalignment -= TREE_INT_CST_LOW (dinit);
  933
        /* Likewise move any constant component of the offset into INIT;
           the base misalignment is not adjusted for this one.  */
  934   split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
  935   init = size_binop (PLUS_EXPR, init, dinit);
  936
        /* The overall step is the sum of the base and offset steps, both
           converted to ssizetype.  */
  937   step = size_binop (PLUS_EXPR,
  938                      fold_convert (ssizetype, base_iv.step),
  939                      fold_convert (ssizetype, offset_iv.step));
  940
  941   base = canonicalize_base_object_address (base_iv.base);
  942
  943   /* See if get_pointer_alignment can guarantee a higher alignment than
  944      the one we calculated above.  */
  945   unsigned int HOST_WIDE_INT alt_misalignment;
  946   unsigned int alt_alignment;
  947   get_pointer_alignment_1 (base, &alt_alignment, &alt_misalignment);
  948
  949   /* As above, these values must be whole bytes.  */
  950   gcc_assert (alt_alignment % BITS_PER_UNIT == 0
  951               && alt_misalignment % BITS_PER_UNIT == 0);
  952   alt_alignment /= BITS_PER_UNIT;
  953   alt_misalignment /= BITS_PER_UNIT;
  954
        /* Switch to the pointer-based values only when they give a
           strictly larger alignment.  */
  955   if (base_alignment < alt_alignment)
  956     {
  957       base_alignment = alt_alignment;
  958       base_misalignment = alt_misalignment;
  959     }
  960
  961   drb->base_address = base;
  962   drb->offset = fold_convert (ssizetype, offset_iv.base);
  963   drb->init = init;
  964   drb->step = step;
        /* If known_misalignment succeeds it has stored the misalignment in
           DRB and BASE_ALIGNMENT is recorded.  Otherwise record the
           alignment that known_alignment derives from BASE_MISALIGNMENT,
           with a misalignment of zero.  */
  965   if (known_misalignment (base_misalignment, base_alignment,
  966                           &drb->base_misalignment))
  967     drb->base_alignment = base_alignment;
  968   else
  969     {
  970       drb->base_alignment = known_alignment (base_misalignment);
  971       drb->base_misalignment = 0;
  972     }
  973   drb->offset_alignment = highest_pow2_factor (offset_iv.base);
  974   drb->step_alignment = highest_pow2_factor (step);
  975
  976   if (dump_file && (dump_flags & TDF_DETAILS))
  977     fprintf (dump_file, "success.\n");
  978
  979   return true;
  980 }
 981
 982 /* Return true if OP is a valid component reference for a DR access
 983    function.  This accepts a subset of what handled_component_p accepts.  */
 984
 985 static bool
 986 access_fn_component_p (tree op)
 987 {
 988   switch (TREE_CODE (op))
 989     {
 990     case REALPART_EXPR:
 991     case IMAGPART_EXPR:
 992     case ARRAY_REF:
 993       return true;
 994
 995     case COMPONENT_REF:
 996       return TREE_CODE (TREE_TYPE (TREE_OPERAND (op, 0))) == RECORD_TYPE;
 997
 998     default:
 999       return false;
1000     }
1001 }
1002
1003 /* Determines the base object and the list of indices of memory reference
1004    DR, analyzed in LOOP and instantiated before NEST.  */
1005
1006 static void
1007 dr_analyze_indices (struct data_reference *dr, edge nest, loop_p loop)
1008 {
1009   vec<tree> access_fns = vNULL;
1010   tree ref, op;
1011   tree base, off, access_fn;
1012
1013   /* If analyzing a basic-block there are no indices to analyze
1014      and thus no access functions.  */
1015   if (!nest)
1016     {
1017       DR_BASE_OBJECT (dr) = DR_REF (dr);
1018       DR_ACCESS_FNS (dr).create (0);
1019       return;
1020     }
1021
1022   ref = DR_REF (dr);
1023
1024   /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
1025      into a two element array with a constant index.  The base is
1026      then just the immediate underlying object.  */
1027   if (TREE_CODE (ref) == REALPART_EXPR)
1028     {
1029       ref = TREE_OPERAND (ref, 0);
1030       access_fns.safe_push (integer_zero_node);
1031     }
1032   else if (TREE_CODE (ref) == IMAGPART_EXPR)
1033     {
1034       ref = TREE_OPERAND (ref, 0);
1035       access_fns.safe_push (integer_one_node);
1036     }
1037
1038   /* Analyze access functions of dimensions we know to be independent.
1039      The list of component references handled here should be kept in
1040      sync with access_fn_component_p.  */
1041   while (handled_component_p (ref))
1042     {
1043       if (TREE_CODE (ref) == ARRAY_REF)
1044         {
1045           op = TREE_OPERAND (ref, 1);
1046           access_fn = analyze_scalar_evolution (loop, op);
1047           access_fn = instantiate_scev (nest, loop, access_fn);
1048           access_fns.safe_push (access_fn);
1049         }
1050       else if (TREE_CODE (ref) == COMPONENT_REF
1051                && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) == RECORD_TYPE)
1052         {
1053           /* For COMPONENT_REFs of records (but not unions!) use the
1054              FIELD_DECL offset as constant access function so we can
1055              disambiguate a[i].f1 and a[i].f2.  */
1056           tree off = component_ref_field_offset (ref);
1057           off = size_binop (PLUS_EXPR,
1058                             size_binop (MULT_EXPR,
1059                                         fold_convert (bitsizetype, off),
1060                                         bitsize_int (BITS_PER_UNIT)),
1061                             DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref, 1)));
1062           access_fns.safe_push (off);
1063         }
1064       else
1065         /* If we have an unhandled component we could not translate
1066            to an access function stop analyzing.  We have determined
1067            our base object in this case.  */
1068         break;
1069
1070       ref = TREE_OPERAND (ref, 0);
1071     }
1072
1073   /* If the address operand of a MEM_REF base has an evolution in the
1074      analyzed nest, add it as an additional independent access-function.  */
1075   if (TREE_CODE (ref) == MEM_REF)
1076     {
1077       op = TREE_OPERAND (ref, 0);
1078       access_fn = analyze_scalar_evolution (loop, op);
1079       access_fn = instantiate_scev (nest, loop, access_fn);
1080       if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
1081         {
1082           tree orig_type;
1083           tree memoff = TREE_OPERAND (ref, 1);
1084           base = initial_condition (access_fn);
1085           orig_type = TREE_TYPE (base);
1086           STRIP_USELESS_TYPE_CONVERSION (base);
1087           split_constant_offset (base, &base, &off);
1088           STRIP_USELESS_TYPE_CONVERSION (base);
1089           /* Fold the MEM_REF offset into the evolutions initial
1090              value to make more bases comparable.  */
1091           if (!integer_zerop (memoff))
1092             {
1093               off = size_binop (PLUS_EXPR, off,
1094                                 fold_convert (ssizetype, memoff));
1095               memoff = build_int_cst (TREE_TYPE (memoff), 0);
1096             }
1097           /* Adjust the offset so it is a multiple of the access type
1098              size and thus we separate bases that can possibly be used
1099              to produce partial overlaps (which the access_fn machinery
1100              cannot handle).  */
1101           wide_int rem;
1102           if (TYPE_SIZE_UNIT (TREE_TYPE (ref))
1103               && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (ref))) == INTEGER_CST
1104               && !integer_zerop (TYPE_SIZE_UNIT (TREE_TYPE (ref))))
1105             rem = wi::mod_trunc
1106               (wi::to_wide (off),
1107                wi::to_wide (TYPE_SIZE_UNIT (TREE_TYPE (ref))),
1108                SIGNED);
1109           else
1110             /* If we can't compute the remainder simply force the initial
1111                condition to zero.  */
1112             rem = wi::to_wide (off);
1113           off = wide_int_to_tree (ssizetype, wi::to_wide (off) - rem);
1114           memoff = wide_int_to_tree (TREE_TYPE (memoff), rem);
1115           /* And finally replace the initial condition.  */
1116           access_fn = chrec_replace_initial_condition
1117               (access_fn, fold_convert (orig_type, off));
1118           /* ???  This is still not a suitable base object for
1119              dr_may_alias_p - the base object needs to be an
1120              access that covers the object as whole.  With
1121              an evolution in the pointer this cannot be
1122              guaranteed.
1123              As a band-aid, mark the access so we can special-case
1124              it in dr_may_alias_p.  */
1125           tree old = ref;
1126           ref = fold_build2_loc (EXPR_LOCATION (ref),
1127                                  MEM_REF, TREE_TYPE (ref),
1128                                  base, memoff);
1129           MR_DEPENDENCE_CLIQUE (ref) = MR_DEPENDENCE_CLIQUE (old);
1130           MR_DEPENDENCE_BASE (ref) = MR_DEPENDENCE_BASE (old);
1131           DR_UNCONSTRAINED_BASE (dr) = true;
1132           access_fns.safe_push (access_fn);
1133         }
1134     }
1135   else if (DECL_P (ref))
1136     {
1137       /* Canonicalize DR_BASE_OBJECT to MEM_REF form.  */
1138       ref = build2 (MEM_REF, TREE_TYPE (ref),
1139                     build_fold_addr_expr (ref),
1140                     build_int_cst (reference_alias_ptr_type (ref), 0));
1141     }
1142
1143   DR_BASE_OBJECT (dr) = ref;
1144   DR_ACCESS_FNS (dr) = access_fns;
1145 }
1146
1147 /* Extracts the alias analysis information from the memory reference DR.  */
1148
1149 static void
1150 dr_analyze_alias (struct data_reference *dr)
1151 {
1152   tree ref = DR_REF (dr);
1153   tree base = get_base_address (ref), addr;
1154
1155   if (INDIRECT_REF_P (base)
1156       || TREE_CODE (base) == MEM_REF)
1157     {
1158       addr = TREE_OPERAND (base, 0);
1159       if (TREE_CODE (addr) == SSA_NAME)
1160         DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
1161     }
1162 }
1163
1164 /* Frees data reference DR.  */
1165
1166 void
1167 free_data_ref (data_reference_p dr)
1168 {
1169   DR_ACCESS_FNS (dr).release ();
1170   free (dr);
1171 }
1172
1173 /* Analyze memory reference MEMREF, which is accessed in STMT.
1174    The reference is a read if IS_READ is true, otherwise it is a write.
1175    IS_CONDITIONAL_IN_STMT indicates that the reference is conditional
1176    within STMT, i.e. that it might not occur even if STMT is executed
1177    and runs to completion.
1178
1179    Return the data_reference description of MEMREF.  NEST is the outermost
1180    loop in which the reference should be instantiated, LOOP is the loop
1181    in which the data reference should be analyzed.  */
1182
1183 struct data_reference *
1184 create_data_ref (edge nest, loop_p loop, tree memref, gimple *stmt,
1185                  bool is_read, bool is_conditional_in_stmt)
1186 {
1187   struct data_reference *dr;
1188
1189   if (dump_file && (dump_flags & TDF_DETAILS))
1190     {
1191       fprintf (dump_file, "Creating dr for ");
1192       print_generic_expr (dump_file, memref, TDF_SLIM);
1193       fprintf (dump_file, "\n");
1194     }
1195
1196   dr = XCNEW (struct data_reference);
1197   DR_STMT (dr) = stmt;
1198   DR_REF (dr) = memref;
1199   DR_IS_READ (dr) = is_read;
1200   DR_IS_CONDITIONAL_IN_STMT (dr) = is_conditional_in_stmt;
1201
1202   dr_analyze_innermost (&DR_INNERMOST (dr), memref,
1203                         nest != NULL ? loop : NULL);
1204   dr_analyze_indices (dr, nest, loop);
1205   dr_analyze_alias (dr);
1206
1207   if (dump_file && (dump_flags & TDF_DETAILS))
1208     {
1209       unsigned i;
1210       fprintf (dump_file, "\tbase_address: ");
1211       print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
1212       fprintf (dump_file, "\n\toffset from base address: ");
1213       print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
1214       fprintf (dump_file, "\n\tconstant offset from base address: ");
1215       print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
1216       fprintf (dump_file, "\n\tstep: ");
1217       print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
1218       fprintf (dump_file, "\n\tbase alignment: %d", DR_BASE_ALIGNMENT (dr));
1219       fprintf (dump_file, "\n\tbase misalignment: %d",
1220                DR_BASE_MISALIGNMENT (dr));
1221       fprintf (dump_file, "\n\toffset alignment: %d",
1222                DR_OFFSET_ALIGNMENT (dr));
1223       fprintf (dump_file, "\n\tstep alignment: %d", DR_STEP_ALIGNMENT (dr));
1224       fprintf (dump_file, "\n\tbase_object: ");
1225       print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
1226       fprintf (dump_file, "\n");
1227       for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
1228         {
1229           fprintf (dump_file, "\tAccess function %d: ", i);
1230           print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
1231         }
1232     }
1233
1234   return dr;
1235 }
1236
1237 /*  A helper function computes order between two tree epxressions T1 and T2.
1238     This is used in comparator functions sorting objects based on the order
1239     of tree expressions.  The function returns -1, 0, or 1.  */
1240
1241 int
1242 data_ref_compare_tree (tree t1, tree t2)
1243 {
1244   int i, cmp;
1245   enum tree_code code;
1246   char tclass;
1247
1248   if (t1 == t2)
1249     return 0;
1250   if (t1 == NULL)
1251     return -1;
1252   if (t2 == NULL)
1253     return 1;
1254
1255   STRIP_USELESS_TYPE_CONVERSION (t1);
1256   STRIP_USELESS_TYPE_CONVERSION (t2);
1257   if (t1 == t2)
1258     return 0;
1259
1260   if (TREE_CODE (t1) != TREE_CODE (t2)
1261       && ! (CONVERT_EXPR_P (t1) && CONVERT_EXPR_P (t2)))
1262     return TREE_CODE (t1) < TREE_CODE (t2) ? -1 : 1;
1263
1264   code = TREE_CODE (t1);
1265   switch (code)
1266     {
1267     case INTEGER_CST:
1268       return tree_int_cst_compare (t1, t2);
1269
1270     case STRING_CST:
1271       if (TREE_STRING_LENGTH (t1) != TREE_STRING_LENGTH (t2))
1272         return TREE_STRING_LENGTH (t1) < TREE_STRING_LENGTH (t2) ? -1 : 1;
1273       return memcmp (TREE_STRING_POINTER (t1), TREE_STRING_POINTER (t2),
1274                      TREE_STRING_LENGTH (t1));
1275
1276     case SSA_NAME:
1277       if (SSA_NAME_VERSION (t1) != SSA_NAME_VERSION (t2))
1278         return SSA_NAME_VERSION (t1) < SSA_NAME_VERSION (t2) ? -1 : 1;
1279       break;
1280
1281     default:
1282       if (POLY_INT_CST_P (t1))
1283         return compare_sizes_for_sort (wi::to_poly_widest (t1),
1284                                        wi::to_poly_widest (t2));
1285
1286       tclass = TREE_CODE_CLASS (code);
1287
1288       /* For decls, compare their UIDs.  */
1289       if (tclass == tcc_declaration)
1290         {
1291           if (DECL_UID (t1) != DECL_UID (t2))
1292             return DECL_UID (t1) < DECL_UID (t2) ? -1 : 1;
1293           break;
1294         }
1295       /* For expressions, compare their operands recursively.  */
1296       else if (IS_EXPR_CODE_CLASS (tclass))
1297         {
1298           for (i = TREE_OPERAND_LENGTH (t1) - 1; i >= 0; --i)
1299             {
1300               cmp = data_ref_compare_tree (TREE_OPERAND (t1, i),
1301                                            TREE_OPERAND (t2, i));
1302               if (cmp != 0)
1303                 return cmp;
1304             }
1305         }
1306       else
1307         gcc_unreachable ();
1308     }
1309
1310   return 0;
1311 }
1312
1313 /* Return TRUE it's possible to resolve data dependence DDR by runtime alias
1314    check.  */
1315
1316 bool
1317 runtime_alias_check_p (ddr_p ddr, struct loop *loop, bool speed_p)
1318 {
1319   if (dump_enabled_p ())
1320     {
1321       dump_printf (MSG_NOTE, "consider run-time aliasing test between ");
1322       dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (DDR_A (ddr)));
1323       dump_printf (MSG_NOTE,  " and ");
1324       dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (DDR_B (ddr)));
1325       dump_printf (MSG_NOTE, "\n");
1326     }
1327
1328   if (!speed_p)
1329     {
1330       if (dump_enabled_p ())
1331         dump_printf (MSG_MISSED_OPTIMIZATION,
1332                      "runtime alias check not supported when optimizing "
1333                      "for size.\n");
1334       return false;
1335     }
1336
1337   /* FORNOW: We don't support versioning with outer-loop in either
1338      vectorization or loop distribution.  */
1339   if (loop != NULL && loop->inner != NULL)
1340     {
1341       if (dump_enabled_p ())
1342         dump_printf (MSG_MISSED_OPTIMIZATION,
1343                      "runtime alias check not supported for outer loop.\n");
1344       return false;
1345     }
1346
1347   return true;
1348 }
1349
1350 /* Operator == between two dr_with_seg_len objects.
1351
1352    This equality operator is used to make sure two data refs
1353    are the same one so that we will consider to combine the
1354    aliasing checks of those two pairs of data dependent data
1355    refs.  */
1356
1357 static bool
1358 operator == (const dr_with_seg_len& d1,
1359              const dr_with_seg_len& d2)
1360 {
1361   return (operand_equal_p (DR_BASE_ADDRESS (d1.dr),
1362                            DR_BASE_ADDRESS (d2.dr), 0)
1363           && data_ref_compare_tree (DR_OFFSET (d1.dr), DR_OFFSET (d2.dr)) == 0
1364           && data_ref_compare_tree (DR_INIT (d1.dr), DR_INIT (d2.dr)) == 0
1365           && data_ref_compare_tree (d1.seg_len, d2.seg_len) == 0
1366           && known_eq (d1.access_size, d2.access_size)
1367           && d1.align == d2.align);
1368 }
1369
1370 /* Comparison function for sorting objects of dr_with_seg_len_pair_t
1371    so that we can combine aliasing checks in one scan.  */
1372
1373 static int
1374 comp_dr_with_seg_len_pair (const void *pa_, const void *pb_)
1375 {
1376   const dr_with_seg_len_pair_t* pa = (const dr_with_seg_len_pair_t *) pa_;
1377   const dr_with_seg_len_pair_t* pb = (const dr_with_seg_len_pair_t *) pb_;
1378   const dr_with_seg_len &a1 = pa->first, &a2 = pa->second;
1379   const dr_with_seg_len &b1 = pb->first, &b2 = pb->second;
1380
1381   /* For DR pairs (a, b) and (c, d), we only consider to merge the alias checks
1382      if a and c have the same basic address snd step, and b and d have the same
1383      address and step.  Therefore, if any a&c or b&d don't have the same address
1384      and step, we don't care the order of those two pairs after sorting.  */
1385   int comp_res;
1386
1387   if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a1.dr),
1388                                          DR_BASE_ADDRESS (b1.dr))) != 0)
1389     return comp_res;
1390   if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a2.dr),
1391                                          DR_BASE_ADDRESS (b2.dr))) != 0)
1392     return comp_res;
1393   if ((comp_res = data_ref_compare_tree (DR_STEP (a1.dr),
1394                                          DR_STEP (b1.dr))) != 0)
1395     return comp_res;
1396   if ((comp_res = data_ref_compare_tree (DR_STEP (a2.dr),
1397                                          DR_STEP (b2.dr))) != 0)
1398     return comp_res;
1399   if ((comp_res = data_ref_compare_tree (DR_OFFSET (a1.dr),
1400                                          DR_OFFSET (b1.dr))) != 0)
1401     return comp_res;
1402   if ((comp_res = data_ref_compare_tree (DR_INIT (a1.dr),
1403                                          DR_INIT (b1.dr))) != 0)
1404     return comp_res;
1405   if ((comp_res = data_ref_compare_tree (DR_OFFSET (a2.dr),
1406                                          DR_OFFSET (b2.dr))) != 0)
1407     return comp_res;
1408   if ((comp_res = data_ref_compare_tree (DR_INIT (a2.dr),
1409                                          DR_INIT (b2.dr))) != 0)
1410     return comp_res;
1411
1412   return 0;
1413 }
1414
1415 /* Merge alias checks recorded in ALIAS_PAIRS and remove redundant ones.
1416    FACTOR is number of iterations that each data reference is accessed.
1417
1418    Basically, for each pair of dependent data refs store_ptr_0 & load_ptr_0,
1419    we create an expression:
1420
1421    ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
1422    || (load_ptr_0 + load_segment_length_0) <= store_ptr_0))
1423
1424    for aliasing checks.  However, in some cases we can decrease the number
1425    of checks by combining two checks into one.  For example, suppose we have
1426    another pair of data refs store_ptr_0 & load_ptr_1, and if the following
1427    condition is satisfied:
1428
1429    load_ptr_0 < load_ptr_1  &&
1430    load_ptr_1 - load_ptr_0 - load_segment_length_0 < store_segment_length_0
1431
1432    (this condition means, in each iteration of vectorized loop, the accessed
1433    memory of store_ptr_0 cannot be between the memory of load_ptr_0 and
1434    load_ptr_1.)
1435
1436    we then can use only the following expression to finish the alising checks
1437    between store_ptr_0 & load_ptr_0 and store_ptr_0 & load_ptr_1:
1438
1439    ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
1440    || (load_ptr_1 + load_segment_length_1 <= store_ptr_0))
1441
1442    Note that we only consider that load_ptr_0 and load_ptr_1 have the same
1443    basic address.  */
1444
1445 void
1446 prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs,
1447                                poly_uint64)
1448 {
1449   /* Sort the collected data ref pairs so that we can scan them once to
1450      combine all possible aliasing checks.  */
1451   alias_pairs->qsort (comp_dr_with_seg_len_pair);
1452
1453   /* Scan the sorted dr pairs and check if we can combine alias checks
1454      of two neighboring dr pairs.  */
1455   for (size_t i = 1; i < alias_pairs->length (); ++i)
1456     {
1457       /* Deal with two ddrs (dr_a1, dr_b1) and (dr_a2, dr_b2).  */
1458       dr_with_seg_len *dr_a1 = &(*alias_pairs)[i-1].first,
1459                       *dr_b1 = &(*alias_pairs)[i-1].second,
1460                       *dr_a2 = &(*alias_pairs)[i].first,
1461                       *dr_b2 = &(*alias_pairs)[i].second;
1462
1463       /* Remove duplicate data ref pairs.  */
1464       if (*dr_a1 == *dr_a2 && *dr_b1 == *dr_b2)
1465         {
1466           if (dump_enabled_p ())
1467             {
1468               dump_printf (MSG_NOTE, "found equal ranges ");
1469               dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a1->dr));
1470               dump_printf (MSG_NOTE,  ", ");
1471               dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b1->dr));
1472               dump_printf (MSG_NOTE,  " and ");
1473               dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a2->dr));
1474               dump_printf (MSG_NOTE,  ", ");
1475               dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b2->dr));
1476               dump_printf (MSG_NOTE, "\n");
1477             }
1478           alias_pairs->ordered_remove (i--);
1479           continue;
1480         }
1481
1482       if (*dr_a1 == *dr_a2 || *dr_b1 == *dr_b2)
1483         {
1484           /* We consider the case that DR_B1 and DR_B2 are same memrefs,
1485              and DR_A1 and DR_A2 are two consecutive memrefs.  */
1486           if (*dr_a1 == *dr_a2)
1487             {
1488               std::swap (dr_a1, dr_b1);
1489               std::swap (dr_a2, dr_b2);
1490             }
1491
1492           poly_int64 init_a1, init_a2;
1493           /* Only consider cases in which the distance between the initial
1494              DR_A1 and the initial DR_A2 is known at compile time.  */
1495           if (!operand_equal_p (DR_BASE_ADDRESS (dr_a1->dr),
1496                                 DR_BASE_ADDRESS (dr_a2->dr), 0)
1497               || !operand_equal_p (DR_OFFSET (dr_a1->dr),
1498                                    DR_OFFSET (dr_a2->dr), 0)
1499               || !poly_int_tree_p (DR_INIT (dr_a1->dr), &init_a1)
1500               || !poly_int_tree_p (DR_INIT (dr_a2->dr), &init_a2))
1501             continue;
1502
1503           /* Don't combine if we can't tell which one comes first.  */
1504           if (!ordered_p (init_a1, init_a2))
1505             continue;
1506
1507           /* Make sure dr_a1 starts left of dr_a2.  */
1508           if (maybe_gt (init_a1, init_a2))
1509             {
1510               std::swap (*dr_a1, *dr_a2);
1511               std::swap (init_a1, init_a2);
1512             }
1513
1514           /* Work out what the segment length would be if we did combine
1515              DR_A1 and DR_A2:
1516
1517              - If DR_A1 and DR_A2 have equal lengths, that length is
1518                also the combined length.
1519
1520              - If DR_A1 and DR_A2 both have negative "lengths", the combined
1521                length is the lower bound on those lengths.
1522
1523              - If DR_A1 and DR_A2 both have positive lengths, the combined
1524                length is the upper bound on those lengths.
1525
1526              Other cases are unlikely to give a useful combination.
1527
1528              The lengths both have sizetype, so the sign is taken from
1529              the step instead.  */
1530           if (!operand_equal_p (dr_a1->seg_len, dr_a2->seg_len, 0))
1531             {
1532               poly_uint64 seg_len_a1, seg_len_a2;
1533               if (!poly_int_tree_p (dr_a1->seg_len, &seg_len_a1)
1534                   || !poly_int_tree_p (dr_a2->seg_len, &seg_len_a2))
1535                 continue;
1536
1537               tree indicator_a = dr_direction_indicator (dr_a1->dr);
1538               if (TREE_CODE (indicator_a) != INTEGER_CST)
1539                 continue;
1540
1541               tree indicator_b = dr_direction_indicator (dr_a2->dr);
1542               if (TREE_CODE (indicator_b) != INTEGER_CST)
1543                 continue;
1544
1545               int sign_a = tree_int_cst_sgn (indicator_a);
1546               int sign_b = tree_int_cst_sgn (indicator_b);
1547
1548               poly_uint64 new_seg_len;
1549               if (sign_a <= 0 && sign_b <= 0)
1550                 new_seg_len = lower_bound (seg_len_a1, seg_len_a2);
1551               else if (sign_a >= 0 && sign_b >= 0)
1552                 new_seg_len = upper_bound (seg_len_a1, seg_len_a2);
1553               else
1554                 continue;
1555
1556               dr_a1->seg_len = build_int_cst (TREE_TYPE (dr_a1->seg_len),
1557                                               new_seg_len);
1558               dr_a1->align = MIN (dr_a1->align, known_alignment (new_seg_len));
1559             }
1560
1561           /* This is always positive due to the swap above.  */
1562           poly_uint64 diff = init_a2 - init_a1;
1563
1564           /* The new check will start at DR_A1.  Make sure that its access
1565              size encompasses the initial DR_A2.  */
1566           if (maybe_lt (dr_a1->access_size, diff + dr_a2->access_size))
1567             {
1568               dr_a1->access_size = upper_bound (dr_a1->access_size,
1569                                                 diff + dr_a2->access_size);
1570               unsigned int new_align = known_alignment (dr_a1->access_size);
1571               dr_a1->align = MIN (dr_a1->align, new_align);
1572             }
1573           if (dump_enabled_p ())
1574             {
1575               dump_printf (MSG_NOTE, "merging ranges for ");
1576               dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a1->dr));
1577               dump_printf (MSG_NOTE,  ", ");
1578               dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b1->dr));
1579               dump_printf (MSG_NOTE,  " and ");
1580               dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a2->dr));
1581               dump_printf (MSG_NOTE,  ", ");
1582               dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b2->dr));
1583               dump_printf (MSG_NOTE, "\n");
1584             }
1585           alias_pairs->ordered_remove (i);
1586           i--;
1587         }
1588     }
1589 }
1590
1591 /* Given LOOP's two data references and segment lengths described by DR_A
1592    and DR_B, create expression checking if the two addresses ranges intersect
1593    with each other based on index of the two addresses.  This can only be
1594    done if DR_A and DR_B referring to the same (array) object and the index
1595    is the only difference.  For example:
1596
1597                        DR_A                           DR_B
1598       data-ref         arr[i]                         arr[j]
1599       base_object      arr                            arr
1600       index            {i_0, +, 1}_loop               {j_0, +, 1}_loop
1601
1602    The addresses and their index are like:
1603
1604         |<- ADDR_A    ->|          |<- ADDR_B    ->|
1605      ------------------------------------------------------->
1606         |   |   |   |   |          |   |   |   |   |
1607      ------------------------------------------------------->
1608         i_0 ...         i_0+4      j_0 ...         j_0+4
1609
1610    We can create expression based on index rather than address:
1611
1612      (i_0 + 4 < j_0 || j_0 + 4 < i_0)
1613
1614    Note evolution step of index needs to be considered in comparison.  */
1615
1616 static bool
1617 create_intersect_range_checks_index (struct loop *loop, tree *cond_expr,
1618                                      const dr_with_seg_len& dr_a,
1619                                      const dr_with_seg_len& dr_b)
1620 {
1621   if (integer_zerop (DR_STEP (dr_a.dr))
1622       || integer_zerop (DR_STEP (dr_b.dr))
1623       || DR_NUM_DIMENSIONS (dr_a.dr) != DR_NUM_DIMENSIONS (dr_b.dr))
1624     return false;
1625
1626   poly_uint64 seg_len1, seg_len2;
1627   if (!poly_int_tree_p (dr_a.seg_len, &seg_len1)
1628       || !poly_int_tree_p (dr_b.seg_len, &seg_len2))
1629     return false;
1630
1631   if (!tree_fits_shwi_p (DR_STEP (dr_a.dr)))
1632     return false;
1633
1634   if (!operand_equal_p (DR_BASE_OBJECT (dr_a.dr), DR_BASE_OBJECT (dr_b.dr), 0))
1635     return false;
1636
1637   if (!operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0))
1638     return false;
1639
1640   gcc_assert (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST);
1641
1642   bool neg_step = tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0;
1643   unsigned HOST_WIDE_INT abs_step = tree_to_shwi (DR_STEP (dr_a.dr));
1644   if (neg_step)
1645     {
1646       abs_step = -abs_step;
1647       seg_len1 = -seg_len1;
1648       seg_len2 = -seg_len2;
1649     }
1650   else
1651     {
1652       /* Include the access size in the length, so that we only have one
1653          tree addition below.  */
1654       seg_len1 += dr_a.access_size;
1655       seg_len2 += dr_b.access_size;
1656     }
1657
1658   /* Infer the number of iterations with which the memory segment is accessed
1659      by DR.  In other words, alias is checked if memory segment accessed by
1660      DR_A in some iterations intersect with memory segment accessed by DR_B
1661      in the same amount iterations.
1662      Note segment length is a linear function of number of iterations with
1663      DR_STEP as the coefficient.  */
1664   poly_uint64 niter_len1, niter_len2;
1665   if (!can_div_trunc_p (seg_len1 + abs_step - 1, abs_step, &niter_len1)
1666       || !can_div_trunc_p (seg_len2 + abs_step - 1, abs_step, &niter_len2))
1667     return false;
1668
1669   poly_uint64 niter_access1 = 0, niter_access2 = 0;
1670   if (neg_step)
1671     {
1672       /* Divide each access size by the byte step, rounding up.  */
1673       if (!can_div_trunc_p (dr_a.access_size - abs_step - 1,
1674                             abs_step, &niter_access1)
1675           || !can_div_trunc_p (dr_b.access_size + abs_step - 1,
1676                                abs_step, &niter_access2))
1677         return false;
1678     }
1679
1680   unsigned int i;
1681   for (i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
1682     {
1683       tree access1 = DR_ACCESS_FN (dr_a.dr, i);
1684       tree access2 = DR_ACCESS_FN (dr_b.dr, i);
1685       /* Two indices must be the same if they are not scev, or not scev with
1686          respect to the current loop being vectorized.  */
1687       if (TREE_CODE (access1) != POLYNOMIAL_CHREC
1688           || TREE_CODE (access2) != POLYNOMIAL_CHREC
1689           || CHREC_VARIABLE (access1) != (unsigned)loop->num
1690           || CHREC_VARIABLE (access2) != (unsigned)loop->num)
1691         {
1692           if (operand_equal_p (access1, access2, 0))
1693             continue;
1694
1695           return false;
1696         }
1697       /* The two indices must have the same step.  */
1698       if (!operand_equal_p (CHREC_RIGHT (access1), CHREC_RIGHT (access2), 0))
1699         return false;
1700
1701       tree idx_step = CHREC_RIGHT (access1);
1702       /* Index must have const step, otherwise DR_STEP won't be constant.  */
1703       gcc_assert (TREE_CODE (idx_step) == INTEGER_CST);
1704       /* Index must evaluate in the same direction as DR.  */
1705       gcc_assert (!neg_step || tree_int_cst_sign_bit (idx_step) == 1);
1706
1707       tree min1 = CHREC_LEFT (access1);
1708       tree min2 = CHREC_LEFT (access2);
1709       if (!types_compatible_p (TREE_TYPE (min1), TREE_TYPE (min2)))
1710         return false;
1711
1712       /* Ideally, alias can be checked against loop's control IV, but we
1713          need to prove linear mapping between control IV and reference
1714          index.  Although that should be true, we check against (array)
1715          index of data reference.  Like segment length, index length is
1716          linear function of the number of iterations with index_step as
1717          the coefficient, i.e, niter_len * idx_step.  */
1718       tree idx_len1 = fold_build2 (MULT_EXPR, TREE_TYPE (min1), idx_step,
1719                                    build_int_cst (TREE_TYPE (min1),
1720                                                   niter_len1));
1721       tree idx_len2 = fold_build2 (MULT_EXPR, TREE_TYPE (min2), idx_step,
1722                                    build_int_cst (TREE_TYPE (min2),
1723                                                   niter_len2));
1724       tree max1 = fold_build2 (PLUS_EXPR, TREE_TYPE (min1), min1, idx_len1);
1725       tree max2 = fold_build2 (PLUS_EXPR, TREE_TYPE (min2), min2, idx_len2);
1726       /* Adjust ranges for negative step.  */
1727       if (neg_step)
1728         {
1729           /* IDX_LEN1 and IDX_LEN2 are negative in this case.  */
1730           std::swap (min1, max1);
1731           std::swap (min2, max2);
1732
1733           /* As with the lengths just calculated, we've measured the access
1734              sizes in iterations, so multiply them by the index step.  */
1735           tree idx_access1
1736             = fold_build2 (MULT_EXPR, TREE_TYPE (min1), idx_step,
1737                            build_int_cst (TREE_TYPE (min1), niter_access1));
1738           tree idx_access2
1739             = fold_build2 (MULT_EXPR, TREE_TYPE (min2), idx_step,
1740                            build_int_cst (TREE_TYPE (min2), niter_access2));
1741
1742           /* MINUS_EXPR because the above values are negative.  */
1743           max1 = fold_build2 (MINUS_EXPR, TREE_TYPE (max1), max1, idx_access1);
1744           max2 = fold_build2 (MINUS_EXPR, TREE_TYPE (max2), max2, idx_access2);
1745         }
1746       tree part_cond_expr
1747         = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1748             fold_build2 (LE_EXPR, boolean_type_node, max1, min2),
1749             fold_build2 (LE_EXPR, boolean_type_node, max2, min1));
1750       if (*cond_expr)
1751         *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1752                                   *cond_expr, part_cond_expr);
1753       else
1754         *cond_expr = part_cond_expr;
1755     }
1756   return true;
1757 }
1758
1759 /* If ALIGN is nonzero, set up *SEG_MIN_OUT and *SEG_MAX_OUT so that for
1760    every address ADDR accessed by D:
1761
1762      *SEG_MIN_OUT <= ADDR (== ADDR & -ALIGN) <= *SEG_MAX_OUT
1763
1764    In this case, every element accessed by D is aligned to at least
1765    ALIGN bytes.
1766
1767    If ALIGN is zero then instead set *SEG_MIN_OUT and *SEG_MAX_OUT so that:
1768
1769      *SEG_MIN_OUT <= ADDR < *SEG_MAX_OUT.  */
1770
1771 static void
1772 get_segment_min_max (const dr_with_seg_len &d, tree *seg_min_out,
1773                      tree *seg_max_out, HOST_WIDE_INT align)
1774 {
1775   /* Each access has the following pattern:
1776
1777           <- |seg_len| ->
1778           <--- A: -ve step --->
1779           +-----+-------+-----+-------+-----+
1780           | n-1 | ..... |  0  | ..... | n-1 |
1781           +-----+-------+-----+-------+-----+
1782                         <--- B: +ve step --->
1783                         <- |seg_len| ->
1784                         |
1785                    base address
1786
1787      where "n" is the number of scalar iterations covered by the segment.
1788      (This should be VF for a particular pair if we know that both steps
1789      are the same, otherwise it will be the full number of scalar loop
1790      iterations.)
1791
1792      A is the range of bytes accessed when the step is negative,
1793      B is the range when the step is positive.
1794
1795      If the access size is "access_size" bytes, the lowest addressed byte is:
1796
1797          base + (step < 0 ? seg_len : 0)   [LB]
1798
1799      and the highest addressed byte is always below:
1800
1801          base + (step < 0 ? 0 : seg_len) + access_size   [UB]
1802
1803      Thus:
1804
1805          LB <= ADDR < UB
1806
1807      If ALIGN is nonzero, all three values are aligned to at least ALIGN
1808      bytes, so:
1809
1810          LB <= ADDR <= UB - ALIGN
1811
1812      where "- ALIGN" folds naturally with the "+ access_size" and often
1813      cancels it out.
1814
1815      We don't try to simplify LB and UB beyond this (e.g. by using
1816      MIN and MAX based on whether seg_len rather than the stride is
1817      negative) because it is possible for the absolute size of the
1818      segment to overflow the range of a ssize_t.
1819
1820      Keeping the pointer_plus outside of the cond_expr should allow
1821      the cond_exprs to be shared with other alias checks.  */
       /* NEG_STEP is the expression "direction indicator of D.DR < 0",
          i.e. the "step < 0" condition used in LB and UB above.  */
1822   tree indicator = dr_direction_indicator (d.dr);
1823   tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
1824                                fold_convert (ssizetype, indicator),
1825                                ssize_int (0));
       /* ADDR_BASE is DR_BASE_ADDRESS + DR_OFFSET + DR_INIT of D.DR: the
          "base" in LB and UB above.  */
1826   tree addr_base = fold_build_pointer_plus (DR_BASE_ADDRESS (d.dr),
1827                                             DR_OFFSET (d.dr));
1828   addr_base = fold_build_pointer_plus (addr_base, DR_INIT (d.dr));
1829   tree seg_len
1830     = fold_convert (sizetype, rewrite_to_non_trapping_overflow (d.seg_len));
1831
       /* MIN_REACH and MAX_REACH are the sizetype offsets from ADDR_BASE of
          LB and of UB - ALIGN respectively.  */
1832   tree min_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
1833                                 seg_len, size_zero_node);
1834   tree max_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
1835                                 size_zero_node, seg_len);
1836   max_reach = fold_build2 (PLUS_EXPR, sizetype, max_reach,
1837                            size_int (d.access_size - align));
1838
1839   *seg_min_out = fold_build_pointer_plus (addr_base, min_reach);
1840   *seg_max_out = fold_build_pointer_plus (addr_base, max_reach);
1841 }
1842
1843 /* Build in *COND_EXPR a run-time test that is true when the address
1844    ranges accessed through DR_A and DR_B in LOOP do not intersect:
1845
1846      (DR_A_max <= DR_B_min) || (DR_B_max <= DR_A_min)
1847
1848    with "<" in place of "<=" when the maximum values are inclusive.  */
1849
1850 static void
1851 create_intersect_range_checks (struct loop *loop, tree *cond_expr,
1852                                const dr_with_seg_len& dr_a,
1853                                const dr_with_seg_len& dr_b)
1854 {
1855   *cond_expr = NULL_TREE;
1856
1857   /* When the index-based form can be built it is already stored in
1858      *COND_EXPR and nothing more is needed.  */
1859   if (create_intersect_range_checks_index (loop, cond_expr, dr_a, dr_b))
1860     return;
1861
1862   /* SHARED_ALIGN is passed to get_segment_min_max as its ALIGN and
1863      NO_ALIAS_CMP compares one maximum against the other minimum.  */
1864   unsigned HOST_WIDE_INT shared_align;
1865   tree_code no_alias_cmp;
1866   if (TREE_CODE (DR_STEP (dr_a.dr)) != INTEGER_CST
1867       || TREE_CODE (DR_STEP (dr_b.dr)) != INTEGER_CST)
1868     {
1869       /* At least one step is not constant.  Use the minimum alignment
1870          shared by all four pointers and subtract it from the exclusive
1871          maximum values to get inclusive ones.  This "- align" combines
1872          with the "+ access_size" in the maximum; in the best (and
1873          common) case the two cancel, leaving an inclusive bound based
1874          only on the segment length.  Because the maximums are
1875          inclusive, a maximum equal to the other minimum is an alias,
1876          hence the strict comparison.  */
1877       shared_align = MIN (dr_a.align, dr_b.align);
1878       no_alias_cmp = LT_EXPR;
1879     }
1880   else
1881     {
1882       /* Both steps are constant.  Adding access_size to the segment
1883          length is then likely to give a simple X * step, where X is the
1884          number of scalar iterations or the vectorization factor, so keep
1885          that rather than subtracting an alignment.  The maximums stay
1886          exclusive, so a maximum equal to the other minimum is no alias.  */
1887       shared_align = 0;
1888       no_alias_cmp = LE_EXPR;
1889     }
1890
1891   tree a_lo, a_hi, b_lo, b_hi;
1892   get_segment_min_max (dr_a, &a_lo, &a_hi, shared_align);
1893   get_segment_min_max (dr_b, &b_lo, &b_hi, shared_align);
1894
1895   tree a_before_b
1896     = fold_build2 (no_alias_cmp, boolean_type_node, a_hi, b_lo);
1897   tree b_before_a
1898     = fold_build2 (no_alias_cmp, boolean_type_node, b_hi, a_lo);
1899   *cond_expr = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1900                             a_before_b, b_before_a);
1901 }
1902
1903 /* Fold into *COND_EXPR one run-time overlap test for every pair of data
1904    references in ALIAS_PAIRS, all of which are in LOOP.  The tests are
1905    ANDed together (and with any condition already in *COND_EXPR); the
1906    result is meant for the if statement that selects which version of
1907    the loop gets executed at run time.  */
1908
1909 void
1910 create_runtime_alias_checks (struct loop *loop,
1911                              vec<dr_with_seg_len_pair_t> *alias_pairs,
1912                              tree * cond_expr)
1913 {
1914   const size_t num_pairs = alias_pairs->length ();
1915   for (size_t idx = 0; idx != num_pairs; idx++)
1916     {
1917       const dr_with_seg_len_pair_t &pair = (*alias_pairs)[idx];
1918       const dr_with_seg_len &dr_a = pair.first;
1919       const dr_with_seg_len &dr_b = pair.second;
1920
1921       if (dump_enabled_p ())
1922         {
1923           dump_printf (MSG_NOTE, "create runtime check for data references ");
1924           dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a.dr));
1925           dump_printf (MSG_NOTE, " and ");
1926           dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b.dr));
1927           dump_printf (MSG_NOTE, "\n");
1928         }
1929
1930       /* AND the test for this pair into the running condition.  */
1931       tree pair_cond;
1932       create_intersect_range_checks (loop, &pair_cond, dr_a, dr_b);
1933       *cond_expr = (*cond_expr
1934                     ? fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1935                                    *cond_expr, pair_cond)
1936                     : pair_cond);
1937     }
1938 }
1939
1940 /* Return true if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are
1941    identical expressions once STRIP_NOPS has been applied to both.  */
1942 static bool
1943 dr_equal_offsets_p1 (tree offset1, tree offset2)
1944 {
1945   STRIP_NOPS (offset1);
1946   STRIP_NOPS (offset2);
1947
1948   if (offset1 == offset2)
1949     return true;
1950
1951   if (TREE_CODE (offset1) != TREE_CODE (offset2))
1952     return false;
1953
1954   /* Only unary and binary expressions are compared operand by operand.  */
1955   const bool binary_p = BINARY_CLASS_P (offset1);
1956   if (!binary_p && !UNARY_CLASS_P (offset1))
1957     return false;
1958
1959   if (!dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
1960                             TREE_OPERAND (offset2, 0)))
1961     return false;
1962
1963   /* A unary expression has no second operand to compare.  */
1964   return (!binary_p
1965           || dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
1966                                   TREE_OPERAND (offset2, 1)));
1967 }
1968
1969 /* Return true if the DR_OFFSETs of DRA and DRB are identical expressions
1970    according to dr_equal_offsets_p1.  */
1971
1972 bool
1973 dr_equal_offsets_p (struct data_reference *dra,
1974                     struct data_reference *drb)
1975 {
1976   /* The comparison itself is done by the recursive helper; this
1977      wrapper only extracts the two offsets.  */
1978   tree offset_a = DR_OFFSET (dra), offset_b = DR_OFFSET (drb);
1979   return dr_equal_offsets_p1 (offset_a, offset_b);
1980 }
1981
1982 /* Return true if FNA and FNB have the same number of coefficients and
1983    each pair of corresponding coefficients is operand_equal_p.  */
1984
1985 static bool
1986 affine_function_equal_p (affine_fn fna, affine_fn fnb)
1987 {
1988   if (fna.length () != fnb.length ())
1989     return false;
1990
1991   tree coef_a;
1992   for (unsigned idx = 0; fna.iterate (idx, &coef_a); idx++)
1993     if (!operand_equal_p (coef_a, fnb[idx], 0))
1994       return false;
1995
1996   return true;
1997 }
1998
1999 /* If CF is nontrivial and all of its affine functions are equal, return
2000    the first of them; otherwise return affine_fn ().  */
2001
2002 static affine_fn
2003 common_affine_function (conflict_function *cf)
2004 {
2005   if (CF_NONTRIVIAL_P (cf))
2006     {
2007       affine_fn candidate = cf->fns[0];
2008
2009       /* Stop at the first function that differs from CANDIDATE.  */
2010       unsigned idx = 1;
2011       while (idx < cf->n
2012              && affine_function_equal_p (candidate, cf->fns[idx]))
2013         idx++;
2014       if (idx >= cf->n)
2015         return candidate;
2016     }
2017   return affine_fn ();
2018 }
2019
2020 /* Return the base of the affine function FN, i.e. its element 0.  */
2021 static tree
2022 affine_function_base (affine_fn fn)
2023 {
2024   tree base = fn[0];
2025   return base;
2026 }
2027
2028 /* Return true if FN is a constant function, i.e. every coefficient
2029    after the base (element 0) is integer_zerop.  */
2030
2031 static bool
2032 affine_function_constant_p (affine_fn fn)
2033 {
2034   const unsigned num_coefs = fn.length ();
2035   /* Element 0 is the base, so start at 1.  */
2036   for (unsigned idx = 1; idx < num_coefs; idx++)
2037     if (!integer_zerop (fn[idx]))
2038       return false;
2039
2040   return true;
2041 }
2042
2043 /* Return true if FN is constant and its base is integer_zerop.  */
2044 static bool
2045 affine_function_zero_p (affine_fn fn)
2046 {
2047   if (!integer_zerop (affine_function_base (fn)))
2048     return false;
2049   return affine_function_constant_p (fn);
2050 }
2051
2052 /* Return signed_type_for whichever of TA and TB has the larger
2053    TYPE_PRECISION; TB is used when the precisions are equal.  */
2054
2055 static tree
2056 signed_type_for_types (tree ta, tree tb)
2057 {
2058   tree wider = ta;
2059   if (TYPE_PRECISION (ta) <= TYPE_PRECISION (tb))
2060     wider = tb;
2061   return signed_type_for (wider);
2062 }
2063
2064 /* Apply OP coefficient-wise to the affine functions FNA and FNB and
2065    return the result as a newly created affine function.  A coefficient
2066    missing from the shorter function is taken as integer_zero_node.  */
2067
2068 static affine_fn
2069 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
2070 {
2071   const unsigned len_a = fna.length ();
2072   const unsigned len_b = fnb.length ();
2073   const unsigned shared = len_b > len_a ? len_a : len_b;
2074   const unsigned total = len_b > len_a ? len_b : len_a;
2075
2076   affine_fn result;
2077   result.create (total);
2078
2079   /* Coefficients present in both functions.  */
2080   unsigned idx = 0;
2081   for (; idx < shared; idx++)
2082     {
2083       tree type = signed_type_for_types (TREE_TYPE (fna[idx]),
2084                                          TREE_TYPE (fnb[idx]));
2085       result.quick_push (fold_build2 (op, type, fna[idx], fnb[idx]));
2086     }
2087
2088   /* At most one of the two loops below iterates: whichever function is
2089      longer supplies the remaining coefficients.  */
2090   for (; idx < len_a; idx++)
2091     {
2092       tree type = signed_type_for (TREE_TYPE (fna[idx]));
2093       result.quick_push (fold_build2 (op, type, fna[idx], integer_zero_node));
2094     }
2095   for (; idx < len_b; idx++)
2096     {
2097       tree type = signed_type_for (TREE_TYPE (fnb[idx]));
2098       result.quick_push (fold_build2 (op, type, integer_zero_node, fnb[idx]));
2099     }
2100   return result;
2101 }
2102
2103 /* Return the sum of affine functions FNA and FNB.  */
2104 static affine_fn
2105 affine_fn_plus (affine_fn fna, affine_fn fnb)
2106 {
2107   affine_fn sum = affine_fn_op (PLUS_EXPR, fna, fnb);
2108   return sum;
2109 }
2110
2111 /* Return the difference FNA - FNB of two affine functions.  */
2112 static affine_fn
2113 affine_fn_minus (affine_fn fna, affine_fn fnb)
2114 {
2115   affine_fn difference = affine_fn_op (MINUS_EXPR, fna, fnb);
2116   return difference;
2117 }
2118
2119 /* Frees affine function FN by calling its release () method.
        NOTE(review): FN is received by value, so any copy of the handle
        kept by the caller is not updated here -- confirm that callers do
        not use their copy afterwards.  */
2120
2121 static void
2122 affine_fn_free (affine_fn fn)
2123 {
2124   fn.release ();
2125 }
2126
2127 /* For each subscript of the data dependence relation DDR, set
2128    SUB_DISTANCE to the base of (common conflict function of A - common
2129    conflict function of B) when that difference is a constant function,
2130    and to chrec_dont_know otherwise.  Nothing is done unless
2131    DDR_ARE_DEPENDENT (DDR) is NULL_TREE.  */
2132
2133 static void
2134 compute_subscript_distance (struct data_dependence_relation *ddr)
2135 {
2136   if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
2137     return;
2138
2139   for (unsigned int idx = 0; idx < DDR_NUM_SUBSCRIPTS (ddr); idx++)
2140     {
2141       struct subscript *sub = DDR_SUBSCRIPT (ddr, idx);
2142
2143       /* Each side needs one affine function shared by all of its
2144          conflict functions.  */
2145       affine_fn common_a
2146         = common_affine_function (SUB_CONFLICTS_IN_A (sub));
2147       affine_fn common_b
2148         = common_affine_function (SUB_CONFLICTS_IN_B (sub));
2149
2150       /* If either side has none, give up on this subscript and leave
2151          the remaining ones untouched.  */
2152       if (!(common_a.exists () && common_b.exists ()))
2153         {
2154           SUB_DISTANCE (sub) = chrec_dont_know;
2155           return;
2156         }
2157
2158       /* DELTA is created by affine_fn_minus and freed below.  */
2159       affine_fn delta = affine_fn_minus (common_a, common_b);
2160       SUB_DISTANCE (sub) = (affine_function_constant_p (delta)
2161                             ? affine_function_base (delta)
2162                             : chrec_dont_know);
2163       affine_fn_free (delta);
2164     }
2165 }
2166
2167 /* Return a conflict function allocated with XCNEW whose N field is
2168    NOT_KNOWN, i.e. the conflict function for "unknown".  */
2169 static conflict_function *
2170 conflict_fn_not_known (void)
2171 {
2172   conflict_function *unknown = XCNEW (conflict_function);
2173
2174   unknown->n = NOT_KNOWN;
2175   return unknown;
2176 }
2177
2178 /* Return a conflict function allocated with XCNEW whose N field is
2179    NO_DEPENDENCE, i.e. the conflict function for "independent".  */
2180 static conflict_function *
2181 conflict_fn_no_dependence (void)
2182 {
2183   conflict_function *independent = XCNEW (conflict_function);
2184
2185   independent->n = NO_DEPENDENCE;
2186   return independent;
2187 }
2188
2189 /* Returns true if the address of OBJ is invariant in LOOP.  */
2190
2191 static bool
2192 object_address_invariant_in_loop_p (const struct loop *loop, const_tree obj)
2193 {
2194   /* Walk from OBJ through each handled component to its operand 0.  */
2195   for (; handled_component_p (obj); obj = TREE_OPERAND (obj, 0))
2196     switch (TREE_CODE (obj))
2197       {
2198       case ARRAY_REF:
2199         /* Index of the ARRAY_REF was zeroed in analyze_indices, thus only
2200            the stride and the lower bound of the reference need checking.  */
2201         if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
2202                                                     loop->num)
2203             || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 3),
2204                                                        loop->num))
2205           return false;
2206         break;
2207       case COMPONENT_REF:
2208         if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
2209                                                     loop->num))
2210           return false;
2211         break;
2212       default:
2213         break;
2214       }
2215
2216   /* Only an INDIRECT_REF or MEM_REF root needs its operand 0 checked.  */
2217   if (INDIRECT_REF_P (obj) || TREE_CODE (obj) == MEM_REF)
2218     return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
2219                                                     loop->num);
2220   return true;
2221 }
2222
2223 /* Returns false if we can prove that data references A and B do not alias,
2224    true otherwise.  If LOOP_NEST is false no cross-iteration aliases are
2225    considered.  */
2226
2227 bool
2228 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
2229                 bool loop_nest)
2230 {
       /* Despite their names, ADDR_A and ADDR_B hold the DR_BASE_OBJECTs of
          A and B, not addresses.  */
2231   tree addr_a = DR_BASE_OBJECT (a);
2232   tree addr_b = DR_BASE_OBJECT (b);
2233
2234   /* If we are not processing a loop nest but scalar code we
2235      do not need to care about possible cross-iteration dependences
2236      and thus can process the full original reference.  Do so,
2237      similar to how loop invariant motion applies extra offset-based
2238      disambiguation.  */
2239   if (!loop_nest)
2240     {
2241       aff_tree off1, off2;
2242       poly_widest_int size1, size2;
2243       get_inner_reference_aff (DR_REF (a), &off1, &size1);
2244       get_inner_reference_aff (DR_REF (b), &off2, &size2);
           /* OFF2 becomes OFF2 - OFF1, the difference between the two
              affine offsets.  */
2245       aff_combination_scale (&off1, -1);
2246       aff_combination_add (&off2, &off1);
2247       if (aff_comb_cannot_overlap_p (&off2, size1, size2))
2248         return false;
2249     }
2250
       /* Two (TARGET_)MEM_REF base objects in the same dependence clique but
          with different dependence bases are treated as not aliasing.  */
2251   if ((TREE_CODE (addr_a) == MEM_REF || TREE_CODE (addr_a) == TARGET_MEM_REF)
2252       && (TREE_CODE (addr_b) == MEM_REF || TREE_CODE (addr_b) == TARGET_MEM_REF)
2253       && MR_DEPENDENCE_CLIQUE (addr_a) == MR_DEPENDENCE_CLIQUE (addr_b)
2254       && MR_DEPENDENCE_BASE (addr_a) != MR_DEPENDENCE_BASE (addr_b))
2255     return false;
2256
2257   /* If we had an evolution in a pointer-based MEM_REF BASE_OBJECT we
2258      do not know the size of the base-object.  So we cannot do any
2259      offset/overlap based analysis but have to rely on points-to
2260      information only.  */
2261   if (TREE_CODE (addr_a) == MEM_REF
2262       && (DR_UNCONSTRAINED_BASE (a)
2263           || TREE_CODE (TREE_OPERAND (addr_a, 0)) == SSA_NAME))
2264     {
2265       /* For true dependences we can apply TBAA.  */
2266       if (flag_strict_aliasing
2267           && DR_IS_WRITE (a) && DR_IS_READ (b)
2268           && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
2269                                      get_alias_set (DR_REF (b))))
2270         return false;
2271       if (TREE_CODE (addr_b) == MEM_REF)
2272         return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
2273                                        TREE_OPERAND (addr_b, 0));
2274       else
2275         return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
2276                                        build_fold_addr_expr (addr_b));
2277     }
       /* The same test with B as the pointer-based MEM_REF.  Note that the
          TBAA condition below is unchanged: it still requires A to be the
          write and B the read.  */
2278   else if (TREE_CODE (addr_b) == MEM_REF
2279            && (DR_UNCONSTRAINED_BASE (b)
2280                || TREE_CODE (TREE_OPERAND (addr_b, 0)) == SSA_NAME))
2281     {
2282       /* For true dependences we can apply TBAA.  */
2283       if (flag_strict_aliasing
2284           && DR_IS_WRITE (a) && DR_IS_READ (b)
2285           && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
2286                                      get_alias_set (DR_REF (b))))
2287         return false;
2288       if (TREE_CODE (addr_a) == MEM_REF)
2289         return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
2290                                        TREE_OPERAND (addr_b, 0));
2291       else
2292         return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
2293                                        TREE_OPERAND (addr_b, 0));
2294     }
2295
2296   /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
2297      that is being subsetted in the loop nest.  */
       /* Choose the query by access kinds: both writes, A read with B write,
          or any other combination.  */
2298   if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
2299     return refs_output_dependent_p (addr_a, addr_b);
2300   else if (DR_IS_READ (a) && DR_IS_WRITE (b))
2301     return refs_anti_dependent_p (addr_a, addr_b);
2302   return refs_may_alias_p (addr_a, addr_b);
2303 }
2304
2305 /* REF_A and REF_B both satisfy access_fn_component_p.  Return true if
2306    their access functions can meaningfully be compared when checking
2307    for dependencies.  */
2308
2309 static bool
2310 access_fn_components_comparable_p (tree ref_a, tree ref_b)
2311 {
2312   /* The two refs must come from the same one of these sets:
2313
2314        { REALPART_EXPR, IMAGPART_EXPR }
2315        { COMPONENT_REF }
2316        { ARRAY_REF }
2317
2318      so fold IMAGPART_EXPR into REALPART_EXPR before comparing codes.  */
2319   tree_code kind_a = TREE_CODE (ref_a);
2320   tree_code kind_b = TREE_CODE (ref_b);
2321   kind_a = kind_a == IMAGPART_EXPR ? REALPART_EXPR : kind_a;
2322   kind_b = kind_b == IMAGPART_EXPR ? REALPART_EXPR : kind_b;
2323   if (kind_a != kind_b)
2324     return false;
2325
2326   if (kind_a != COMPONENT_REF)
2327     return types_compatible_p (TREE_TYPE (TREE_OPERAND (ref_a, 0)),
2328                                TREE_TYPE (TREE_OPERAND (ref_b, 0)));
2329
2330   /* ??? We cannot simply use the type of operand #0 of the refs here as
2331      the Fortran compiler smuggles type punning into COMPONENT_REFs.
2332      Use the DECL_CONTEXT of the FIELD_DECLs instead.  */
2333   return (DECL_CONTEXT (TREE_OPERAND (ref_a, 1))
2334           == DECL_CONTEXT (TREE_OPERAND (ref_b, 1)));
2335 }
2336
2337 /* Initialize a data dependence relation between data accesses A and
2338    B.  NB_LOOPS is the number of loops surrounding the references: the
2339    size of the classic distance/direction vectors.  */
2340
2341 struct data_dependence_relation *
2342 initialize_data_dependence_relation (struct data_reference *a,
2343                                      struct data_reference *b,
2344                                      vec<loop_p> loop_nest)
2345 {
2346   struct data_dependence_relation *res;
2347   unsigned int i;
2348
2349   res = XCNEW (struct data_dependence_relation);
2350   DDR_A (res) = a;
2351   DDR_B (res) = b;
2352   DDR_LOOP_NEST (res).create (0);
2353   DDR_SUBSCRIPTS (res).create (0);
2354   DDR_DIR_VECTS (res).create (0);
2355   DDR_DIST_VECTS (res).create (0);
2356
2357   if (a == NULL || b == NULL)
2358     {
2359       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
2360       return res;
2361     }
2362
2363   /* If the data references do not alias, then they are independent.  */
2364   if (!dr_may_alias_p (a, b, loop_nest.exists ()))
2365     {
2366       DDR_ARE_DEPENDENT (res) = chrec_known;
2367       return res;
2368     }
2369
2370   unsigned int num_dimensions_a = DR_NUM_DIMENSIONS (a);
2371   unsigned int num_dimensions_b = DR_NUM_DIMENSIONS (b);
2372   if (num_dimensions_a == 0 || num_dimensions_b == 0)
2373     {
2374       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
2375       return res;
2376     }
2377
2378   /* For unconstrained bases, the root (highest-indexed) subscript
2379      describes a variation in the base of the original DR_REF rather
2380      than a component access.  We have no type that accurately describes
2381      the new DR_BASE_OBJECT (whose TREE_TYPE describes the type *after*
2382      applying this subscript) so limit the search to the last real
2383      component access.
2384
2385      E.g. for:
2386
2387         void
2388         f (int a[][8], int b[][8])
2389         {
2390           for (int i = 0; i < 8; ++i)
2391             a[i * 2][0] = b[i][0];
2392         }
2393
2394      the a and b accesses have a single ARRAY_REF component reference [0]
2395      but have two subscripts.  */
2396   if (DR_UNCONSTRAINED_BASE (a))
2397     num_dimensions_a -= 1;
2398   if (DR_UNCONSTRAINED_BASE (b))
2399     num_dimensions_b -= 1;
2400
2401   /* These structures describe sequences of component references in
2402      DR_REF (A) and DR_REF (B).  Each component reference is tied to a
2403      specific access function.  */
2404   struct {
2405     /* The sequence starts at DR_ACCESS_FN (A, START_A) of A and
2406        DR_ACCESS_FN (B, START_B) of B (inclusive) and extends to higher
2407        indices.  In C notation, these are the indices of the rightmost
2408        component references; e.g. for a sequence .b.c.d, the start
2409        index is for .d.  */
2410     unsigned int start_a;
2411     unsigned int start_b;
2412
2413     /* The sequence contains LENGTH consecutive access functions from
2414        each DR.  */
2415     unsigned int length;
2416
2417     /* The enclosing objects for the A and B sequences respectively,
2418        i.e. the objects to which DR_ACCESS_FN (A, START_A + LENGTH - 1)
2419        and DR_ACCESS_FN (B, START_B + LENGTH - 1) are applied.  */
2420     tree object_a;
2421     tree object_b;
2422   } full_seq = {}, struct_seq = {};
2423
2424   /* Before each iteration of the loop:
2425
2426      - REF_A is what you get after applying DR_ACCESS_FN (A, INDEX_A) and
2427      - REF_B is what you get after applying DR_ACCESS_FN (B, INDEX_B).  */
2428   unsigned int index_a = 0;
2429   unsigned int index_b = 0;
2430   tree ref_a = DR_REF (a);
2431   tree ref_b = DR_REF (b);
2432
2433   /* Now walk the component references from the final DR_REFs back up to
2434      the enclosing base objects.  Each component reference corresponds
2435      to one access function in the DR, with access function 0 being for
2436      the final DR_REF and the highest-indexed access function being the
2437      one that is applied to the base of the DR.
2438
2439      Look for a sequence of component references whose access functions
2440      are comparable (see access_fn_components_comparable_p).  If more
2441      than one such sequence exists, pick the one nearest the base
2442      (which is the leftmost sequence in C notation).  Store this sequence
2443      in FULL_SEQ.
2444
2445      For example, if we have:
2446
2447         struct foo { struct bar s; ... } (*a)[10], (*b)[10];
2448
2449         A: a[0][i].s.c.d
2450         B: __real b[0][i].s.e[i].f
2451
2452      (where d is the same type as the real component of f) then the access
2453      functions would be:
2454
2455                          0   1   2   3
2456         A:              .d  .c  .s [i]
2457
2458                  0   1   2   3   4   5
2459         B:  __real  .f [i]  .e  .s [i]
2460
2461      The A0/B2 column isn't comparable, since .d is a COMPONENT_REF
2462      and [i] is an ARRAY_REF.  However, the A1/B3 column contains two
2463      COMPONENT_REF accesses for struct bar, so is comparable.  Likewise
2464      the A2/B4 column contains two COMPONENT_REF accesses for struct foo,
2465      so is comparable.  The A3/B5 column contains two ARRAY_REFs that
2466      index foo[10] arrays, so is again comparable.  The sequence is
2467      therefore:
2468
2469         A: [1, 3]  (i.e. [i].s.c)
2470         B: [3, 5]  (i.e. [i].s.e)
2471
2472      Also look for sequences of component references whose access
2473      functions are comparable and whose enclosing objects have the same
2474      RECORD_TYPE.  Store this sequence in STRUCT_SEQ.  In the above
2475      example, STRUCT_SEQ would be:
2476
2477         A: [1, 2]  (i.e. s.c)
2478         B: [3, 4]  (i.e. s.e)  */
2479   while (index_a < num_dimensions_a && index_b < num_dimensions_b)
2480     {
2481       /* REF_A and REF_B must be one of the component access types
2482          allowed by dr_analyze_indices.  */
2483       gcc_checking_assert (access_fn_component_p (ref_a));
2484       gcc_checking_assert (access_fn_component_p (ref_b));
2485
2486       /* Get the immediately-enclosing objects for REF_A and REF_B,
2487          i.e. the references *before* applying DR_ACCESS_FN (A, INDEX_A)
2488          and DR_ACCESS_FN (B, INDEX_B).  */
2489       tree object_a = TREE_OPERAND (ref_a, 0);
2490       tree object_b = TREE_OPERAND (ref_b, 0);
2491
2492       tree type_a = TREE_TYPE (object_a);
2493       tree type_b = TREE_TYPE (object_b);
2494       if (access_fn_components_comparable_p (ref_a, ref_b))
2495         {
2496           /* This pair of component accesses is comparable for dependence
2497              analysis, so we can include DR_ACCESS_FN (A, INDEX_A) and
2498              DR_ACCESS_FN (B, INDEX_B) in the sequence.  */
2499           if (full_seq.start_a + full_seq.length != index_a
2500               || full_seq.start_b + full_seq.length != index_b)
2501             {
2502               /* The accesses don't extend the current sequence,
2503                  so start a new one here.  */
2504               full_seq.start_a = index_a;
2505               full_seq.start_b = index_b;
2506               full_seq.length = 0;
2507             }
2508
2509           /* Add this pair of references to the sequence.  */
2510           full_seq.length += 1;
2511           full_seq.object_a = object_a;
2512           full_seq.object_b = object_b;
2513
2514           /* If the enclosing objects are structures (and thus have the
2515              same RECORD_TYPE), record the new sequence in STRUCT_SEQ.  */
2516           if (TREE_CODE (type_a) == RECORD_TYPE)
2517             struct_seq = full_seq;
2518
2519           /* Move to the next containing reference for both A and B.  */
2520           ref_a = object_a;
2521           ref_b = object_b;
2522           index_a += 1;
2523           index_b += 1;
2524           continue;
2525         }
2526
2527       /* Try to approach equal type sizes.  */
2528       if (!COMPLETE_TYPE_P (type_a)
2529           || !COMPLETE_TYPE_P (type_b)
2530           || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_a))
2531           || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_b)))
2532         break;
2533
2534       unsigned HOST_WIDE_INT size_a = tree_to_uhwi (TYPE_SIZE_UNIT (type_a));
2535       unsigned HOST_WIDE_INT size_b = tree_to_uhwi (TYPE_SIZE_UNIT (type_b));
2536       if (size_a <= size_b)
2537         {
2538           index_a += 1;
2539           ref_a = object_a;
2540         }
2541       if (size_b <= size_a)
2542         {
2543           index_b += 1;
2544           ref_b = object_b;
2545         }
2546     }
2547
2548   /* See whether FULL_SEQ ends at the base and whether the two bases
2549      are equal.  We do not care about TBAA or alignment info so we can
2550      use OEP_ADDRESS_OF to avoid false negatives.  */
2551   tree base_a = DR_BASE_OBJECT (a);
2552   tree base_b = DR_BASE_OBJECT (b);
2553   bool same_base_p = (full_seq.start_a + full_seq.length == num_dimensions_a
2554                       && full_seq.start_b + full_seq.length == num_dimensions_b
2555                       && DR_UNCONSTRAINED_BASE (a) == DR_UNCONSTRAINED_BASE (b)
2556                       && operand_equal_p (base_a, base_b, OEP_ADDRESS_OF)
2557                       && types_compatible_p (TREE_TYPE (base_a),
2558                                              TREE_TYPE (base_b))
2559                       && (!loop_nest.exists ()
2560                           || (object_address_invariant_in_loop_p
2561                               (loop_nest[0], base_a))));
2562
2563   /* If the bases are the same, we can include the base variation too.
2564      E.g. the b accesses in:
2565
2566        for (int i = 0; i < n; ++i)
2567          b[i + 4][0] = b[i][0];
2568
2569      have a definite dependence distance of 4, while for:
2570
2571        for (int i = 0; i < n; ++i)
2572          a[i + 4][0] = b[i][0];
2573
2574      the dependence distance depends on the gap between a and b.
2575
2576      If the bases are different then we can only rely on the sequence
2577      rooted at a structure access, since arrays are allowed to overlap
2578      arbitrarily and change shape arbitrarily.  E.g. we treat this as
2579      valid code:
2580
2581        int a[256];
2582        ...
2583        ((int (*)[4][3]) &a[1])[i][0] += ((int (*)[4][3]) &a[2])[i][0];
2584
2585      where two lvalues with the same int[4][3] type overlap, and where
2586      both lvalues are distinct from the object's declared type.  */
2587   if (same_base_p)
2588     {
2589       if (DR_UNCONSTRAINED_BASE (a))
2590         full_seq.length += 1;
2591     }
2592   else
2593     full_seq = struct_seq;
2594
2595   /* Punt if we didn't find a suitable sequence.  */
2596   if (full_seq.length == 0)
2597     {
2598       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
2599       return res;
2600     }
2601
2602   if (!same_base_p)
2603     {
2604       /* Partial overlap is possible for different bases when strict aliasing
2605          is not in effect.  It's also possible if either base involves a union
2606          access; e.g. for:
2607
2608            struct s1 { int a[2]; };
2609            struct s2 { struct s1 b; int c; };
2610            struct s3 { int d; struct s1 e; };
2611            union u { struct s2 f; struct s3 g; } *p, *q;
2612
2613          the s1 at "p->f.b" (base "p->f") partially overlaps the s1 at
2614          "p->g.e" (base "p->g") and might partially overlap the s1 at
2615          "q->g.e" (base "q->g").  */
2616       if (!flag_strict_aliasing
2617           || ref_contains_union_access_p (full_seq.object_a)
2618           || ref_contains_union_access_p (full_seq.object_b))
2619         {
2620           DDR_ARE_DEPENDENT (res) = chrec_dont_know;
2621           return res;
2622         }
2623
2624       DDR_COULD_BE_INDEPENDENT_P (res) = true;
2625       if (!loop_nest.exists ()
2626           || (object_address_invariant_in_loop_p (loop_nest[0],
2627                                                   full_seq.object_a)
2628               && object_address_invariant_in_loop_p (loop_nest[0],
2629                                                      full_seq.object_b)))
2630         {
2631           DDR_OBJECT_A (res) = full_seq.object_a;
2632           DDR_OBJECT_B (res) = full_seq.object_b;
2633         }
2634     }
2635
2636   DDR_AFFINE_P (res) = true;
2637   DDR_ARE_DEPENDENT (res) = NULL_TREE;
2638   DDR_SUBSCRIPTS (res).create (full_seq.length);
2639   DDR_LOOP_NEST (res) = loop_nest;
2640   DDR_INNER_LOOP (res) = 0;
2641   DDR_SELF_REFERENCE (res) = false;
2642
2643   for (i = 0; i < full_seq.length; ++i)
2644     {
2645       struct subscript *subscript;
2646
2647       subscript = XNEW (struct subscript);
2648       SUB_ACCESS_FN (subscript, 0) = DR_ACCESS_FN (a, full_seq.start_a + i);
2649       SUB_ACCESS_FN (subscript, 1) = DR_ACCESS_FN (b, full_seq.start_b + i);
2650       SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
2651       SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
2652       SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
2653       SUB_DISTANCE (subscript) = chrec_dont_know;
2654       DDR_SUBSCRIPTS (res).safe_push (subscript);
2655     }
2656
2657   return res;
2658 }
2659
2660 /* Frees memory used by the conflict function F.  */
2661
2662 static void
2663 free_conflict_function (conflict_function *f)
2664 {
2665   unsigned i;
2666
2667   if (CF_NONTRIVIAL_P (f))
2668     {
2669       for (i = 0; i < f->n; i++)
2670         affine_fn_free (f->fns[i]);
2671     }
2672   free (f);
2673 }
2674
2675 /* Frees memory used by SUBSCRIPTS.  */
2676
2677 static void
2678 free_subscripts (vec<subscript_p> subscripts)
2679 {
2680   unsigned i;
2681   subscript_p s;
2682
2683   FOR_EACH_VEC_ELT (subscripts, i, s)
2684     {
2685       free_conflict_function (s->conflicting_iterations_in_a);
2686       free_conflict_function (s->conflicting_iterations_in_b);
2687       free (s);
2688     }
2689   subscripts.release ();
2690 }
2691
2692 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
2693    description.  */
2694
2695 static inline void
2696 finalize_ddr_dependent (struct data_dependence_relation *ddr,
2697                         tree chrec)
2698 {
2699   DDR_ARE_DEPENDENT (ddr) = chrec;
2700   free_subscripts (DDR_SUBSCRIPTS (ddr));
2701   DDR_SUBSCRIPTS (ddr).create (0);
2702 }
2703
2704 /* The dependence relation DDR cannot be represented by a distance
2705    vector.  */
2706
2707 static inline void
2708 non_affine_dependence_relation (struct data_dependence_relation *ddr)
2709 {
2710   if (dump_file && (dump_flags & TDF_DETAILS))
2711     fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
2712
2713   DDR_AFFINE_P (ddr) = false;
2714 }
2715
2716 \f
2717
2718 /* This section contains the classic Banerjee tests.  */
2719
2720 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
2721    variables, i.e., if the ZIV (Zero Index Variable) test is true.  */
2722
2723 static inline bool
2724 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
2725 {
2726   return (evolution_function_is_constant_p (chrec_a)
2727           && evolution_function_is_constant_p (chrec_b));
2728 }
2729
2730 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
2731    variable, i.e., if the SIV (Single Index Variable) test is true.  */
2732
2733 static bool
2734 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
2735 {
2736   if ((evolution_function_is_constant_p (chrec_a)
2737        && evolution_function_is_univariate_p (chrec_b))
2738       || (evolution_function_is_constant_p (chrec_b)
2739           && evolution_function_is_univariate_p (chrec_a)))
2740     return true;
2741
2742   if (evolution_function_is_univariate_p (chrec_a)
2743       && evolution_function_is_univariate_p (chrec_b))
2744     {
2745       switch (TREE_CODE (chrec_a))
2746         {
2747         case POLYNOMIAL_CHREC:
2748           switch (TREE_CODE (chrec_b))
2749             {
2750             case POLYNOMIAL_CHREC:
2751               if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
2752                 return false;
2753               /* FALLTHRU */
2754
2755             default:
2756               return true;
2757             }
2758
2759         default:
2760           return true;
2761         }
2762     }
2763
2764   return false;
2765 }
2766
2767 /* Creates a conflict function with N dimensions.  The affine functions
2768    in each dimension follow.  */
2769
2770 static conflict_function *
2771 conflict_fn (unsigned n, ...)
2772 {
2773   unsigned i;
2774   conflict_function *ret = XCNEW (conflict_function);
2775   va_list ap;
2776
2777   gcc_assert (n > 0 && n <= MAX_DIM);
2778   va_start (ap, n);
2779
2780   ret->n = n;
2781   for (i = 0; i < n; i++)
2782     ret->fns[i] = va_arg (ap, affine_fn);
2783   va_end (ap);
2784
2785   return ret;
2786 }
2787
2788 /* Returns constant affine function with value CST.  */
2789
2790 static affine_fn
2791 affine_fn_cst (tree cst)
2792 {
2793   affine_fn fn;
2794   fn.create (1);
2795   fn.quick_push (cst);
2796   return fn;
2797 }
2798
2799 /* Returns affine function with single variable, CST + COEF * x_DIM.  */
2800
2801 static affine_fn
2802 affine_fn_univar (tree cst, unsigned dim, tree coef)
2803 {
2804   affine_fn fn;
2805   fn.create (dim + 1);
2806   unsigned i;
2807
2808   gcc_assert (dim > 0);
2809   fn.quick_push (cst);
2810   for (i = 1; i < dim; i++)
2811     fn.quick_push (integer_zero_node);
2812   fn.quick_push (coef);
2813   return fn;
2814 }
2815
2816 /* Analyze a ZIV (Zero Index Variable) subscript.  *OVERLAPS_A and
2817    *OVERLAPS_B are initialized to the functions that describe the
2818    relation between the elements accessed twice by CHREC_A and
2819    CHREC_B.  For k >= 0, the following property is verified:
2820
2821    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
2822
2823 static void
2824 analyze_ziv_subscript (tree chrec_a,
2825                        tree chrec_b,
2826                        conflict_function **overlaps_a,
2827                        conflict_function **overlaps_b,
2828                        tree *last_conflicts)
2829 {
2830   tree type, difference;
2831   dependence_stats.num_ziv++;
2832
2833   if (dump_file && (dump_flags & TDF_DETAILS))
2834     fprintf (dump_file, "(analyze_ziv_subscript \n");
2835
2836   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
2837   chrec_a = chrec_convert (type, chrec_a, NULL);
2838   chrec_b = chrec_convert (type, chrec_b, NULL);
2839   difference = chrec_fold_minus (type, chrec_a, chrec_b);
2840
2841   switch (TREE_CODE (difference))
2842     {
2843     case INTEGER_CST:
2844       if (integer_zerop (difference))
2845         {
2846           /* The difference is equal to zero: the accessed index
2847              overlaps for each iteration in the loop.  */
2848           *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2849           *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2850           *last_conflicts = chrec_dont_know;
2851           dependence_stats.num_ziv_dependent++;
2852         }
2853       else
2854         {
2855           /* The accesses do not overlap.  */
2856           *overlaps_a = conflict_fn_no_dependence ();
2857           *overlaps_b = conflict_fn_no_dependence ();
2858           *last_conflicts = integer_zero_node;
2859           dependence_stats.num_ziv_independent++;
2860         }
2861       break;
2862
2863     default:
2864       /* We're not sure whether the indexes overlap.  For the moment,
2865          conservatively answer "don't know".  */
2866       if (dump_file && (dump_flags & TDF_DETAILS))
2867         fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
2868
2869       *overlaps_a = conflict_fn_not_known ();
2870       *overlaps_b = conflict_fn_not_known ();
2871       *last_conflicts = chrec_dont_know;
2872       dependence_stats.num_ziv_unimplemented++;
2873       break;
2874     }
2875
2876   if (dump_file && (dump_flags & TDF_DETAILS))
2877     fprintf (dump_file, ")\n");
2878 }
2879
2880 /* Similar to max_stmt_executions_int, but returns the bound as a tree,
2881    and only if it fits to the int type.  If this is not the case, or the
2882    bound  on the number of iterations of LOOP could not be derived, returns
2883    chrec_dont_know.  */
2884
2885 static tree
2886 max_stmt_executions_tree (struct loop *loop)
2887 {
2888   widest_int nit;
2889
2890   if (!max_stmt_executions (loop, &nit))
2891     return chrec_dont_know;
2892
2893   if (!wi::fits_to_tree_p (nit, unsigned_type_node))
2894     return chrec_dont_know;
2895
2896   return wide_int_to_tree (unsigned_type_node, nit);
2897 }
2898
2899 /* Determine whether the CHREC is always positive/negative.  If the expression
2900    cannot be statically analyzed, return false, otherwise set the answer into
2901    VALUE.  */
2902
2903 static bool
2904 chrec_is_positive (tree chrec, bool *value)
2905 {
2906   bool value0, value1, value2;
2907   tree end_value, nb_iter;
2908
2909   switch (TREE_CODE (chrec))
2910     {
2911     case POLYNOMIAL_CHREC:
2912       if (!chrec_is_positive (CHREC_LEFT (chrec), &value0)
2913           || !chrec_is_positive (CHREC_RIGHT (chrec), &value1))
2914         return false;
2915
2916       /* FIXME -- overflows.  */
2917       if (value0 == value1)
2918         {
2919           *value = value0;
2920           return true;
2921         }
2922
2923       /* Otherwise the chrec is under the form: "{-197, +, 2}_1",
2924          and the proof consists in showing that the sign never
2925          changes during the execution of the loop, from 0 to
2926          loop->nb_iterations.  */
2927       if (!evolution_function_is_affine_p (chrec))
2928         return false;
2929
2930       nb_iter = number_of_latch_executions (get_chrec_loop (chrec));
2931       if (chrec_contains_undetermined (nb_iter))
2932         return false;
2933
2934 #if 0
2935       /* TODO -- If the test is after the exit, we may decrease the number of
2936          iterations by one.  */
2937       if (after_exit)
2938         nb_iter = chrec_fold_minus (type, nb_iter, build_int_cst (type, 1));
2939 #endif
2940
2941       end_value = chrec_apply (CHREC_VARIABLE (chrec), chrec, nb_iter);
2942
2943       if (!chrec_is_positive (end_value, &value2))
2944         return false;
2945
2946       *value = value0;
2947       return value0 == value1;
2948
2949     case INTEGER_CST:
2950       switch (tree_int_cst_sgn (chrec))
2951         {
2952         case -1:
2953           *value = false;
2954           break;
2955         case 1:
2956           *value = true;
2957           break;
2958         default:
2959           return false;
2960         }
2961       return true;
2962
2963     default:
2964       return false;
2965     }
2966 }
2967
2968
2969 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
2970    constant, and CHREC_B is an affine function.  *OVERLAPS_A and
2971    *OVERLAPS_B are initialized to the functions that describe the
2972    relation between the elements accessed twice by CHREC_A and
2973    CHREC_B.  For k >= 0, the following property is verified:
2974
2975    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
2976
2977 static void
2978 analyze_siv_subscript_cst_affine (tree chrec_a,
2979                                   tree chrec_b,
2980                                   conflict_function **overlaps_a,
2981                                   conflict_function **overlaps_b,
2982                                   tree *last_conflicts)
2983 {
2984   bool value0, value1, value2;
2985   tree type, difference, tmp;
2986
2987   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
2988   chrec_a = chrec_convert (type, chrec_a, NULL);
2989   chrec_b = chrec_convert (type, chrec_b, NULL);
2990   difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
2991
2992   /* Special case overlap in the first iteration.  */
2993   if (integer_zerop (difference))
2994     {
2995       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2996       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2997       *last_conflicts = integer_one_node;
2998       return;
2999     }
3000
3001   if (!chrec_is_positive (initial_condition (difference), &value0))
3002     {
3003       if (dump_file && (dump_flags & TDF_DETAILS))
3004         fprintf (dump_file, "siv test failed: chrec is not positive.\n");
3005
3006       dependence_stats.num_siv_unimplemented++;
3007       *overlaps_a = conflict_fn_not_known ();
3008       *overlaps_b = conflict_fn_not_known ();
3009       *last_conflicts = chrec_dont_know;
3010       return;
3011     }
3012   else
3013     {
3014       if (value0 == false)
3015         {
3016           if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
3017             {
3018               if (dump_file && (dump_flags & TDF_DETAILS))
3019                 fprintf (dump_file, "siv test failed: chrec not positive.\n");
3020
3021               *overlaps_a = conflict_fn_not_known ();
3022               *overlaps_b = conflict_fn_not_known ();
3023               *last_conflicts = chrec_dont_know;
3024               dependence_stats.num_siv_unimplemented++;
3025               return;
3026             }
3027           else
3028             {
3029               if (value1 == true)
3030                 {
3031                   /* Example:
3032                      chrec_a = 12
3033                      chrec_b = {10, +, 1}
3034                   */
3035
3036                   if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
3037                     {
3038                       HOST_WIDE_INT numiter;
3039                       struct loop *loop = get_chrec_loop (chrec_b);
3040
3041                       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3042                       tmp = fold_build2 (EXACT_DIV_EXPR, type,
3043                                          fold_build1 (ABS_EXPR, type, difference),
3044                                          CHREC_RIGHT (chrec_b));
3045                       *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
3046                       *last_conflicts = integer_one_node;
3047
3048
3049                       /* Perform weak-zero siv test to see if overlap is
3050                          outside the loop bounds.  */
3051                       numiter = max_stmt_executions_int (loop);
3052
3053                       if (numiter >= 0
3054                           && compare_tree_int (tmp, numiter) > 0)
3055                         {
3056                           free_conflict_function (*overlaps_a);
3057                           free_conflict_function (*overlaps_b);
3058                           *overlaps_a = conflict_fn_no_dependence ();
3059                           *overlaps_b = conflict_fn_no_dependence ();
3060                           *last_conflicts = integer_zero_node;
3061                           dependence_stats.num_siv_independent++;
3062                           return;
3063                         }
3064                       dependence_stats.num_siv_dependent++;
3065                       return;
3066                     }
3067
3068                   /* When the step does not divide the difference, there are
3069                      no overlaps.  */
3070                   else
3071                     {
3072                       *overlaps_a = conflict_fn_no_dependence ();
3073                       *overlaps_b = conflict_fn_no_dependence ();
3074                       *last_conflicts = integer_zero_node;
3075                       dependence_stats.num_siv_independent++;
3076                       return;
3077                     }
3078                 }
3079
3080               else
3081                 {
3082                   /* Example:
3083                      chrec_a = 12
3084                      chrec_b = {10, +, -1}
3085
3086                      In this case, chrec_a will not overlap with chrec_b.  */
3087                   *overlaps_a = conflict_fn_no_dependence ();
3088                   *overlaps_b = conflict_fn_no_dependence ();
3089                   *last_conflicts = integer_zero_node;
3090                   dependence_stats.num_siv_independent++;
3091                   return;
3092                 }
3093             }
3094         }
3095       else
3096         {
3097           if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
3098             {
3099               if (dump_file && (dump_flags & TDF_DETAILS))
3100                 fprintf (dump_file, "siv test failed: chrec not positive.\n");
3101
3102               *overlaps_a = conflict_fn_not_known ();
3103               *overlaps_b = conflict_fn_not_known ();
3104               *last_conflicts = chrec_dont_know;
3105               dependence_stats.num_siv_unimplemented++;
3106               return;
3107             }
3108           else
3109             {
3110               if (value2 == false)
3111                 {
3112                   /* Example:
3113                      chrec_a = 3
3114                      chrec_b = {10, +, -1}
3115                   */
3116                   if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
3117                     {
3118                       HOST_WIDE_INT numiter;
3119                       struct loop *loop = get_chrec_loop (chrec_b);
3120
3121                       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3122                       tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
3123                                          CHREC_RIGHT (chrec_b));
3124                       *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
3125                       *last_conflicts = integer_one_node;
3126
3127                       /* Perform weak-zero siv test to see if overlap is
3128                          outside the loop bounds.  */
3129                       numiter = max_stmt_executions_int (loop);
3130
3131                       if (numiter >= 0
3132                           && compare_tree_int (tmp, numiter) > 0)
3133                         {
3134                           free_conflict_function (*overlaps_a);
3135                           free_conflict_function (*overlaps_b);
3136                           *overlaps_a = conflict_fn_no_dependence ();
3137                           *overlaps_b = conflict_fn_no_dependence ();
3138                           *last_conflicts = integer_zero_node;
3139                           dependence_stats.num_siv_independent++;
3140                           return;
3141                         }
3142                       dependence_stats.num_siv_dependent++;
3143                       return;
3144                     }
3145
3146                   /* When the step does not divide the difference, there
3147                      are no overlaps.  */
3148                   else
3149                     {
3150                       *overlaps_a = conflict_fn_no_dependence ();
3151                       *overlaps_b = conflict_fn_no_dependence ();
3152                       *last_conflicts = integer_zero_node;
3153                       dependence_stats.num_siv_independent++;
3154                       return;
3155                     }
3156                 }
3157               else
3158                 {
3159                   /* Example:
3160                      chrec_a = 3
3161                      chrec_b = {4, +, 1}
3162
3163                      In this case, chrec_a will not overlap with chrec_b.  */
3164                   *overlaps_a = conflict_fn_no_dependence ();
3165                   *overlaps_b = conflict_fn_no_dependence ();
3166                   *last_conflicts = integer_zero_node;
3167                   dependence_stats.num_siv_independent++;
3168                   return;
3169                 }
3170             }
3171         }
3172     }
3173 }
3174
3175 /* Helper recursive function for initializing the matrix A.  Returns
3176    the initial value of CHREC.  */
3177
3178 static tree
3179 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
3180 {
3181   gcc_assert (chrec);
3182
3183   switch (TREE_CODE (chrec))
3184     {
3185     case POLYNOMIAL_CHREC:
3186       A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
3187       return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
3188
3189     case PLUS_EXPR:
3190     case MULT_EXPR:
3191     case MINUS_EXPR:
3192       {
3193         tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
3194         tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
3195
3196         return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
3197       }
3198
3199     CASE_CONVERT:
3200       {
3201         tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
3202         return chrec_convert (chrec_type (chrec), op, NULL);
3203       }
3204
3205     case BIT_NOT_EXPR:
3206       {
3207         /* Handle ~X as -1 - X.  */
3208         tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
3209         return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
3210                               build_int_cst (TREE_TYPE (chrec), -1), op);
3211       }
3212
3213     case INTEGER_CST:
3214       return chrec;
3215
3216     default:
3217       gcc_unreachable ();
3218       return NULL_TREE;
3219     }
3220 }
3221
/* Quotient of X by Y.  NOTE(review): this expands to the plain
   integer division operator, which in C99 and C++11 truncates toward
   zero; the result is a mathematical floor only when X and Y have the
   same sign.  */
#define FLOOR_DIV(x,y) ((x) / (y))
3223
3224 /* Solves the special case of the Diophantine equation:
3225    | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
3226
3227    Computes the descriptions OVERLAPS_A and OVERLAPS_B.  NITER is the
3228    number of iterations that loops X and Y run.  The overlaps will be
3229    constructed as evolutions in dimension DIM.  */
3230
3231 static void
3232 compute_overlap_steps_for_affine_univar (HOST_WIDE_INT niter,
3233                                          HOST_WIDE_INT step_a,
3234                                          HOST_WIDE_INT step_b,
3235                                          affine_fn *overlaps_a,
3236                                          affine_fn *overlaps_b,
3237                                          tree *last_conflicts, int dim)
3238 {
3239   if (((step_a > 0 && step_b > 0)
3240        || (step_a < 0 && step_b < 0)))
3241     {
3242       HOST_WIDE_INT step_overlaps_a, step_overlaps_b;
3243       HOST_WIDE_INT gcd_steps_a_b, last_conflict, tau2;
3244
3245       gcd_steps_a_b = gcd (step_a, step_b);
3246       step_overlaps_a = step_b / gcd_steps_a_b;
3247       step_overlaps_b = step_a / gcd_steps_a_b;
3248
3249       if (niter > 0)
3250         {
3251           tau2 = FLOOR_DIV (niter, step_overlaps_a);
3252           tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
3253           last_conflict = tau2;
3254           *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
3255         }
3256       else
3257         *last_conflicts = chrec_dont_know;
3258
3259       *overlaps_a = affine_fn_univar (integer_zero_node, dim,
3260                                       build_int_cst (NULL_TREE,
3261                                                      step_overlaps_a));
3262       *overlaps_b = affine_fn_univar (integer_zero_node, dim,
3263                                       build_int_cst (NULL_TREE,
3264                                                      step_overlaps_b));
3265     }
3266
3267   else
3268     {
3269       *overlaps_a = affine_fn_cst (integer_zero_node);
3270       *overlaps_b = affine_fn_cst (integer_zero_node);
3271       *last_conflicts = integer_zero_node;
3272     }
3273 }
3274
3275 /* Solves the special case of a Diophantine equation where CHREC_A is
3276    an affine bivariate function, and CHREC_B is an affine univariate
3277    function.  For example,
3278
3279    | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
3280
3281    has the following overlapping functions:
3282
3283    | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
3284    | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
3285    | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
3286
3287    FORNOW: This is a specialized implementation for a case occurring in
3288    a common benchmark.  Implement the general algorithm.  */
3289
3290 static void
3291 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
3292                                       conflict_function **overlaps_a,
3293                                       conflict_function **overlaps_b,
3294                                       tree *last_conflicts)
3295 {
3296   bool xz_p, yz_p, xyz_p;
3297   HOST_WIDE_INT step_x, step_y, step_z;
3298   HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
3299   affine_fn overlaps_a_xz, overlaps_b_xz;
3300   affine_fn overlaps_a_yz, overlaps_b_yz;
3301   affine_fn overlaps_a_xyz, overlaps_b_xyz;
3302   affine_fn ova1, ova2, ovb;
3303   tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
3304
3305   step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
3306   step_y = int_cst_value (CHREC_RIGHT (chrec_a));
3307   step_z = int_cst_value (CHREC_RIGHT (chrec_b));
3308
3309   niter_x = max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)));
3310   niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a));
3311   niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b));
3312
3313   if (niter_x < 0 || niter_y < 0 || niter_z < 0)
3314     {
3315       if (dump_file && (dump_flags & TDF_DETAILS))
3316         fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
3317
3318       *overlaps_a = conflict_fn_not_known ();
3319       *overlaps_b = conflict_fn_not_known ();
3320       *last_conflicts = chrec_dont_know;
3321       return;
3322     }
3323
3324   niter = MIN (niter_x, niter_z);
3325   compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
3326                                            &overlaps_a_xz,
3327                                            &overlaps_b_xz,
3328                                            &last_conflicts_xz, 1);
3329   niter = MIN (niter_y, niter_z);
3330   compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
3331                                            &overlaps_a_yz,
3332                                            &overlaps_b_yz,
3333                                            &last_conflicts_yz, 2);
3334   niter = MIN (niter_x, niter_z);
3335   niter = MIN (niter_y, niter);
3336   compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
3337                                            &overlaps_a_xyz,
3338                                            &overlaps_b_xyz,
3339                                            &last_conflicts_xyz, 3);
3340
3341   xz_p = !integer_zerop (last_conflicts_xz);
3342   yz_p = !integer_zerop (last_conflicts_yz);
3343   xyz_p = !integer_zerop (last_conflicts_xyz);
3344
3345   if (xz_p || yz_p || xyz_p)
3346     {
3347       ova1 = affine_fn_cst (integer_zero_node);
3348       ova2 = affine_fn_cst (integer_zero_node);
3349       ovb = affine_fn_cst (integer_zero_node);
3350       if (xz_p)
3351         {
3352           affine_fn t0 = ova1;
3353           affine_fn t2 = ovb;
3354
3355           ova1 = affine_fn_plus (ova1, overlaps_a_xz);
3356           ovb = affine_fn_plus (ovb, overlaps_b_xz);
3357           affine_fn_free (t0);
3358           affine_fn_free (t2);
3359           *last_conflicts = last_conflicts_xz;
3360         }
3361       if (yz_p)
3362         {
3363           affine_fn t0 = ova2;
3364           affine_fn t2 = ovb;
3365
3366           ova2 = affine_fn_plus (ova2, overlaps_a_yz);
3367           ovb = affine_fn_plus (ovb, overlaps_b_yz);
3368           affine_fn_free (t0);
3369           affine_fn_free (t2);
3370           *last_conflicts = last_conflicts_yz;
3371         }
3372       if (xyz_p)
3373         {
3374           affine_fn t0 = ova1;
3375           affine_fn t2 = ova2;
3376           affine_fn t4 = ovb;
3377
3378           ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
3379           ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
3380           ovb = affine_fn_plus (ovb, overlaps_b_xyz);
3381           affine_fn_free (t0);
3382           affine_fn_free (t2);
3383           affine_fn_free (t4);
3384           *last_conflicts = last_conflicts_xyz;
3385         }
3386       *overlaps_a = conflict_fn (2, ova1, ova2);
3387       *overlaps_b = conflict_fn (1, ovb);
3388     }
3389   else
3390     {
3391       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3392       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3393       *last_conflicts = integer_zero_node;
3394     }
3395
3396   affine_fn_free (overlaps_a_xz);
3397   affine_fn_free (overlaps_b_xz);
3398   affine_fn_free (overlaps_a_yz);
3399   affine_fn_free (overlaps_b_yz);
3400   affine_fn_free (overlaps_a_xyz);
3401   affine_fn_free (overlaps_b_xyz);
3402 }
3403
3404 /* Duplicate the first SIZE elements of vector VEC1 into VEC2.  */
3405
3406 static void
3407 lambda_vector_copy (lambda_vector vec1, lambda_vector vec2, int size)
3408 {
3409   /* Byte length of SIZE elements; the copy itself is a plain memcpy.  */
3410   const size_t nbytes = size * sizeof (*vec1);
3411   memcpy (vec2, vec1, nbytes);
     }
3412
3413 /* Duplicate the M x N matrix MAT1 into MAT2, one row at a time.  */
3414
3415 static void
3416 lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2, int m, int n)
3417 {
3418   int row = 0;
3419
3420   /* Each row is copied as an N-element vector.  */
3421   for (; row < m; row++)
3422     lambda_vector_copy (mat1[row], mat2[row], n);
3423 }
3424
3425 /* Overwrite MAT with the SIZE x SIZE identity matrix.  */
3426
3427 static void
3428 lambda_matrix_id (lambda_matrix mat, int size)
3429 {
3430   int row, col;
3431   /* Ones on the diagonal, zeros everywhere else.  */
3432   for (row = 0; row < size; row++)
3433     for (col = 0; col < size; col++)
3434       mat[row][col] = (row != col) ? 0 : 1;
3435 }
3436
3437 /* Return the index of the first nonzero element of vector VEC1 at or
3438    after START and before N.  We must have START <= N.  Returns N if
3439    no such element exists.  */
3440
3441 static int
3442 lambda_vector_first_nz (lambda_vector vec1, int n, int start)
3443 {
3444   int pos;
3445   for (pos = start; pos < n && vec1[pos] == 0; pos++)
3446     continue;
3447   return pos;
     }
3448
3449 /* In matrix MAT, whose rows have N columns, add CONST1 times row R1
3450    to row R2, i.e. R2 = R2 + CONST1 * R1.  A zero CONST1 is a no-op.  */
3451
3452 static void
3453 lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2, int const1)
3454 {
3455   if (const1 != 0)
3456     {
3457       lambda_vector src = mat[r1];
3458       lambda_vector dst = mat[r2];
3459       for (int col = 0; col < n; col++)
3460         dst[col] += const1 * src[col];
3461     }
3462 }
3463
3464 /* Store in VEC2 the product of the constant CONST1 and the vector
3465    VEC1 of length SIZE.  A zero CONST1 simply clears VEC2.  */
3466
3467 static void
3468 lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
3469                           int size, int const1)
3470 {
3471   if (const1 == 0)
3472     {
3473       lambda_vector_clear (vec2, size);
3474       return;
3475     }
3476   for (int k = 0; k < size; k++)
3477     vec2[k] = const1 * vec1[k];
3478 }
3479
3480 /* Store in VEC2 the negation of the SIZE-element vector VEC1.  */
3481
3482 static void
3483 lambda_vector_negate (lambda_vector vec1, lambda_vector vec2, int size)
3484 {
3485   /* Negation is multiplication by the constant -1.  */
3486   const int minus_one = -1;
3487   lambda_vector_mult_const (vec1, vec2, size, minus_one);
     }
3488
3489 /* Negate, in place, row R1 of the N-column matrix MAT.  */
3490
3491 static void
3492 lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
3493 {
3494   lambda_vector row = mat[r1];
       lambda_vector_negate (row, row, n);
3495 }
3496
3497 /* Return true iff VEC1 and VEC2 agree on their first SIZE elements.  */
3498
3499 static bool
3500 lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
3501 {
3502   int k = 0;
3503   /* Advance past the common prefix of the two vectors.  */
3504   while (k < size && vec1[k] == vec2[k])
3505     k++;
3506   return k >= size;
3507 }
3508
3509 /* Given an M x N integer matrix A, this function determines an M x
3510    M unimodular matrix U, and an M x N echelon matrix S such that
3511    "U.A = S".  This decomposition is also known as "right Hermite".
3512
3513    Ref: Algorithm 2.1 page 33 in "Loop Transformations for
3514    Restructuring Compilers" Utpal Banerjee.
     
        A is only read; S and U are overwritten and must already provide
        M rows of N and M elements respectively.  */
3515
3516 static void
3517 lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
3518                              lambda_matrix S, lambda_matrix U)
3519 {
3520   int i, j, i0 = 0;
3521
       /* Start from S = A and U = identity.  Every row operation below is
          applied with the same rows and factor to both S and U.  */
3522   lambda_matrix_copy (A, S, m, n);
3523   lambda_matrix_id (U, m);
3524
3525   for (j = 0; j < n; j++)
3526     {
           /* NOTE(review): this scans S[j][i0 .. m-1], i.e. entries of row J,
              while the elimination below works on column J.  The only
              caller visible in this file passes N == 1 -- confirm that
              indexing row J up to M is intended.  */
3527       if (lambda_vector_first_nz (S[j], m, i0) < m)
3528         {
3529           ++i0;
               /* Walk the rows from the bottom up to I0, clearing S[i][j]
                  against the row just above it.  */
3530           for (i = m - 1; i >= i0; i--)
3531             {
3532               while (S[i][j] != 0)
3533                 {
3534                   int sigma, factor, a, b;
3535
3536                   a = S[i-1][j];
3537                   b = S[i][j];
                       /* SIGMA is the sign of a / b.  NOTE(review): a * b is
                          computed in int and could overflow for large
                          entries.  */
3538                   sigma = (a * b < 0) ? -1: 1;
3539                   a = abs (a);
3540                   b = abs (b);
3541                   factor = sigma * (a / b);
3542
                       /* Row I-1 -= FACTOR * row I, then exchange the two
                          rows: S[i-1][j] becomes the old S[i][j] and S[i][j]
                          becomes a - factor * b.  */
3543                   lambda_matrix_row_add (S, n, i, i-1, -factor);
3544                   std::swap (S[i], S[i-1]);
3545
                       /* Mirror the same operation on U.  */
3546                   lambda_matrix_row_add (U, m, i, i-1, -factor);
3547                   std::swap (U[i], U[i-1]);
3548                 }
3549             }
3550         }
3551     }
3552 }
3553
3554 /* Determines the overlapping elements due to accesses CHREC_A and
3555    CHREC_B, that are affine functions.  This function cannot handle
3556    symbolic evolution functions, ie. when initial conditions are
3557    parameters, because it uses lambda matrices of integers.
     
        The results are returned through *OVERLAPS_A and *OVERLAPS_B (which
        may be "not known" or "no dependence" conflict functions) and
        through *LAST_CONFLICTS, which is chrec_dont_know when unknown and
        integer_zero_node when no dependence is reported.  */
3558
3559 static void
3560 analyze_subscript_affine_affine (tree chrec_a,
3561                                  tree chrec_b,
3562                                  conflict_function **overlaps_a,
3563                                  conflict_function **overlaps_b,
3564                                  tree *last_conflicts)
3565 {
3566   unsigned nb_vars_a, nb_vars_b, dim;
3567   HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta;
3568   lambda_matrix A, U, S;
3569   struct obstack scratch_obstack;
3570
       /* This early exit happens before the scratch obstack is set up and
          before the opening dump line, so it must not go through the
          common exit label.  */
3571   if (eq_evolutions_p (chrec_a, chrec_b))
3572     {
3573       /* The accessed index overlaps for each iteration in the
3574          loop.  */
3575       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3576       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3577       *last_conflicts = chrec_dont_know;
3578       return;
3579     }
3580   if (dump_file && (dump_flags & TDF_DETAILS))
3581     fprintf (dump_file, "(analyze_subscript_affine_affine \n");
3582
3583   /* For determining the initial intersection, we have to solve a
3584      Diophantine equation.  This is the most time consuming part.
3585
3586      For answering to the question: "Is there a dependence?" we have
3587      to prove that there exists a solution to the Diophantine
3588      equation, and that the solution is in the iteration domain,
3589      i.e. the solution is positive or zero, and that the solution
3590      happens before the upper bound loop.nb_iterations.  Otherwise
3591      there is no dependence.  This function outputs a description of
3592      the iterations that hold the intersections.  */
3593
3594   nb_vars_a = nb_vars_in_chrec (chrec_a);
3595   nb_vars_b = nb_vars_in_chrec (chrec_b);
3596
3597   gcc_obstack_init (&scratch_obstack);
3598
       /* All three matrices live on the scratch obstack, which is released
          at the common exit label.  U is DIM x DIM; A and S are DIM x 1.  */
3599   dim = nb_vars_a + nb_vars_b;
3600   U = lambda_matrix_new (dim, dim, &scratch_obstack);
3601   A = lambda_matrix_new (dim, 1, &scratch_obstack);
3602   S = lambda_matrix_new (dim, 1, &scratch_obstack);
3603
       /* NOTE(review): initialize_matrix_A presumably stores the steps of
          the chrec into A (from row 0 with factor 1 for CHREC_A, from row
          NB_VARS_A with factor -1 for CHREC_B) -- confirm against its
          definition.  Its result is used here as the constant initial
          value of the chrec.  */
3604   init_a = int_cst_value (initialize_matrix_A (A, chrec_a, 0, 1));
3605   init_b = int_cst_value (initialize_matrix_A (A, chrec_b, nb_vars_a, -1));
3606   gamma = init_b - init_a;
3607
3608   /* Don't do all the hard work of solving the Diophantine equation
3609      when we already know the solution: for example,
3610      | {3, +, 1}_1
3611      | {3, +, 4}_2
3612      | gamma = 3 - 3 = 0.
3613      Then the first overlap occurs during the first iterations:
3614      | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
3615   */
3616   if (gamma == 0)
3617     {
3618       if (nb_vars_a == 1 && nb_vars_b == 1)
3619         {
3620           HOST_WIDE_INT step_a, step_b;
3621           HOST_WIDE_INT niter, niter_a, niter_b;
3622           affine_fn ova, ovb;
3623
3624           niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a));
3625           niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b));
3626           niter = MIN (niter_a, niter_b);
3627           step_a = int_cst_value (CHREC_RIGHT (chrec_a));
3628           step_b = int_cst_value (CHREC_RIGHT (chrec_b));
3629
3630           compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
3631                                                    &ova, &ovb,
3632                                                    last_conflicts, 1);
3633           *overlaps_a = conflict_fn (1, ova);
3634           *overlaps_b = conflict_fn (1, ovb);
3635         }
3636
3637       else if (nb_vars_a == 2 && nb_vars_b == 1)
3638         compute_overlap_steps_for_affine_1_2
3639           (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
3640
           /* Same helper with the roles of A and B exchanged, so the output
              slots are exchanged as well.  */
3641       else if (nb_vars_a == 1 && nb_vars_b == 2)
3642         compute_overlap_steps_for_affine_1_2
3643           (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
3644
3645       else
3646         {
3647           if (dump_file && (dump_flags & TDF_DETAILS))
3648             fprintf (dump_file, "affine-affine test failed: too many variables.\n");
3649           *overlaps_a = conflict_fn_not_known ();
3650           *overlaps_b = conflict_fn_not_known ();
3651           *last_conflicts = chrec_dont_know;
3652         }
3653       goto end_analyze_subs_aa;
3654     }
3655
3656   /* U.A = S */
3657   lambda_matrix_right_hermite (A, dim, 1, S, U);
3658
       /* Normalize the sign so that the value read from S[0][0] is not
          negative; the first row of U is negated to match.  */
3659   if (S[0][0] < 0)
3660     {
3661       S[0][0] *= -1;
3662       lambda_matrix_row_negate (U, dim, 0);
3663     }
3664   gcd_alpha_beta = S[0][0];
3665
3666   /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
3667      but that is a quite strange case.  Instead of ICEing, answer
3668      don't know.  */
3669   if (gcd_alpha_beta == 0)
3670     {
3671       *overlaps_a = conflict_fn_not_known ();
3672       *overlaps_b = conflict_fn_not_known ();
3673       *last_conflicts = chrec_dont_know;
3674       goto end_analyze_subs_aa;
3675     }
3676
3677   /* The classic "gcd-test".  */
3678   if (!int_divides_p (gcd_alpha_beta, gamma))
3679     {
3680       /* The "gcd-test" has determined that there is no integer
3681          solution, i.e. there is no dependence.  */
3682       *overlaps_a = conflict_fn_no_dependence ();
3683       *overlaps_b = conflict_fn_no_dependence ();
3684       *last_conflicts = integer_zero_node;
3685     }
3686
3687   /* Both access functions are univariate.  This includes SIV and MIV cases.  */
3688   else if (nb_vars_a == 1 && nb_vars_b == 1)
3689     {
3690       /* Both functions should have the same evolution sign.  */
3691       if (((A[0][0] > 0 && -A[1][0] > 0)
3692            || (A[0][0] < 0 && -A[1][0] < 0)))
3693         {
3694           /* The solutions are given by:
3695              |
3696              | [GAMMA/GCD_ALPHA_BETA  t].[u11 u12]  = [x0]
3697              |                           [u21 u22]    [y0]
3698
3699              For a given integer t.  Using the following variables,
3700
3701              | i0 = u11 * gamma / gcd_alpha_beta
3702              | j0 = u12 * gamma / gcd_alpha_beta
3703              | i1 = u21
3704              | j1 = u22
3705
3706              the solutions are:
3707
3708              | x0 = i0 + i1 * t,
3709              | y0 = j0 + j1 * t.  */
3710           HOST_WIDE_INT i0, j0, i1, j1;
3711
3712           i0 = U[0][0] * gamma / gcd_alpha_beta;
3713           j0 = U[0][1] * gamma / gcd_alpha_beta;
3714           i1 = U[1][0];
3715           j1 = U[1][1];
3716
               /* With a zero coefficient the corresponding solution is the
                  constant i0 (resp. j0); a negative constant is rejected.  */
3717           if ((i1 == 0 && i0 < 0)
3718               || (j1 == 0 && j0 < 0))
3719             {
3720               /* There is no solution.
3721                  FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
3722                  falls in here, but for the moment we don't look at the
3723                  upper bound of the iteration domain.  */
3724               *overlaps_a = conflict_fn_no_dependence ();
3725               *overlaps_b = conflict_fn_no_dependence ();
3726               *last_conflicts = integer_zero_node;
3727               goto end_analyze_subs_aa;
3728             }
3729
3730           if (i1 > 0 && j1 > 0)
3731             {
3732               HOST_WIDE_INT niter_a
3733                 = max_stmt_executions_int (get_chrec_loop (chrec_a));
3734               HOST_WIDE_INT niter_b
3735                 = max_stmt_executions_int (get_chrec_loop (chrec_b));
3736               HOST_WIDE_INT niter = MIN (niter_a, niter_b);
3737
3738               /* (X0, Y0) is a solution of the Diophantine equation:
3739                  "chrec_a (X0) = chrec_b (Y0)".  */
3740               HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
3741                                         CEIL (-j0, j1));
3742               HOST_WIDE_INT x0 = i1 * tau1 + i0;
3743               HOST_WIDE_INT y0 = j1 * tau1 + j0;
3744
3745               /* (X1, Y1) is the smallest positive solution of the eq
3746                  "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
3747                  first conflict occurs.  */
3748               HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
3749               HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
3750               HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
3751
                   /* Iteration bounds are only used when NITER is positive;
                      otherwise the last conflict is left unknown.  */
3752               if (niter > 0)
3753                 {
3754                   HOST_WIDE_INT tau2 = MIN (FLOOR_DIV (niter_a - i0, i1),
3755                                             FLOOR_DIV (niter_b - j0, j1));
3756                   HOST_WIDE_INT last_conflict = tau2 - (x1 - i0)/i1;
3757
3758                   /* If the overlap occurs outside of the bounds of the
3759                      loop, there is no dependence.  */
3760                   if (x1 >= niter_a || y1 >= niter_b)
3761                     {
3762                       *overlaps_a = conflict_fn_no_dependence ();
3763                       *overlaps_b = conflict_fn_no_dependence ();
3764                       *last_conflicts = integer_zero_node;
3765                       goto end_analyze_subs_aa;
3766                     }
3767                   else
3768                     *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
3769                 }
3770               else
3771                 *last_conflicts = chrec_dont_know;
3772
                   /* The conflicts are described by univariate affine
                      functions built from X1 and I1 for A, and from Y1 and
                      J1 for B.  */
3773               *overlaps_a
3774                 = conflict_fn (1,
3775                                affine_fn_univar (build_int_cst (NULL_TREE, x1),
3776                                                  1,
3777                                                  build_int_cst (NULL_TREE, i1)));
3778               *overlaps_b
3779                 = conflict_fn (1,
3780                                affine_fn_univar (build_int_cst (NULL_TREE, y1),
3781                                                  1,
3782                                                  build_int_cst (NULL_TREE, j1)));
3783             }
3784           else
3785             {
3786               /* FIXME: For the moment, the upper bound of the
3787                  iteration domain for i and j is not checked.  */
3788               if (dump_file && (dump_flags & TDF_DETAILS))
3789                 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
3790               *overlaps_a = conflict_fn_not_known ();
3791               *overlaps_b = conflict_fn_not_known ();
3792               *last_conflicts = chrec_dont_know;
3793             }
3794         }
3795       else
3796         {
3797           if (dump_file && (dump_flags & TDF_DETAILS))
3798             fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
3799           *overlaps_a = conflict_fn_not_known ();
3800           *overlaps_b = conflict_fn_not_known ();
3801           *last_conflicts = chrec_dont_know;
3802         }
3803     }
3804   else
3805     {
3806       if (dump_file && (dump_flags & TDF_DETAILS))
3807         fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
3808       *overlaps_a = conflict_fn_not_known ();
3809       *overlaps_b = conflict_fn_not_known ();
3810       *last_conflicts = chrec_dont_know;
3811     }
3812
     /* Common exit for every path taken after the obstack was initialized:
        release the scratch matrices and dump the computed conflict
        functions.  */
3813 end_analyze_subs_aa:
3814   obstack_free (&scratch_obstack, NULL);
3815   if (dump_file && (dump_flags & TDF_DETAILS))
3816     {
3817       fprintf (dump_file, "  (overlaps_a = ");
3818       dump_conflict_function (dump_file, *overlaps_a);
3819       fprintf (dump_file, ")\n  (overlaps_b = ");
3820       dump_conflict_function (dump_file, *overlaps_b);
3821       fprintf (dump_file, "))\n");
3822     }
3823 }
3824
3825 /* Try to make *CHREC_A and *CHREC_B, whose initial values may contain
3826    symbols, usable by analyze_subscript_affine_affine.  If no step
3827    contains symbols and the initial values differ by a constant,
3828    replace the initial values by that difference and by zero
3829    respectively, and return true.  Otherwise return false.
3830
3831    Example: The analysis of the following tuples of evolutions produces
3832    the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
3833    vs. {0, +, 1}_1
3834
3835    {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
3836    {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
3837 */
3838
3839 static bool
3840 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
3841 {
3842   /* FIXME: Symbolic steps are not handled for the moment.  Might be
3843      refined later.  */
3844   if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a)))
3845     return false;
3846   if (chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
3847     return false;
3848
3849   /* Difference of the initial values, in the type of *CHREC_A.  */
3850   tree base_type = chrec_type (*chrec_a);
3851   tree init_a = CHREC_LEFT (*chrec_a);
3852   tree init_b = chrec_convert (base_type, CHREC_LEFT (*chrec_b), NULL);
3853   tree delta = chrec_fold_minus (base_type, init_a, init_b);
3854   if (!evolution_function_is_constant_p (delta))
3855     return false;
3856
3857   if (dump_file && (dump_flags & TDF_DETAILS))
3858     fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
3859
3860   /* Rebase the pair: A now starts at DELTA and B at zero.  */
3861   tree step_a = CHREC_RIGHT (*chrec_a);
3862   *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a), delta, step_a);
3863   tree step_b = chrec_convert (base_type, CHREC_RIGHT (*chrec_b), NULL);
3864   tree zero = build_int_cst (base_type, 0);
3865   *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b), zero, step_b);
3866   return true;
3867 }
3868
3869 /* Analyze a SIV (Single Index Variable) subscript.  *OVERLAPS_A and
3870    *OVERLAPS_B receive the functions that describe the relation
3871    between the elements accessed twice by CHREC_A and CHREC_B, so that
3872    for k >= 0:
3873
3874    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).
3875
3876    *LAST_CONFLICTS is filled in by the test that gets selected, and
3877    LOOP_NEST_NUM is the loop number handed to the affine checks.  */
3878
3879 static void
3880 analyze_siv_subscript (tree chrec_a,
3881                        tree chrec_b,
3882                        conflict_function **overlaps_a,
3883                        conflict_function **overlaps_b,
3884                        tree *last_conflicts,
3885                        int loop_nest_num)
3886 {
3887   dependence_stats.num_siv++;
3888
3889   if (dump_file && (dump_flags & TDF_DETAILS))
3890     fprintf (dump_file, "(analyze_siv_subscript \n");
3891
3892   if (evolution_function_is_constant_p (chrec_a)
3893       && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
3894     /* Constant against affine.  */
3895     analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
3896                                       overlaps_a, overlaps_b, last_conflicts);
3897
3898   else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
3899            && evolution_function_is_constant_p (chrec_b))
3900     /* Affine against constant: same test with the operands and the
3901        output slots exchanged.  */
3902     analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
3903                                       overlaps_b, overlaps_a, last_conflicts);
3904
3905   /* Affine against affine.  Symbol-free chrecs are analyzed directly;
3906      symbolic ones only when they can first be rewritten, which updates
3907      the local CHREC_A and CHREC_B.  */
3908   else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
3909            && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num)
3910            && (!(chrec_contains_symbols (chrec_a)
3911                  || chrec_contains_symbols (chrec_b))
3912                || can_use_analyze_subscript_affine_affine (&chrec_a,
3913                                                            &chrec_b)))
3914     {
3915       analyze_subscript_affine_affine (chrec_a, chrec_b,
3916                                        overlaps_a, overlaps_b,
3917                                        last_conflicts);
3918
3919       /* Classify the outcome for the statistics.  */
3920       if (CF_NOT_KNOWN_P (*overlaps_a)
3921           || CF_NOT_KNOWN_P (*overlaps_b))
3922         dependence_stats.num_siv_unimplemented++;
3923       else if (CF_NO_DEPENDENCE_P (*overlaps_a)
3924                || CF_NO_DEPENDENCE_P (*overlaps_b))
3925         dependence_stats.num_siv_independent++;
3926       else
3927         dependence_stats.num_siv_dependent++;
3928     }
3929
3930   else
3931     {
3932       /* None of the supported shapes matched: answer "don't know".  */
3933       if (dump_file && (dump_flags & TDF_DETAILS))
3934         fprintf (dump_file, "  siv test failed: unimplemented");
3935       *overlaps_a = conflict_fn_not_known ();
3936       *overlaps_b = conflict_fn_not_known ();
3937       *last_conflicts = chrec_dont_know;
3938       dependence_stats.num_siv_unimplemented++;
3939     }
3940
3941   if (dump_file && (dump_flags & TDF_DETAILS))
3942     fprintf (dump_file, ")\n");
3943 }
3952
3953 /* Returns false if we can prove that the greatest common divisor of the steps
3954    of CHREC does not divide CST, true otherwise.
     
        The answer is the conservative "true" when CST or one of the steps
        does not fit a signed HOST_WIDE_INT, and also when the accumulated
        gcd is zero (CHREC is not a POLYNOMIAL_CHREC, or the gcd of its
        steps is zero), since the remainder test would then divide by
        zero.  */
3955
3956 static bool
3957 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
3958 {
3959   HOST_WIDE_INT cd = 0, val;
3960   tree step;
3961
3962   if (!tree_fits_shwi_p (cst))
3963     return true;
3964   val = tree_to_shwi (cst);
3965
       /* Fold the step of every nested POLYNOMIAL_CHREC level into CD.  */
3966   while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
3967     {
3968       step = CHREC_RIGHT (chrec);
3969       if (!tree_fits_shwi_p (step))
3970         return true;
3971       cd = gcd (cd, tree_to_shwi (step));
3972       chrec = CHREC_LEFT (chrec);
3973     }
3974
       /* Nothing can be proved from a zero gcd, and VAL % 0 is undefined:
          stay conservative.  */
       if (cd == 0)
         return true;
     
3975   return val % cd == 0;
3976 }
3977
3978 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
3979    LOOP_NEST.  *OVERLAPS_A and *OVERLAPS_B are initialized to the
3980    functions that describe the relation between the elements accessed
3981    twice by CHREC_A and CHREC_B.  For k >= 0, the following property
3982    is verified:
3983
3984    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).
     
        *LAST_CONFLICTS is set on every path, and the outcome is counted in
        the num_miv* fields of dependence_stats.  */
3985
3986 static void
3987 analyze_miv_subscript (tree chrec_a,
3988                        tree chrec_b,
3989                        conflict_function **overlaps_a,
3990                        conflict_function **overlaps_b,
3991                        tree *last_conflicts,
3992                        struct loop *loop_nest)
3993 {
3994   tree type, difference;
3995
3996   dependence_stats.num_miv++;
3997   if (dump_file && (dump_flags & TDF_DETAILS))
3998     fprintf (dump_file, "(analyze_miv_subscript \n");
3999
       /* Convert both chrecs to one common type before subtracting them.
          NOTE(review): signed_type_for_types presumably picks a signed
          type suitable for both operands -- confirm against its
          definition.  DIFFERENCE is only consulted by the second test
          below.  */
4000   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
4001   chrec_a = chrec_convert (type, chrec_a, NULL);
4002   chrec_b = chrec_convert (type, chrec_b, NULL);
4003   difference = chrec_fold_minus (type, chrec_a, chrec_b);
4004
4005   if (eq_evolutions_p (chrec_a, chrec_b))
4006     {
4007       /* Access functions are the same: all the elements are accessed
4008          in the same order.  */
4009       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4010       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4011       *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
4012       dependence_stats.num_miv_dependent++;
4013     }
4014
       /* Constant difference that the gcd of the steps of CHREC_A provably
          does not divide: the accesses can never meet.  */
4015   else if (evolution_function_is_constant_p (difference)
4016            && evolution_function_is_affine_multivariate_p (chrec_a,
4017                                                            loop_nest->num)
4018            && !gcd_of_steps_may_divide_p (chrec_a, difference))
4019     {
4020       /* testsuite/.../ssa-chrec-33.c
4021          {{21, +, 2}_1, +, -2}_2  vs.  {{20, +, 2}_1, +, -2}_2
4022
4023          The difference is 1, and all the evolution steps are multiples
4024          of 2, consequently there are no overlapping elements.  */
4025       *overlaps_a = conflict_fn_no_dependence ();
4026       *overlaps_b = conflict_fn_no_dependence ();
4027       *last_conflicts = integer_zero_node;
4028       dependence_stats.num_miv_independent++;
4029     }
4030
       /* Both chrecs are affine multivariate and free of symbols: hand them
          to the affine-affine analyzer and classify its answer.  */
4031   else if (evolution_function_is_affine_multivariate_p (chrec_a, loop_nest->num)
4032            && !chrec_contains_symbols (chrec_a)
4033            && evolution_function_is_affine_multivariate_p (chrec_b, loop_nest->num)
4034            && !chrec_contains_symbols (chrec_b))
4035     {
4036       /* testsuite/.../ssa-chrec-35.c
4037          {0, +, 1}_2  vs.  {0, +, 1}_3
4038          the overlapping elements are respectively located at iterations:
4039          {0, +, 1}_x and {0, +, 1}_x,
4040          in other words, we have the equality:
4041          {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
4042
4043          Other examples:
4044          {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
4045          {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
4046
4047          {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
4048          {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
4049       */
4050       analyze_subscript_affine_affine (chrec_a, chrec_b,
4051                                        overlaps_a, overlaps_b, last_conflicts);
4052
4053       if (CF_NOT_KNOWN_P (*overlaps_a)
4054           || CF_NOT_KNOWN_P (*overlaps_b))
4055         dependence_stats.num_miv_unimplemented++;
4056       else if (CF_NO_DEPENDENCE_P (*overlaps_a)
4057                || CF_NO_DEPENDENCE_P (*overlaps_b))
4058         dependence_stats.num_miv_independent++;
4059       else
4060         dependence_stats.num_miv_dependent++;
4061     }
4062
4063   else
4064     {
4065       /* When the analysis is too difficult, answer "don't know".  */
4066       if (dump_file && (dump_flags & TDF_DETAILS))
4067         fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
4068
4069       *overlaps_a = conflict_fn_not_known ();
4070       *overlaps_b = conflict_fn_not_known ();
4071       *last_conflicts = chrec_dont_know;
4072       dependence_stats.num_miv_unimplemented++;
4073     }
4074
4075   if (dump_file && (dump_flags & TDF_DETAILS))
4076     fprintf (dump_file, ")\n");
4077 }
4078
4079 /* Determines the iterations for which CHREC_A is equal to CHREC_B
4080    with respect to LOOP_NEST.  OVERLAP_ITERATIONS_A and
4081    OVERLAP_ITERATIONS_B are initialized with two functions that
4082    describe the iterations that contain conflicting elements.
4083
4084    Remark: For an integer k >= 0, the following equality is true:
4085
4086    CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
     
        *LAST_CONFLICTS is written on every path; it is chrec_dont_know
        whenever the subscripts cannot be analyzed here.  The chosen test
        (ZIV, SIV or MIV) and the undetermined cases are counted in
        dependence_stats.  */
4087
4088
4089 static void
4090 analyze_overlapping_iterations (tree chrec_a,
4091                                 tree chrec_b,
4092                                 conflict_function **overlap_iterations_a,
4093                                 conflict_function **overlap_iterations_b,
4094                                 tree *last_conflicts, struct loop *loop_nest)
4095 {
4096   unsigned int lnn = loop_nest->num;
4097
4098   dependence_stats.num_subscript_tests++;
4099
4100   if (dump_file && (dump_flags & TDF_DETAILS))
4101     {
4102       fprintf (dump_file, "(analyze_overlapping_iterations \n");
4103       fprintf (dump_file, "  (chrec_a = ");
4104       print_generic_expr (dump_file, chrec_a);
4105       fprintf (dump_file, ")\n  (chrec_b = ");
4106       print_generic_expr (dump_file, chrec_b);
4107       fprintf (dump_file, ")\n");
4108     }
4109
4110   if (chrec_a == NULL_TREE
4111       || chrec_b == NULL_TREE
4112       || chrec_contains_undetermined (chrec_a)
4113       || chrec_contains_undetermined (chrec_b))
4114     {
4115       dependence_stats.num_subscript_undetermined++;
4116
4117       *overlap_iterations_a = conflict_fn_not_known ();
4118       *overlap_iterations_b = conflict_fn_not_known ();
           /* Do not leave the out-parameter unset on this path.  */
           *last_conflicts = chrec_dont_know;
4119     }
4120
4121   /* If they are the same chrec, and are affine, they overlap
4122      on every iteration.  */
4123   else if (eq_evolutions_p (chrec_a, chrec_b)
4124            && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
4125                || operand_equal_p (chrec_a, chrec_b, 0)))
4126     {
4127       dependence_stats.num_same_subscript_function++;
4128       *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4129       *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4130       *last_conflicts = chrec_dont_know;
4131     }
4132
4133   /* If they aren't the same, and aren't affine, we can't do anything
4134      yet.  */
4135   else if ((chrec_contains_symbols (chrec_a)
4136             || chrec_contains_symbols (chrec_b))
4137            && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
4138                || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
4139     {
4140       dependence_stats.num_subscript_undetermined++;
4141       *overlap_iterations_a = conflict_fn_not_known ();
4142       *overlap_iterations_b = conflict_fn_not_known ();
           /* Do not leave the out-parameter unset on this path.  */
           *last_conflicts = chrec_dont_know;
4143     }
4144
       /* Otherwise dispatch on the shape of the subscript pair; each
          analyzer fills in all three outputs.  */
4145   else if (ziv_subscript_p (chrec_a, chrec_b))
4146     analyze_ziv_subscript (chrec_a, chrec_b,
4147                            overlap_iterations_a, overlap_iterations_b,
4148                            last_conflicts);
4149
4150   else if (siv_subscript_p (chrec_a, chrec_b))
4151     analyze_siv_subscript (chrec_a, chrec_b,
4152                            overlap_iterations_a, overlap_iterations_b,
4153                            last_conflicts, lnn);
4154
4155   else
4156     analyze_miv_subscript (chrec_a, chrec_b,
4157                            overlap_iterations_a, overlap_iterations_b,
4158                            last_conflicts, loop_nest);
4159
4160   if (dump_file && (dump_flags & TDF_DETAILS))
4161     {
4162       fprintf (dump_file, "  (overlap_iterations_a = ");
4163       dump_conflict_function (dump_file, *overlap_iterations_a);
4164       fprintf (dump_file, ")\n  (overlap_iterations_b = ");
4165       dump_conflict_function (dump_file, *overlap_iterations_b);
4166       fprintf (dump_file, "))\n");
4167     }
4168 }
4169
4170 /* Save DIST_V among the distance vectors of DDR unless it is a duplicate.  */
4171
4172 static void
4173 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
4174 {
4175   unsigned ix;
4176   lambda_vector known;
4177
4178   /* Nothing to do when an equal vector was saved earlier; vectors are
4179      compared over DDR_NB_LOOPS (DDR) elements.  */
4180   FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), ix, known)
4181     if (lambda_vector_equal (known, dist_v, DDR_NB_LOOPS (ddr)))
4182       return;
4183
       DDR_DIST_VECTS (ddr).safe_push (dist_v);
     }
4184
4185 /* Save DIR_V among the direction vectors of DDR unless it is a duplicate.  */
4186
4187 static void
4188 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
4189 {
4190   unsigned ix;
4191   lambda_vector known;
4192
4193   /* Nothing to do when an equal vector was saved earlier; vectors are
4194      compared over DDR_NB_LOOPS (DDR) elements.  */
4195   FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), ix, known)
4196     if (lambda_vector_equal (known, dir_v, DDR_NB_LOOPS (ddr)))
4197       return;
4198
       DDR_DIR_VECTS (ddr).safe_push (dir_v);
     }
4199
4200 /* Add a distance of 1 on all the loops outer than INDEX.  If we
4201    haven't yet determined a distance for this outer loop, push a new
4202    distance vector composed of the previous distance, and a distance
4203    of 1 for this outer loop.  Example:
4204
4205    | loop_1
4206    |   loop_2
4207    |     A[10]
4208    |   endloop_2
4209    | endloop_1
4210
4211    Saved vectors are of the form (dist_in_1, dist_in_2).  First, we
4212    save (0, 1), then we have to save (1, 0).  */
4213
4214 static void
4215 add_outer_distances (struct data_dependence_relation *ddr,
4216                      lambda_vector dist_v, int index)
4217 {
4218   /* For each outer loop where init_v is not set, the accesses are
4219      in dependence of distance 1 in the loop.  */
4220   while (--index >= 0)
4221     {
4222       lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4223       lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
4224       save_v[index] = 1;
4225       save_dist_v (ddr, save_v);
4226     }
4227 }
4228
4229 /* Return false when fail to represent the data dependence as a
4230    distance vector.  A_INDEX is the index of the first reference
4231    (0 for DDR_A, 1 for DDR_B) and B_INDEX is the index of the
4232    second reference.  INIT_B is set to true when a component has been
4233    added to the distance vector DIST_V.  INDEX_CARRY is then set to
4234    the index in DIST_V that carries the dependence.  */
4235
4236 static bool
4237 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
4238                              unsigned int a_index, unsigned int b_index,
4239                              lambda_vector dist_v, bool *init_b,
4240                              int *index_carry)
4241 {
4242   unsigned i;
4243   lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4244
4245   for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
4246     {
4247       tree access_fn_a, access_fn_b;
4248       struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
4249
4250       if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
4251         {
4252           non_affine_dependence_relation (ddr);
4253           return false;
4254         }
4255
4256       access_fn_a = SUB_ACCESS_FN (subscript, a_index);
4257       access_fn_b = SUB_ACCESS_FN (subscript, b_index);
4258
4259       if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
4260           && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
4261         {
4262           HOST_WIDE_INT dist;
4263           int index;
4264           int var_a = CHREC_VARIABLE (access_fn_a);
4265           int var_b = CHREC_VARIABLE (access_fn_b);
4266
4267           if (var_a != var_b
4268               || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
4269             {
4270               non_affine_dependence_relation (ddr);
4271               return false;
4272             }
4273
4274           dist = int_cst_value (SUB_DISTANCE (subscript));
4275           index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
4276           *index_carry = MIN (index, *index_carry);
4277
4278           /* This is the subscript coupling test.  If we have already
4279              recorded a distance for this loop (a distance coming from
4280              another subscript), it should be the same.  For example,
4281              in the following code, there is no dependence:
4282
4283              | loop i = 0, N, 1
4284              |   T[i+1][i] = ...
4285              |   ... = T[i][i]
4286              | endloop
4287           */
4288           if (init_v[index] != 0 && dist_v[index] != dist)
4289             {
4290               finalize_ddr_dependent (ddr, chrec_known);
4291               return false;
4292             }
4293
4294           dist_v[index] = dist;
4295           init_v[index] = 1;
4296           *init_b = true;
4297         }
4298       else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
4299         {
4300           /* This can be for example an affine vs. constant dependence
4301              (T[i] vs. T[3]) that is not an affine dependence and is
4302              not representable as a distance vector.  */
4303           non_affine_dependence_relation (ddr);
4304           return false;
4305         }
4306     }
4307
4308   return true;
4309 }
4310
4311 /* Return true when the DDR contains only constant access functions.  */
4312
4313 static bool
4314 constant_access_functions (const struct data_dependence_relation *ddr)
4315 {
4316   unsigned i;
4317   subscript *sub;
4318
4319   FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
4320     if (!evolution_function_is_constant_p (SUB_ACCESS_FN (sub, 0))
4321         || !evolution_function_is_constant_p (SUB_ACCESS_FN (sub, 1)))
4322       return false;
4323
4324   return true;
4325 }
4326
4327 /* Helper function for the case where DDR_A and DDR_B are the same
4328    multivariate access function with a constant step.  For an example
4329    see pr34635-1.c.  */
4330
4331 static void
4332 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
4333 {
4334   int x_1, x_2;
4335   tree c_1 = CHREC_LEFT (c_2);
4336   tree c_0 = CHREC_LEFT (c_1);
4337   lambda_vector dist_v;
4338   HOST_WIDE_INT v1, v2, cd;
4339
4340   /* Polynomials with more than 2 variables are not handled yet.  When
4341      the evolution steps are parameters, it is not possible to
4342      represent the dependence using classical distance vectors.  */
4343   if (TREE_CODE (c_0) != INTEGER_CST
4344       || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
4345       || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
4346     {
4347       DDR_AFFINE_P (ddr) = false;
4348       return;
4349     }
4350
4351   x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
4352   x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
4353
4354   /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2).  */
4355   dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4356   v1 = int_cst_value (CHREC_RIGHT (c_1));
4357   v2 = int_cst_value (CHREC_RIGHT (c_2));
4358   cd = gcd (v1, v2);
4359   v1 /= cd;
4360   v2 /= cd;
4361
4362   if (v2 < 0)
4363     {
4364       v2 = -v2;
4365       v1 = -v1;
4366     }
4367
4368   dist_v[x_1] = v2;
4369   dist_v[x_2] = -v1;
4370   save_dist_v (ddr, dist_v);
4371
4372   add_outer_distances (ddr, dist_v, x_1);
4373 }
4374
4375 /* Helper function for the case where DDR_A and DDR_B are the same
4376    access functions.  */
4377
4378 static void
4379 add_other_self_distances (struct data_dependence_relation *ddr)
4380 {
4381   lambda_vector dist_v;
4382   unsigned i;
4383   int index_carry = DDR_NB_LOOPS (ddr);
4384   subscript *sub;
4385
4386   FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
4387     {
4388       tree access_fun = SUB_ACCESS_FN (sub, 0);
4389
4390       if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
4391         {
4392           if (!evolution_function_is_univariate_p (access_fun))
4393             {
4394               if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
4395                 {
4396                   DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
4397                   return;
4398                 }
4399
4400               access_fun = SUB_ACCESS_FN (DDR_SUBSCRIPT (ddr, 0), 0);
4401
4402               if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
4403                 add_multivariate_self_dist (ddr, access_fun);
4404               else
4405                 /* The evolution step is not constant: it varies in
4406                    the outer loop, so this cannot be represented by a
4407                    distance vector.  For example in pr34635.c the
4408                    evolution is {0, +, {0, +, 4}_1}_2.  */
4409                 DDR_AFFINE_P (ddr) = false;
4410
4411               return;
4412             }
4413
4414           index_carry = MIN (index_carry,
4415                              index_in_loop_nest (CHREC_VARIABLE (access_fun),
4416                                                  DDR_LOOP_NEST (ddr)));
4417         }
4418     }
4419
4420   dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4421   add_outer_distances (ddr, dist_v, index_carry);
4422 }
4423
4424 static void
4425 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
4426 {
4427   lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4428
4429   dist_v[DDR_INNER_LOOP (ddr)] = 1;
4430   save_dist_v (ddr, dist_v);
4431 }
4432
4433 /* Adds a unit distance vector to DDR when there is a 0 overlap.  This
4434    is the case for example when access functions are the same and
4435    equal to a constant, as in:
4436
4437    | loop_1
4438    |   A[3] = ...
4439    |   ... = A[3]
4440    | endloop_1
4441
4442    in which case the distance vectors are (0) and (1).  */
4443
4444 static void
4445 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
4446 {
4447   unsigned i, j;
4448
4449   for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
4450     {
4451       subscript_p sub = DDR_SUBSCRIPT (ddr, i);
4452       conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
4453       conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
4454
4455       for (j = 0; j < ca->n; j++)
4456         if (affine_function_zero_p (ca->fns[j]))
4457           {
4458             insert_innermost_unit_dist_vector (ddr);
4459             return;
4460           }
4461
4462       for (j = 0; j < cb->n; j++)
4463         if (affine_function_zero_p (cb->fns[j]))
4464           {
4465             insert_innermost_unit_dist_vector (ddr);
4466             return;
4467           }
4468     }
4469 }
4470
4471 /* Return true when the DDR contains two data references that have the
4472    same access functions.  */
4473
4474 static inline bool
4475 same_access_functions (const struct data_dependence_relation *ddr)
4476 {
4477   unsigned i;
4478   subscript *sub;
4479
4480   FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
4481     if (!eq_evolutions_p (SUB_ACCESS_FN (sub, 0),
4482                           SUB_ACCESS_FN (sub, 1)))
4483       return false;
4484
4485   return true;
4486 }
4487
4488 /* Compute the classic per loop distance vector.  DDR is the data
4489    dependence relation to build a vector from.  Return false when fail
4490    to represent the data dependence as a distance vector.  */
4491
4492 static bool
4493 build_classic_dist_vector (struct data_dependence_relation *ddr,
4494                            struct loop *loop_nest)
4495 {
4496   bool init_b = false;
4497   int index_carry = DDR_NB_LOOPS (ddr);
4498   lambda_vector dist_v;
4499
4500   if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
4501     return false;
4502
4503   if (same_access_functions (ddr))
4504     {
4505       /* Save the 0 vector.  */
4506       dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4507       save_dist_v (ddr, dist_v);
4508
4509       if (constant_access_functions (ddr))
4510         add_distance_for_zero_overlaps (ddr);
4511
4512       if (DDR_NB_LOOPS (ddr) > 1)
4513         add_other_self_distances (ddr);
4514
4515       return true;
4516     }
4517
4518   dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4519   if (!build_classic_dist_vector_1 (ddr, 0, 1, dist_v, &init_b, &index_carry))
4520     return false;
4521
4522   /* Save the distance vector if we initialized one.  */
4523   if (init_b)
4524     {
4525       /* Verify a basic constraint: classic distance vectors should
4526          always be lexicographically positive.
4527
4528          Data references are collected in the order of execution of
4529          the program, thus for the following loop
4530
4531          | for (i = 1; i < 100; i++)
4532          |   for (j = 1; j < 100; j++)
4533          |     {
4534          |       t = T[j+1][i-1];  // A
4535          |       T[j][i] = t + 2;  // B
4536          |     }
4537
4538          references are collected following the direction of the wind:
4539          A then B.  The data dependence tests are performed also
4540          following this order, such that we're looking at the distance
4541          separating the elements accessed by A from the elements later
4542          accessed by B.  But in this example, the distance returned by
4543          test_dep (A, B) is lexicographically negative (-1, 1), that
4544          means that the access A occurs later than B with respect to
4545          the outer loop, ie. we're actually looking upwind.  In this
4546          case we solve test_dep (B, A) looking downwind to the
4547          lexicographically positive solution, that returns the
4548          distance vector (1, -1).  */
4549       if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
4550         {
4551           lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4552           if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
4553             return false;
4554           compute_subscript_distance (ddr);
4555           if (!build_classic_dist_vector_1 (ddr, 1, 0, save_v, &init_b,
4556                                             &index_carry))
4557             return false;
4558           save_dist_v (ddr, save_v);
4559           DDR_REVERSED_P (ddr) = true;
4560
4561           /* In this case there is a dependence forward for all the
4562              outer loops:
4563
4564              | for (k = 1; k < 100; k++)
4565              |  for (i = 1; i < 100; i++)
4566              |   for (j = 1; j < 100; j++)
4567              |     {
4568              |       t = T[j+1][i-1];  // A
4569              |       T[j][i] = t + 2;  // B
4570              |     }
4571
4572              the vectors are:
4573              (0,  1, -1)
4574              (1,  1, -1)
4575              (1, -1,  1)
4576           */
4577           if (DDR_NB_LOOPS (ddr) > 1)
4578             {
4579               add_outer_distances (ddr, save_v, index_carry);
4580               add_outer_distances (ddr, dist_v, index_carry);
4581             }
4582         }
4583       else
4584         {
4585           lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4586           lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
4587
4588           if (DDR_NB_LOOPS (ddr) > 1)
4589             {
4590               lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4591
4592               if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
4593                 return false;
4594               compute_subscript_distance (ddr);
4595               if (!build_classic_dist_vector_1 (ddr, 1, 0, opposite_v, &init_b,
4596                                                 &index_carry))
4597                 return false;
4598
4599               save_dist_v (ddr, save_v);
4600               add_outer_distances (ddr, dist_v, index_carry);
4601               add_outer_distances (ddr, opposite_v, index_carry);
4602             }
4603           else
4604             save_dist_v (ddr, save_v);
4605         }
4606     }
4607   else
4608     {
4609       /* There is a distance of 1 on all the outer loops: Example:
4610          there is a dependence of distance 1 on loop_1 for the array A.
4611
4612          | loop_1
4613          |   A[5] = ...
4614          | endloop
4615       */
4616       add_outer_distances (ddr, dist_v,
4617                            lambda_vector_first_nz (dist_v,
4618                                                    DDR_NB_LOOPS (ddr), 0));
4619     }
4620
4621   if (dump_file && (dump_flags & TDF_DETAILS))
4622     {
4623       unsigned i;
4624
4625       fprintf (dump_file, "(build_classic_dist_vector\n");
4626       for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
4627         {
4628           fprintf (dump_file, "  dist_vector = (");
4629           print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
4630                                DDR_NB_LOOPS (ddr));
4631           fprintf (dump_file, "  )\n");
4632         }
4633       fprintf (dump_file, ")\n");
4634     }
4635
4636   return true;
4637 }
4638
4639 /* Return the direction for a given distance.
4640    FIXME: Computing dir this way is suboptimal, since dir can catch
4641    cases that dist is unable to represent.  */
4642
4643 static inline enum data_dependence_direction
4644 dir_from_dist (int dist)
4645 {
4646   if (dist > 0)
4647     return dir_positive;
4648   else if (dist < 0)
4649     return dir_negative;
4650   else
4651     return dir_equal;
4652 }
4653
4654 /* Compute the classic per loop direction vector.  DDR is the data
4655    dependence relation to build a vector from.  */
4656
4657 static void
4658 build_classic_dir_vector (struct data_dependence_relation *ddr)
4659 {
4660   unsigned i, j;
4661   lambda_vector dist_v;
4662
4663   FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
4664     {
4665       lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4666
4667       for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
4668         dir_v[j] = dir_from_dist (dist_v[j]);
4669
4670       save_dir_v (ddr, dir_v);
4671     }
4672 }
4673
4674 /* Helper function.  Returns true when there is a dependence between the
4675    data references.  A_INDEX is the index of the first reference (0 for
4676    DDR_A, 1 for DDR_B) and B_INDEX is the index of the second reference.  */
4677
4678 static bool
4679 subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
4680                                unsigned int a_index, unsigned int b_index,
4681                                struct loop *loop_nest)
4682 {
4683   unsigned int i;
4684   tree last_conflicts;
4685   struct subscript *subscript;
4686   tree res = NULL_TREE;
4687
4688   for (i = 0; DDR_SUBSCRIPTS (ddr).iterate (i, &subscript); i++)
4689     {
4690       conflict_function *overlaps_a, *overlaps_b;
4691
4692       analyze_overlapping_iterations (SUB_ACCESS_FN (subscript, a_index),
4693                                       SUB_ACCESS_FN (subscript, b_index),
4694                                       &overlaps_a, &overlaps_b,
4695                                       &last_conflicts, loop_nest);
4696
4697       if (SUB_CONFLICTS_IN_A (subscript))
4698         free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
4699       if (SUB_CONFLICTS_IN_B (subscript))
4700         free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
4701
4702       SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
4703       SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
4704       SUB_LAST_CONFLICT (subscript) = last_conflicts;
4705
4706       /* If there is any undetermined conflict function we have to
4707          give a conservative answer in case we cannot prove that
4708          no dependence exists when analyzing another subscript.  */
4709       if (CF_NOT_KNOWN_P (overlaps_a)
4710           || CF_NOT_KNOWN_P (overlaps_b))
4711         {
4712           res = chrec_dont_know;
4713           continue;
4714         }
4715
4716       /* When there is a subscript with no dependence we can stop.  */
4717       else if (CF_NO_DEPENDENCE_P (overlaps_a)
4718                || CF_NO_DEPENDENCE_P (overlaps_b))
4719         {
4720           res = chrec_known;
4721           break;
4722         }
4723     }
4724
4725   if (res == NULL_TREE)
4726     return true;
4727
4728   if (res == chrec_known)
4729     dependence_stats.num_dependence_independent++;
4730   else
4731     dependence_stats.num_dependence_undetermined++;
4732   finalize_ddr_dependent (ddr, res);
4733   return false;
4734 }
4735
4736 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR.  */
4737
4738 static void
4739 subscript_dependence_tester (struct data_dependence_relation *ddr,
4740                              struct loop *loop_nest)
4741 {
4742   if (subscript_dependence_tester_1 (ddr, 0, 1, loop_nest))
4743     dependence_stats.num_dependence_dependent++;
4744
4745   compute_subscript_distance (ddr);
4746   if (build_classic_dist_vector (ddr, loop_nest))
4747     build_classic_dir_vector (ddr);
4748 }
4749
4750 /* Returns true when all the access functions of A are affine or
4751    constant with respect to LOOP_NEST.  */
4752
4753 static bool
4754 access_functions_are_affine_or_constant_p (const struct data_reference *a,
4755                                            const struct loop *loop_nest)
4756 {
4757   unsigned int i;
4758   vec<tree> fns = DR_ACCESS_FNS (a);
4759   tree t;
4760
4761   FOR_EACH_VEC_ELT (fns, i, t)
4762     if (!evolution_function_is_invariant_p (t, loop_nest->num)
4763         && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
4764       return false;
4765
4766   return true;
4767 }
4768
4769 /* This computes the affine dependence relation between A and B with
4770    respect to LOOP_NEST.  CHREC_KNOWN is used for representing the
4771    independence between two accesses, while CHREC_DONT_KNOW is used
4772    for representing the unknown relation.
4773
4774    Note that it is possible to stop the computation of the dependence
4775    relation the first time we detect a CHREC_KNOWN element for a given
4776    subscript.  */
4777
4778 void
4779 compute_affine_dependence (struct data_dependence_relation *ddr,
4780                            struct loop *loop_nest)
4781 {
4782   struct data_reference *dra = DDR_A (ddr);
4783   struct data_reference *drb = DDR_B (ddr);
4784
4785   if (dump_file && (dump_flags & TDF_DETAILS))
4786     {
4787       fprintf (dump_file, "(compute_affine_dependence\n");
4788       fprintf (dump_file, "  stmt_a: ");
4789       print_gimple_stmt (dump_file, DR_STMT (dra), 0, TDF_SLIM);
4790       fprintf (dump_file, "  stmt_b: ");
4791       print_gimple_stmt (dump_file, DR_STMT (drb), 0, TDF_SLIM);
4792     }
4793
4794   /* Analyze only when the dependence relation is not yet known.  */
4795   if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4796     {
4797       dependence_stats.num_dependence_tests++;
4798
4799       if (access_functions_are_affine_or_constant_p (dra, loop_nest)
4800           && access_functions_are_affine_or_constant_p (drb, loop_nest))
4801         subscript_dependence_tester (ddr, loop_nest);
4802
4803       /* As a last case, if the dependence cannot be determined, or if
4804          the dependence is considered too difficult to determine, answer
4805          "don't know".  */
4806       else
4807         {
4808           dependence_stats.num_dependence_undetermined++;
4809
4810           if (dump_file && (dump_flags & TDF_DETAILS))
4811             {
4812               fprintf (dump_file, "Data ref a:\n");
4813               dump_data_reference (dump_file, dra);
4814               fprintf (dump_file, "Data ref b:\n");
4815               dump_data_reference (dump_file, drb);
4816               fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
4817             }
4818           finalize_ddr_dependent (ddr, chrec_dont_know);
4819         }
4820     }
4821
4822   if (dump_file && (dump_flags & TDF_DETAILS))
4823     {
4824       if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4825         fprintf (dump_file, ") -> no dependence\n");
4826       else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
4827         fprintf (dump_file, ") -> dependence analysis failed\n");
4828       else
4829         fprintf (dump_file, ")\n");
4830     }
4831 }
4832
4833 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
4834    the data references in DATAREFS, in the LOOP_NEST.  When
4835    COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
4836    relations.  Return true when successful, i.e. data references number
4837    is small enough to be handled.  */
4838
4839 bool
4840 compute_all_dependences (vec<data_reference_p> datarefs,
4841                          vec<ddr_p> *dependence_relations,
4842                          vec<loop_p> loop_nest,
4843                          bool compute_self_and_rr)
4844 {
4845   struct data_dependence_relation *ddr;
4846   struct data_reference *a, *b;
4847   unsigned int i, j;
4848
4849   if ((int) datarefs.length ()
4850       > PARAM_VALUE (PARAM_LOOP_MAX_DATAREFS_FOR_DATADEPS))
4851     {
4852       struct data_dependence_relation *ddr;
4853
4854       /* Insert a single relation into dependence_relations:
4855          chrec_dont_know.  */
4856       ddr = initialize_data_dependence_relation (NULL, NULL, loop_nest);
4857       dependence_relations->safe_push (ddr);
4858       return false;
4859     }
4860
4861   FOR_EACH_VEC_ELT (datarefs, i, a)
4862     for (j = i + 1; datarefs.iterate (j, &b); j++)
4863       if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
4864         {
4865           ddr = initialize_data_dependence_relation (a, b, loop_nest);
4866           dependence_relations->safe_push (ddr);
4867           if (loop_nest.exists ())
4868             compute_affine_dependence (ddr, loop_nest[0]);
4869         }
4870
4871   if (compute_self_and_rr)
4872     FOR_EACH_VEC_ELT (datarefs, i, a)
4873       {
4874         ddr = initialize_data_dependence_relation (a, a, loop_nest);
4875         dependence_relations->safe_push (ddr);
4876         if (loop_nest.exists ())
4877           compute_affine_dependence (ddr, loop_nest[0]);
4878       }
4879
4880   return true;
4881 }
4882
4883 /* Describes a location of a memory reference.  */
4884
4885 struct data_ref_loc
4886 {
4887   /* The memory reference.  */
4888   tree ref;
4889
4890   /* True if the memory reference is read.  */
4891   bool is_read;
4892
4893   /* True if the data reference is conditional within the containing
4894      statement, i.e. if it might not occur even when the statement
4895      is executed and runs to completion.  */
4896   bool is_conditional_in_stmt;
4897 };
4898
4899
4900 /* Stores the locations of memory references in STMT to REFERENCES.  Returns
4901    true if STMT clobbers memory, false otherwise.  */
4902
4903 static bool
4904 get_references_in_stmt (gimple *stmt, vec<data_ref_loc, va_heap> *references)
4905 {
4906   bool clobbers_memory = false;
4907   data_ref_loc ref;
4908   tree op0, op1;
4909   enum gimple_code stmt_code = gimple_code (stmt);
4910
4911   /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
4912      As we cannot model data-references to not spelled out
4913      accesses give up if they may occur.  */
4914   if (stmt_code == GIMPLE_CALL
4915       && !(gimple_call_flags (stmt) & ECF_CONST))
4916     {
4917       /* Allow IFN_GOMP_SIMD_LANE in their own loops.  */
4918       if (gimple_call_internal_p (stmt))
4919         switch (gimple_call_internal_fn (stmt))
4920           {
4921           case IFN_GOMP_SIMD_LANE:
4922             {
4923               struct loop *loop = gimple_bb (stmt)->loop_father;
4924               tree uid = gimple_call_arg (stmt, 0);
4925               gcc_assert (TREE_CODE (uid) == SSA_NAME);
4926               if (loop == NULL
4927                   || loop->simduid != SSA_NAME_VAR (uid))
4928                 clobbers_memory = true;
4929               break;
4930             }
4931           case IFN_MASK_LOAD:
4932           case IFN_MASK_STORE:
4933             break;
4934           default:
4935             clobbers_memory = true;
4936             break;
4937           }
4938       else
4939         clobbers_memory = true;
4940     }
4941   else if (stmt_code == GIMPLE_ASM
4942            && (gimple_asm_volatile_p (as_a <gasm *> (stmt))
4943                || gimple_vuse (stmt)))
4944     clobbers_memory = true;
4945
4946   if (!gimple_vuse (stmt))
4947     return clobbers_memory;
4948
4949   if (stmt_code == GIMPLE_ASSIGN)
4950     {
4951       tree base;
4952       op0 = gimple_assign_lhs (stmt);
4953       op1 = gimple_assign_rhs1 (stmt);
4954
4955       if (DECL_P (op1)
4956           || (REFERENCE_CLASS_P (op1)
4957               && (base = get_base_address (op1))
4958               && TREE_CODE (base) != SSA_NAME
4959               && !is_gimple_min_invariant (base)))
4960         {
4961           ref.ref = op1;
4962           ref.is_read = true;
4963           ref.is_conditional_in_stmt = false;
4964           references->safe_push (ref);
4965         }
4966     }
4967   else if (stmt_code == GIMPLE_CALL)
4968     {
4969       unsigned i, n;
4970       tree ptr, type;
4971       unsigned int align;
4972
4973       ref.is_read = false;
4974       if (gimple_call_internal_p (stmt))
4975         switch (gimple_call_internal_fn (stmt))
4976           {
4977           case IFN_MASK_LOAD:
4978             if (gimple_call_lhs (stmt) == NULL_TREE)
4979               break;
4980             ref.is_read = true;
4981             /* FALLTHRU */
4982           case IFN_MASK_STORE:
4983             ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)), 0);
4984             align = tree_to_shwi (gimple_call_arg (stmt, 1));
4985             if (ref.is_read)
4986               type = TREE_TYPE (gimple_call_lhs (stmt));
4987             else
4988               type = TREE_TYPE (gimple_call_arg (stmt, 3));
4989             if (TYPE_ALIGN (type) != align)
4990               type = build_aligned_type (type, align);
4991             ref.is_conditional_in_stmt = true;
4992             ref.ref = fold_build2 (MEM_REF, type, gimple_call_arg (stmt, 0),
4993                                    ptr);
4994             references->safe_push (ref);
4995             return false;
4996           default:
4997             break;
4998           }
4999
5000       op0 = gimple_call_lhs (stmt);
5001       n = gimple_call_num_args (stmt);
5002       for (i = 0; i < n; i++)
5003         {
5004           op1 = gimple_call_arg (stmt, i);
5005
5006           if (DECL_P (op1)
5007               || (REFERENCE_CLASS_P (op1) && get_base_address (op1)))
5008             {
5009               ref.ref = op1;
5010               ref.is_read = true;
5011               ref.is_conditional_in_stmt = false;
5012               references->safe_push (ref);
5013             }
5014         }
5015     }
5016   else
5017     return clobbers_memory;
5018
5019   if (op0
5020       && (DECL_P (op0)
5021           || (REFERENCE_CLASS_P (op0) && get_base_address (op0))))
5022     {
5023       ref.ref = op0;
5024       ref.is_read = false;
5025       ref.is_conditional_in_stmt = false;
5026       references->safe_push (ref);
5027     }
5028   return clobbers_memory;
5029 }
5030
5031
5032 /* Returns true if the loop-nest has any data reference.  */
5033
5034 bool
5035 loop_nest_has_data_refs (loop_p loop)
5036 {
5037   basic_block *bbs = get_loop_body (loop);
5038   auto_vec<data_ref_loc, 3> references;
5039
5040   for (unsigned i = 0; i < loop->num_nodes; i++)
5041     {
5042       basic_block bb = bbs[i];
5043       gimple_stmt_iterator bsi;
5044
5045       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5046         {
5047           gimple *stmt = gsi_stmt (bsi);
5048           get_references_in_stmt (stmt, &references);
5049           if (references.length ())
5050             {
5051               free (bbs);
5052               return true;
5053             }
5054         }
5055     }
5056   free (bbs);
5057   return false;
5058 }
5059
5060 /* Stores the data references in STMT to DATAREFS.  If there is an unanalyzable
5061    reference, returns false, otherwise returns true.  NEST is the outermost
5062    loop of the loop nest in which the references should be analyzed.  */
5063
5064 bool
5065 find_data_references_in_stmt (struct loop *nest, gimple *stmt,
5066                               vec<data_reference_p> *datarefs)
5067 {
5068   unsigned i;
5069   auto_vec<data_ref_loc, 2> references;
5070   data_ref_loc *ref;
5071   bool ret = true;
5072   data_reference_p dr;
5073
5074   if (get_references_in_stmt (stmt, &references))
5075     return false;
5076
5077   FOR_EACH_VEC_ELT (references, i, ref)
5078     {
5079       dr = create_data_ref (nest ? loop_preheader_edge (nest) : NULL,
5080                             loop_containing_stmt (stmt), ref->ref,
5081                             stmt, ref->is_read, ref->is_conditional_in_stmt);
5082       gcc_assert (dr != NULL);
5083       datarefs->safe_push (dr);
5084     }
5085
5086   return ret;
5087 }
5088
5089 /* Stores the data references in STMT to DATAREFS.  If there is an
5090    unanalyzable reference, returns false, otherwise returns true.
5091    NEST is the outermost loop of the loop nest in which the references
5092    should be instantiated, LOOP is the loop in which the references
5093    should be analyzed.  */
5094
5095 bool
5096 graphite_find_data_references_in_stmt (edge nest, loop_p loop, gimple *stmt,
5097                                        vec<data_reference_p> *datarefs)
5098 {
5099   unsigned i;
5100   auto_vec<data_ref_loc, 2> references;
5101   data_ref_loc *ref;
5102   bool ret = true;
5103   data_reference_p dr;
5104
5105   if (get_references_in_stmt (stmt, &references))
5106     return false;
5107
5108   FOR_EACH_VEC_ELT (references, i, ref)
5109     {
5110       dr = create_data_ref (nest, loop, ref->ref, stmt, ref->is_read,
5111                             ref->is_conditional_in_stmt);
5112       gcc_assert (dr != NULL);
5113       datarefs->safe_push (dr);
5114     }
5115
5116   return ret;
5117 }
5118
5119 /* Search the data references in LOOP, and record the information into
5120    DATAREFS.  Returns chrec_dont_know when failing to analyze a
5121    difficult case, returns NULL_TREE otherwise.  */
5122
5123 tree
5124 find_data_references_in_bb (struct loop *loop, basic_block bb,
5125                             vec<data_reference_p> *datarefs)
5126 {
5127   gimple_stmt_iterator bsi;
5128
5129   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5130     {
5131       gimple *stmt = gsi_stmt (bsi);
5132
5133       if (!find_data_references_in_stmt (loop, stmt, datarefs))
5134         {
5135           struct data_reference *res;
5136           res = XCNEW (struct data_reference);
5137           datarefs->safe_push (res);
5138
5139           return chrec_dont_know;
5140         }
5141     }
5142
5143   return NULL_TREE;
5144 }
5145
5146 /* Search the data references in LOOP, and record the information into
5147    DATAREFS.  Returns chrec_dont_know when failing to analyze a
5148    difficult case, returns NULL_TREE otherwise.
5149
5150    TODO: This function should be made smarter so that it can handle address
5151    arithmetic as if they were array accesses, etc.  */
5152
5153 tree
5154 find_data_references_in_loop (struct loop *loop,
5155                               vec<data_reference_p> *datarefs)
5156 {
5157   basic_block bb, *bbs;
5158   unsigned int i;
5159
5160   bbs = get_loop_body_in_dom_order (loop);
5161
5162   for (i = 0; i < loop->num_nodes; i++)
5163     {
5164       bb = bbs[i];
5165
5166       if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
5167         {
5168           free (bbs);
5169           return chrec_dont_know;
5170         }
5171     }
5172   free (bbs);
5173
5174   return NULL_TREE;
5175 }
5176
5177 /* Return the alignment in bytes that DRB is guaranteed to have at all
5178    times.  */
5179
5180 unsigned int
5181 dr_alignment (innermost_loop_behavior *drb)
5182 {
5183   /* Get the alignment of BASE_ADDRESS + INIT.  */
5184   unsigned int alignment = drb->base_alignment;
5185   unsigned int misalignment = (drb->base_misalignment
5186                                + TREE_INT_CST_LOW (drb->init));
5187   if (misalignment != 0)
5188     alignment = MIN (alignment, misalignment & -misalignment);
5189
5190   /* Cap it to the alignment of OFFSET.  */
5191   if (!integer_zerop (drb->offset))
5192     alignment = MIN (alignment, drb->offset_alignment);
5193
5194   /* Cap it to the alignment of STEP.  */
5195   if (!integer_zerop (drb->step))
5196     alignment = MIN (alignment, drb->step_alignment);
5197
5198   return alignment;
5199 }
5200
5201 /* Recursive helper function.  */
5202
5203 static bool
5204 find_loop_nest_1 (struct loop *loop, vec<loop_p> *loop_nest)
5205 {
5206   /* Inner loops of the nest should not contain siblings.  Example:
5207      when there are two consecutive loops,
5208
5209      | loop_0
5210      |   loop_1
5211      |     A[{0, +, 1}_1]
5212      |   endloop_1
5213      |   loop_2
5214      |     A[{0, +, 1}_2]
5215      |   endloop_2
5216      | endloop_0
5217
5218      the dependence relation cannot be captured by the distance
5219      abstraction.  */
5220   if (loop->next)
5221     return false;
5222
5223   loop_nest->safe_push (loop);
5224   if (loop->inner)
5225     return find_loop_nest_1 (loop->inner, loop_nest);
5226   return true;
5227 }
5228
5229 /* Return false when the LOOP is not well nested.  Otherwise return
5230    true and insert in LOOP_NEST the loops of the nest.  LOOP_NEST will
5231    contain the loops from the outermost to the innermost, as they will
5232    appear in the classic distance vector.  */
5233
5234 bool
5235 find_loop_nest (struct loop *loop, vec<loop_p> *loop_nest)
5236 {
5237   loop_nest->safe_push (loop);
5238   if (loop->inner)
5239     return find_loop_nest_1 (loop->inner, loop_nest);
5240   return true;
5241 }
5242
5243 /* Returns true when the data dependences have been computed, false otherwise.
5244    Given a loop nest LOOP, the following vectors are returned:
5245    DATAREFS is initialized to all the array elements contained in this loop,
5246    DEPENDENCE_RELATIONS contains the relations between the data references.
5247    Compute read-read and self relations if
5248    COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE.  */
5249
5250 bool
5251 compute_data_dependences_for_loop (struct loop *loop,
5252                                    bool compute_self_and_read_read_dependences,
5253                                    vec<loop_p> *loop_nest,
5254                                    vec<data_reference_p> *datarefs,
5255                                    vec<ddr_p> *dependence_relations)
5256 {
5257   bool res = true;
5258
5259   memset (&dependence_stats, 0, sizeof (dependence_stats));
5260
5261   /* If the loop nest is not well formed, or one of the data references
5262      is not computable, give up without spending time to compute other
5263      dependences.  */
5264   if (!loop
5265       || !find_loop_nest (loop, loop_nest)
5266       || find_data_references_in_loop (loop, datarefs) == chrec_dont_know
5267       || !compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
5268                                    compute_self_and_read_read_dependences))
5269     res = false;
5270
5271   if (dump_file && (dump_flags & TDF_STATS))
5272     {
5273       fprintf (dump_file, "Dependence tester statistics:\n");
5274
5275       fprintf (dump_file, "Number of dependence tests: %d\n",
5276                dependence_stats.num_dependence_tests);
5277       fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
5278                dependence_stats.num_dependence_dependent);
5279       fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
5280                dependence_stats.num_dependence_independent);
5281       fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
5282                dependence_stats.num_dependence_undetermined);
5283
5284       fprintf (dump_file, "Number of subscript tests: %d\n",
5285                dependence_stats.num_subscript_tests);
5286       fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
5287                dependence_stats.num_subscript_undetermined);
5288       fprintf (dump_file, "Number of same subscript function: %d\n",
5289                dependence_stats.num_same_subscript_function);
5290
5291       fprintf (dump_file, "Number of ziv tests: %d\n",
5292                dependence_stats.num_ziv);
5293       fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
5294                dependence_stats.num_ziv_dependent);
5295       fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
5296                dependence_stats.num_ziv_independent);
5297       fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
5298                dependence_stats.num_ziv_unimplemented);
5299
5300       fprintf (dump_file, "Number of siv tests: %d\n",
5301                dependence_stats.num_siv);
5302       fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
5303                dependence_stats.num_siv_dependent);
5304       fprintf (dump_file, "Number of siv tests returning independent: %d\n",
5305                dependence_stats.num_siv_independent);
5306       fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
5307                dependence_stats.num_siv_unimplemented);
5308
5309       fprintf (dump_file, "Number of miv tests: %d\n",
5310                dependence_stats.num_miv);
5311       fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
5312                dependence_stats.num_miv_dependent);
5313       fprintf (dump_file, "Number of miv tests returning independent: %d\n",
5314                dependence_stats.num_miv_independent);
5315       fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
5316                dependence_stats.num_miv_unimplemented);
5317     }
5318
5319   return res;
5320 }
5321
5322 /* Free the memory used by a data dependence relation DDR.  */
5323
5324 void
5325 free_dependence_relation (struct data_dependence_relation *ddr)
5326 {
5327   if (ddr == NULL)
5328     return;
5329
5330   if (DDR_SUBSCRIPTS (ddr).exists ())
5331     free_subscripts (DDR_SUBSCRIPTS (ddr));
5332   DDR_DIST_VECTS (ddr).release ();
5333   DDR_DIR_VECTS (ddr).release ();
5334
5335   free (ddr);
5336 }
5337
5338 /* Free the memory used by the data dependence relations from
5339    DEPENDENCE_RELATIONS.  */
5340
5341 void
5342 free_dependence_relations (vec<ddr_p> dependence_relations)
5343 {
5344   unsigned int i;
5345   struct data_dependence_relation *ddr;
5346
5347   FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
5348     if (ddr)
5349       free_dependence_relation (ddr);
5350
5351   dependence_relations.release ();
5352 }
5353
5354 /* Free the memory used by the data references from DATAREFS.  */
5355
5356 void
5357 free_data_refs (vec<data_reference_p> datarefs)
5358 {
5359   unsigned int i;
5360   struct data_reference *dr;
5361
5362   FOR_EACH_VEC_ELT (datarefs, i, dr)
5363     free_data_ref (dr);
5364   datarefs.release ();
5365 }
5366
5367 /* Common routine implementing both dr_direction_indicator and
5368    dr_zero_step_indicator.  Return USEFUL_MIN if the indicator is known
5369    to be >= USEFUL_MIN and -1 if the indicator is known to be negative.
5370    Return the step as the indicator otherwise.  */
5371
5372 static tree
5373 dr_step_indicator (struct data_reference *dr, int useful_min)
5374 {
5375   tree step = DR_STEP (dr);
5376   STRIP_NOPS (step);
5377   /* Look for cases where the step is scaled by a positive constant
5378      integer, which will often be the access size.  If the multiplication
5379      doesn't change the sign (due to overflow effects) then we can
5380      test the unscaled value instead.  */
5381   if (TREE_CODE (step) == MULT_EXPR
5382       && TREE_CODE (TREE_OPERAND (step, 1)) == INTEGER_CST
5383       && tree_int_cst_sgn (TREE_OPERAND (step, 1)) > 0)
5384     {
5385       tree factor = TREE_OPERAND (step, 1);
5386       step = TREE_OPERAND (step, 0);
5387
5388       /* Strip widening and truncating conversions as well as nops.  */
5389       if (CONVERT_EXPR_P (step)
5390           && INTEGRAL_TYPE_P (TREE_TYPE (TREE_OPERAND (step, 0))))
5391         step = TREE_OPERAND (step, 0);
5392       tree type = TREE_TYPE (step);
5393
5394       /* Get the range of step values that would not cause overflow.  */
5395       widest_int minv = (wi::to_widest (TYPE_MIN_VALUE (ssizetype))
5396                          / wi::to_widest (factor));
5397       widest_int maxv = (wi::to_widest (TYPE_MAX_VALUE (ssizetype))
5398                          / wi::to_widest (factor));
5399
5400       /* Get the range of values that the unconverted step actually has.  */
5401       wide_int step_min, step_max;
5402       if (TREE_CODE (step) != SSA_NAME
5403           || get_range_info (step, &step_min, &step_max) != VR_RANGE)
5404         {
5405           step_min = wi::to_wide (TYPE_MIN_VALUE (type));
5406           step_max = wi::to_wide (TYPE_MAX_VALUE (type));
5407         }
5408
5409       /* Check whether the unconverted step has an acceptable range.  */
5410       signop sgn = TYPE_SIGN (type);
5411       if (wi::les_p (minv, widest_int::from (step_min, sgn))
5412           && wi::ges_p (maxv, widest_int::from (step_max, sgn)))
5413         {
5414           if (wi::ge_p (step_min, useful_min, sgn))
5415             return ssize_int (useful_min);
5416           else if (wi::lt_p (step_max, 0, sgn))
5417             return ssize_int (-1);
5418           else
5419             return fold_convert (ssizetype, step);
5420         }
5421     }
5422   return DR_STEP (dr);
5423 }
5424
5425 /* Return a value that is negative iff DR has a negative step.  */
5426
5427 tree
5428 dr_direction_indicator (struct data_reference *dr)
5429 {
5430   return dr_step_indicator (dr, 0);
5431 }
5432
5433 /* Return a value that is zero iff DR has a zero step.  */
5434
5435 tree
5436 dr_zero_step_indicator (struct data_reference *dr)
5437 {
5438   return dr_step_indicator (dr, 1);
5439 }
5440
5441 /* Return true if DR is known to have a nonnegative (but possibly zero)
5442    step.  */
5443
5444 bool
5445 dr_known_forward_stride_p (struct data_reference *dr)
5446 {
5447   tree indicator = dr_direction_indicator (dr);
5448   tree neg_step_val = fold_binary (LT_EXPR, boolean_type_node,
5449                                    fold_convert (ssizetype, indicator),
5450                                    ssize_int (0));
5451   return neg_step_val && integer_zerop (neg_step_val);
5452 }