gcc/tree-data-ref.c

   1 /* Data references and dependences detectors.
   2    Copyright (C) 2003-2018 Free Software Foundation, Inc.
   3    Contributed by Sebastian Pop <pop@cri.ensmp.fr>
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 /* This pass walks a given loop structure searching for array
  22    references.  The information about the array accesses is recorded
  23    in DATA_REFERENCE structures.
  24
  25    The basic test for determining the dependences is:
  26    given two access functions chrec1 and chrec2 to a same array, and
  27    x and y two vectors from the iteration domain, the same element of
  28    the array is accessed twice at iterations x and y if and only if:
  29    |             chrec1 (x) == chrec2 (y).
  30
  31    The goals of this analysis are:
  32
  33    - to determine the independence: the relation between two
  34      independent accesses is qualified with the chrec_known (this
  35      information allows a loop parallelization),
  36
  37    - when two data references access the same data, to qualify the
  38      dependence relation with classic dependence representations:
  39
  40        - distance vectors
  41        - direction vectors
  42        - loop carried level dependence
  43        - polyhedron dependence
  44      or with the chains of recurrences based representation,
  45
  46    - to define a knowledge base for storing the data dependence
  47      information,
  48
  49    - to define an interface to access this data.
  50
  51
  52    Definitions:
  53
  54    - subscript: given two array accesses a subscript is the tuple
  55    composed of the access functions for a given dimension.  Example:
  56    Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
  57    (f1, g1), (f2, g2), (f3, g3).
  58
  59    - Diophantine equation: an equation whose coefficients and
  60    solutions are integer constants, for example the equation
  61    |   3*x + 2*y = 1
  62    has an integer solution x = 1 and y = -1.
  63
  64    References:
  65
  66    - "Advanced Compilation for High Performance Computing" by Randy
  67    Allen and Ken Kennedy.
  68    http://citeseer.ist.psu.edu/goff91practical.html
  69
  70    - "Loop Transformations for Restructuring Compilers - The Foundations"
  71    by Utpal Banerjee.
  72
  73
  74 */
  75
  76 #include "config.h"
  77 #include "system.h"
  78 #include "coretypes.h"
  79 #include "backend.h"
  80 #include "rtl.h"
  81 #include "tree.h"
  82 #include "gimple.h"
  83 #include "gimple-pretty-print.h"
  84 #include "alias.h"
  85 #include "fold-const.h"
  86 #include "expr.h"
  87 #include "gimple-iterator.h"
  88 #include "tree-ssa-loop-niter.h"
  89 #include "tree-ssa-loop.h"
  90 #include "tree-ssa.h"
  91 #include "cfgloop.h"
  92 #include "tree-data-ref.h"
  93 #include "tree-scalar-evolution.h"
  94 #include "dumpfile.h"
  95 #include "tree-affine.h"
  96 #include "params.h"
  97 #include "builtins.h"
  98 #include "stringpool.h"
  99 #include "tree-vrp.h"
 100 #include "tree-ssanames.h"
 101 #include "tree-eh.h"
 102
 103 static struct datadep_stats
 104 {
 105   int num_dependence_tests;
 106   int num_dependence_dependent;
 107   int num_dependence_independent;
 108   int num_dependence_undetermined;
 109
 110   int num_subscript_tests;
 111   int num_subscript_undetermined;
 112   int num_same_subscript_function;
 113
 114   int num_ziv;
 115   int num_ziv_independent;
 116   int num_ziv_dependent;
 117   int num_ziv_unimplemented;
 118
 119   int num_siv;
 120   int num_siv_independent;
 121   int num_siv_dependent;
 122   int num_siv_unimplemented;
 123
 124   int num_miv;
 125   int num_miv_independent;
 126   int num_miv_dependent;
 127   int num_miv_unimplemented;
 128 } dependence_stats;
 129
 130 static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
 131                                            unsigned int, unsigned int,
 132                                            struct loop *);
 133 /* Returns true iff A divides B.  */
 134
 135 static inline bool
 136 tree_fold_divides_p (const_tree a, const_tree b)
 137 {
 138   gcc_assert (TREE_CODE (a) == INTEGER_CST);
 139   gcc_assert (TREE_CODE (b) == INTEGER_CST);
 140   return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
 141 }
 142
 143 /* Returns true iff A divides B.  */
 144
 145 static inline bool
 146 int_divides_p (int a, int b)
 147 {
 148   return ((b % a) == 0);
 149 }
 150
 151 /* Return true if reference REF contains a union access.  */
 152
 153 static bool
 154 ref_contains_union_access_p (tree ref)
 155 {
 156   while (handled_component_p (ref))
 157     {
 158       ref = TREE_OPERAND (ref, 0);
 159       if (TREE_CODE (TREE_TYPE (ref)) == UNION_TYPE
 160           || TREE_CODE (TREE_TYPE (ref)) == QUAL_UNION_TYPE)
 161         return true;
 162     }
 163   return false;
 164 }
 165
 166 \f
 167
 168 /* Dump into FILE all the data references from DATAREFS.  */
 169
 170 static void
 171 dump_data_references (FILE *file, vec<data_reference_p> datarefs)
 172 {
 173   unsigned int i;
 174   struct data_reference *dr;
 175
 176   FOR_EACH_VEC_ELT (datarefs, i, dr)
 177     dump_data_reference (file, dr);
 178 }
 179
 180 /* Unified dump into FILE all the data references from DATAREFS.  */
 181
 182 DEBUG_FUNCTION void
 183 debug (vec<data_reference_p> &ref)
 184 {
 185   dump_data_references (stderr, ref);
 186 }
 187
 188 DEBUG_FUNCTION void
 189 debug (vec<data_reference_p> *ptr)
 190 {
 191   if (ptr)
 192     debug (*ptr);
 193   else
 194     fprintf (stderr, "<nil>\n");
 195 }
 196
 197
 198 /* Dump into STDERR all the data references from DATAREFS.  */
 199
 200 DEBUG_FUNCTION void
 201 debug_data_references (vec<data_reference_p> datarefs)
 202 {
 203   dump_data_references (stderr, datarefs);
 204 }
 205
 206 /* Print to STDERR the data_reference DR.  */
 207
 208 DEBUG_FUNCTION void
 209 debug_data_reference (struct data_reference *dr)
 210 {
 211   dump_data_reference (stderr, dr);
 212 }
 213
 214 /* Dump function for a DATA_REFERENCE structure.  */
 215
 216 void
 217 dump_data_reference (FILE *outf,
 218                      struct data_reference *dr)
 219 {
 220   unsigned int i;
 221
 222   fprintf (outf, "#(Data Ref: \n");
 223   fprintf (outf, "#  bb: %d \n", gimple_bb (DR_STMT (dr))->index);
 224   fprintf (outf, "#  stmt: ");
 225   print_gimple_stmt (outf, DR_STMT (dr), 0);
 226   fprintf (outf, "#  ref: ");
 227   print_generic_stmt (outf, DR_REF (dr));
 228   fprintf (outf, "#  base_object: ");
 229   print_generic_stmt (outf, DR_BASE_OBJECT (dr));
 230
 231   for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
 232     {
 233       fprintf (outf, "#  Access function %d: ", i);
 234       print_generic_stmt (outf, DR_ACCESS_FN (dr, i));
 235     }
 236   fprintf (outf, "#)\n");
 237 }
 238
 239 /* Unified dump function for a DATA_REFERENCE structure.  */
 240
 241 DEBUG_FUNCTION void
 242 debug (data_reference &ref)
 243 {
 244   dump_data_reference (stderr, &ref);
 245 }
 246
 247 DEBUG_FUNCTION void
 248 debug (data_reference *ptr)
 249 {
 250   if (ptr)
 251     debug (*ptr);
 252   else
 253     fprintf (stderr, "<nil>\n");
 254 }
 255
 256
 257 /* Dumps the affine function described by FN to the file OUTF.  */
 258
 259 DEBUG_FUNCTION void
 260 dump_affine_function (FILE *outf, affine_fn fn)
 261 {
 262   unsigned i;
 263   tree coef;
 264
 265   print_generic_expr (outf, fn[0], TDF_SLIM);
 266   for (i = 1; fn.iterate (i, &coef); i++)
 267     {
 268       fprintf (outf, " + ");
 269       print_generic_expr (outf, coef, TDF_SLIM);
 270       fprintf (outf, " * x_%u", i);
 271     }
 272 }
 273
 274 /* Dumps the conflict function CF to the file OUTF.  */
 275
 276 DEBUG_FUNCTION void
 277 dump_conflict_function (FILE *outf, conflict_function *cf)
 278 {
 279   unsigned i;
 280
 281   if (cf->n == NO_DEPENDENCE)
 282     fprintf (outf, "no dependence");
 283   else if (cf->n == NOT_KNOWN)
 284     fprintf (outf, "not known");
 285   else
 286     {
 287       for (i = 0; i < cf->n; i++)
 288         {
 289           if (i != 0)
 290             fprintf (outf, " ");
 291           fprintf (outf, "[");
 292           dump_affine_function (outf, cf->fns[i]);
 293           fprintf (outf, "]");
 294         }
 295     }
 296 }
 297
 298 /* Dump function for a SUBSCRIPT structure.  */
 299
 300 DEBUG_FUNCTION void
 301 dump_subscript (FILE *outf, struct subscript *subscript)
 302 {
 303   conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
 304
 305   fprintf (outf, "\n (subscript \n");
 306   fprintf (outf, "  iterations_that_access_an_element_twice_in_A: ");
 307   dump_conflict_function (outf, cf);
 308   if (CF_NONTRIVIAL_P (cf))
 309     {
 310       tree last_iteration = SUB_LAST_CONFLICT (subscript);
 311       fprintf (outf, "\n  last_conflict: ");
 312       print_generic_expr (outf, last_iteration);
 313     }
 314
 315   cf = SUB_CONFLICTS_IN_B (subscript);
 316   fprintf (outf, "\n  iterations_that_access_an_element_twice_in_B: ");
 317   dump_conflict_function (outf, cf);
 318   if (CF_NONTRIVIAL_P (cf))
 319     {
 320       tree last_iteration = SUB_LAST_CONFLICT (subscript);
 321       fprintf (outf, "\n  last_conflict: ");
 322       print_generic_expr (outf, last_iteration);
 323     }
 324
 325   fprintf (outf, "\n  (Subscript distance: ");
 326   print_generic_expr (outf, SUB_DISTANCE (subscript));
 327   fprintf (outf, " ))\n");
 328 }
 329
 330 /* Print the classic direction vector DIRV to OUTF.  */
 331
 332 DEBUG_FUNCTION void
 333 print_direction_vector (FILE *outf,
 334                         lambda_vector dirv,
 335                         int length)
 336 {
 337   int eq;
 338
 339   for (eq = 0; eq < length; eq++)
 340     {
 341       enum data_dependence_direction dir = ((enum data_dependence_direction)
 342                                             dirv[eq]);
 343
 344       switch (dir)
 345         {
 346         case dir_positive:
 347           fprintf (outf, "    +");
 348           break;
 349         case dir_negative:
 350           fprintf (outf, "    -");
 351           break;
 352         case dir_equal:
 353           fprintf (outf, "    =");
 354           break;
 355         case dir_positive_or_equal:
 356           fprintf (outf, "   +=");
 357           break;
 358         case dir_positive_or_negative:
 359           fprintf (outf, "   +-");
 360           break;
 361         case dir_negative_or_equal:
 362           fprintf (outf, "   -=");
 363           break;
 364         case dir_star:
 365           fprintf (outf, "    *");
 366           break;
 367         default:
 368           fprintf (outf, "indep");
 369           break;
 370         }
 371     }
 372   fprintf (outf, "\n");
 373 }
 374
 375 /* Print a vector of direction vectors.  */
 376
 377 DEBUG_FUNCTION void
 378 print_dir_vectors (FILE *outf, vec<lambda_vector> dir_vects,
 379                    int length)
 380 {
 381   unsigned j;
 382   lambda_vector v;
 383
 384   FOR_EACH_VEC_ELT (dir_vects, j, v)
 385     print_direction_vector (outf, v, length);
 386 }
 387
 388 /* Print out a vector VEC of length N to OUTFILE.  */
 389
 390 DEBUG_FUNCTION void
 391 print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
 392 {
 393   int i;
 394
 395   for (i = 0; i < n; i++)
 396     fprintf (outfile, "%3d ", vector[i]);
 397   fprintf (outfile, "\n");
 398 }
 399
 400 /* Print a vector of distance vectors.  */
 401
 402 DEBUG_FUNCTION void
 403 print_dist_vectors (FILE *outf, vec<lambda_vector> dist_vects,
 404                     int length)
 405 {
 406   unsigned j;
 407   lambda_vector v;
 408
 409   FOR_EACH_VEC_ELT (dist_vects, j, v)
 410     print_lambda_vector (outf, v, length);
 411 }
 412
 413 /* Dump function for a DATA_DEPENDENCE_RELATION structure.  */
 414
 415 DEBUG_FUNCTION void
 416 dump_data_dependence_relation (FILE *outf,
 417                                struct data_dependence_relation *ddr)
 418 {
 419   struct data_reference *dra, *drb;
 420
 421   fprintf (outf, "(Data Dep: \n");
 422
 423   if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
 424     {
 425       if (ddr)
 426         {
 427           dra = DDR_A (ddr);
 428           drb = DDR_B (ddr);
 429           if (dra)
 430             dump_data_reference (outf, dra);
 431           else
 432             fprintf (outf, "    (nil)\n");
 433           if (drb)
 434             dump_data_reference (outf, drb);
 435           else
 436             fprintf (outf, "    (nil)\n");
 437         }
 438       fprintf (outf, "    (don't know)\n)\n");
 439       return;
 440     }
 441
 442   dra = DDR_A (ddr);
 443   drb = DDR_B (ddr);
 444   dump_data_reference (outf, dra);
 445   dump_data_reference (outf, drb);
 446
 447   if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
 448     fprintf (outf, "    (no dependence)\n");
 449
 450   else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
 451     {
 452       unsigned int i;
 453       struct loop *loopi;
 454
 455       subscript *sub;
 456       FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
 457         {
 458           fprintf (outf, "  access_fn_A: ");
 459           print_generic_stmt (outf, SUB_ACCESS_FN (sub, 0));
 460           fprintf (outf, "  access_fn_B: ");
 461           print_generic_stmt (outf, SUB_ACCESS_FN (sub, 1));
 462           dump_subscript (outf, sub);
 463         }
 464
 465       fprintf (outf, "  inner loop index: %d\n", DDR_INNER_LOOP (ddr));
 466       fprintf (outf, "  loop nest: (");
 467       FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr), i, loopi)
 468         fprintf (outf, "%d ", loopi->num);
 469       fprintf (outf, ")\n");
 470
 471       for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
 472         {
 473           fprintf (outf, "  distance_vector: ");
 474           print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
 475                                DDR_NB_LOOPS (ddr));
 476         }
 477
 478       for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
 479         {
 480           fprintf (outf, "  direction_vector: ");
 481           print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
 482                                   DDR_NB_LOOPS (ddr));
 483         }
 484     }
 485
 486   fprintf (outf, ")\n");
 487 }
 488
 489 /* Debug version.  */
 490
 491 DEBUG_FUNCTION void
 492 debug_data_dependence_relation (struct data_dependence_relation *ddr)
 493 {
 494   dump_data_dependence_relation (stderr, ddr);
 495 }
 496
 497 /* Dump into FILE all the dependence relations from DDRS.  */
 498
 499 DEBUG_FUNCTION void
 500 dump_data_dependence_relations (FILE *file,
 501                                 vec<ddr_p> ddrs)
 502 {
 503   unsigned int i;
 504   struct data_dependence_relation *ddr;
 505
 506   FOR_EACH_VEC_ELT (ddrs, i, ddr)
 507     dump_data_dependence_relation (file, ddr);
 508 }
 509
 510 DEBUG_FUNCTION void
 511 debug (vec<ddr_p> &ref)
 512 {
 513   dump_data_dependence_relations (stderr, ref);
 514 }
 515
 516 DEBUG_FUNCTION void
 517 debug (vec<ddr_p> *ptr)
 518 {
 519   if (ptr)
 520     debug (*ptr);
 521   else
 522     fprintf (stderr, "<nil>\n");
 523 }
 524
 525
 526 /* Dump to STDERR all the dependence relations from DDRS.  */
 527
 528 DEBUG_FUNCTION void
 529 debug_data_dependence_relations (vec<ddr_p> ddrs)
 530 {
 531   dump_data_dependence_relations (stderr, ddrs);
 532 }
 533
 534 /* Dumps the distance and direction vectors in FILE.  DDRS contains
 535    the dependence relations, and VECT_SIZE is the size of the
 536    dependence vectors, or in other words the number of loops in the
 537    considered nest.  */
 538
 539 DEBUG_FUNCTION void
 540 dump_dist_dir_vectors (FILE *file, vec<ddr_p> ddrs)
 541 {
 542   unsigned int i, j;
 543   struct data_dependence_relation *ddr;
 544   lambda_vector v;
 545
 546   FOR_EACH_VEC_ELT (ddrs, i, ddr)
 547     if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
 548       {
 549         FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), j, v)
 550           {
 551             fprintf (file, "DISTANCE_V (");
 552             print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
 553             fprintf (file, ")\n");
 554           }
 555
 556         FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), j, v)
 557           {
 558             fprintf (file, "DIRECTION_V (");
 559             print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
 560             fprintf (file, ")\n");
 561           }
 562       }
 563
 564   fprintf (file, "\n\n");
 565 }
 566
 567 /* Dumps the data dependence relations DDRS in FILE.  */
 568
 569 DEBUG_FUNCTION void
 570 dump_ddrs (FILE *file, vec<ddr_p> ddrs)
 571 {
 572   unsigned int i;
 573   struct data_dependence_relation *ddr;
 574
 575   FOR_EACH_VEC_ELT (ddrs, i, ddr)
 576     dump_data_dependence_relation (file, ddr);
 577
 578   fprintf (file, "\n\n");
 579 }
 580
 581 DEBUG_FUNCTION void
 582 debug_ddrs (vec<ddr_p> ddrs)
 583 {
 584   dump_ddrs (stderr, ddrs);
 585 }
 586
 587 /* Helper function for split_constant_offset.  Expresses OP0 CODE OP1
 588    (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
 589    constant of type ssizetype, and returns true.  If we cannot do this
 590    with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
 591    is returned.  */
 592
 593 static bool
 594 split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
 595                          tree *var, tree *off)
 596 {
 597   tree var0, var1;
 598   tree off0, off1;
 599   enum tree_code ocode = code;
 600
 601   *var = NULL_TREE;
 602   *off = NULL_TREE;
 603
 604   switch (code)
 605     {
 606     case INTEGER_CST:
 607       *var = build_int_cst (type, 0);
 608       *off = fold_convert (ssizetype, op0);
 609       return true;
 610
 611     case POINTER_PLUS_EXPR:
 612       ocode = PLUS_EXPR;
 613       /* FALLTHROUGH */
 614     case PLUS_EXPR:
 615     case MINUS_EXPR:
 616       split_constant_offset (op0, &var0, &off0);
 617       split_constant_offset (op1, &var1, &off1);
 618       *var = fold_build2 (code, type, var0, var1);
 619       *off = size_binop (ocode, off0, off1);
 620       return true;
 621
 622     case MULT_EXPR:
 623       if (TREE_CODE (op1) != INTEGER_CST)
 624         return false;
 625
 626       split_constant_offset (op0, &var0, &off0);
 627       *var = fold_build2 (MULT_EXPR, type, var0, op1);
 628       *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
 629       return true;
 630
 631     case ADDR_EXPR:
 632       {
 633         tree base, poffset;
 634         poly_int64 pbitsize, pbitpos, pbytepos;
 635         machine_mode pmode;
 636         int punsignedp, preversep, pvolatilep;
 637
 638         op0 = TREE_OPERAND (op0, 0);
 639         base
 640           = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset, &pmode,
 641                                  &punsignedp, &preversep, &pvolatilep);
 642
 643         if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
 644           return false;
 645         base = build_fold_addr_expr (base);
 646         off0 = ssize_int (pbytepos);
 647
 648         if (poffset)
 649           {
 650             split_constant_offset (poffset, &poffset, &off1);
 651             off0 = size_binop (PLUS_EXPR, off0, off1);
 652             if (POINTER_TYPE_P (TREE_TYPE (base)))
 653               base = fold_build_pointer_plus (base, poffset);
 654             else
 655               base = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base,
 656                                   fold_convert (TREE_TYPE (base), poffset));
 657           }
 658
 659         var0 = fold_convert (type, base);
 660
 661         /* If variable length types are involved, punt, otherwise casts
 662            might be converted into ARRAY_REFs in gimplify_conversion.
 663            To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
 664            possibly no longer appears in current GIMPLE, might resurface.
 665            This perhaps could run
 666            if (CONVERT_EXPR_P (var0))
 667              {
 668                gimplify_conversion (&var0);
 669                // Attempt to fill in any within var0 found ARRAY_REF's
 670                // element size from corresponding op embedded ARRAY_REF,
 671                // if unsuccessful, just punt.
 672              }  */
 673         while (POINTER_TYPE_P (type))
 674           type = TREE_TYPE (type);
 675         if (int_size_in_bytes (type) < 0)
 676           return false;
 677
 678         *var = var0;
 679         *off = off0;
 680         return true;
 681       }
 682
 683     case SSA_NAME:
 684       {
 685         if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op0))
 686           return false;
 687
 688         gimple *def_stmt = SSA_NAME_DEF_STMT (op0);
 689         enum tree_code subcode;
 690
 691         if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
 692           return false;
 693
 694         var0 = gimple_assign_rhs1 (def_stmt);
 695         subcode = gimple_assign_rhs_code (def_stmt);
 696         var1 = gimple_assign_rhs2 (def_stmt);
 697
 698         return split_constant_offset_1 (type, var0, subcode, var1, var, off);
 699       }
 700     CASE_CONVERT:
 701       {
 702         /* We must not introduce undefined overflow, and we must not change the value.
 703            Hence we're okay if the inner type doesn't overflow to start with
 704            (pointer or signed), the outer type also is an integer or pointer
 705            and the outer precision is at least as large as the inner.  */
 706         tree itype = TREE_TYPE (op0);
 707         if ((POINTER_TYPE_P (itype)
 708              || (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_UNDEFINED (itype)))
 709             && TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
 710             && (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
 711           {
 712             split_constant_offset (op0, &var0, off);
 713             *var = fold_convert (type, var0);
 714             return true;
 715           }
 716         return false;
 717       }
 718
 719     default:
 720       return false;
 721     }
 722 }
 723
 724 /* Expresses EXP as VAR + OFF, where off is a constant.  The type of OFF
 725    will be ssizetype.  */
 726
 727 void
 728 split_constant_offset (tree exp, tree *var, tree *off)
 729 {
 730   tree type = TREE_TYPE (exp), op0, op1, e, o;
 731   enum tree_code code;
 732
 733   *var = exp;
 734   *off = ssize_int (0);
 735
 736   if (tree_is_chrec (exp)
 737       || get_gimple_rhs_class (TREE_CODE (exp)) == GIMPLE_TERNARY_RHS)
 738     return;
 739
 740   code = TREE_CODE (exp);
 741   extract_ops_from_tree (exp, &code, &op0, &op1);
 742   if (split_constant_offset_1 (type, op0, code, op1, &e, &o))
 743     {
 744       *var = e;
 745       *off = o;
 746     }
 747 }
 748
 749 /* Returns the address ADDR of an object in a canonical shape (without nop
 750    casts, and with type of pointer to the object).  */
 751
 752 static tree
 753 canonicalize_base_object_address (tree addr)
 754 {
 755   tree orig = addr;
 756
 757   STRIP_NOPS (addr);
 758
 759   /* The base address may be obtained by casting from integer, in that case
 760      keep the cast.  */
 761   if (!POINTER_TYPE_P (TREE_TYPE (addr)))
 762     return orig;
 763
 764   if (TREE_CODE (addr) != ADDR_EXPR)
 765     return addr;
 766
 767   return build_fold_addr_expr (TREE_OPERAND (addr, 0));
 768 }
 769
 770 /* Analyze the behavior of memory reference REF.  There are two modes:
 771
 772    - BB analysis.  In this case we simply split the address into base,
 773      init and offset components, without reference to any containing loop.
 774      The resulting base and offset are general expressions and they can
 775      vary arbitrarily from one iteration of the containing loop to the next.
 776      The step is always zero.
 777
 778    - loop analysis.  In this case we analyze the reference both wrt LOOP
 779      and on the basis that the reference occurs (is "used") in LOOP;
 780      see the comment above analyze_scalar_evolution_in_loop for more
 781      information about this distinction.  The base, init, offset and
 782      step fields are all invariant in LOOP.
 783
 784    Perform BB analysis if LOOP is null, or if LOOP is the function's
 785    dummy outermost loop.  In other cases perform loop analysis.
 786
 787    Return true if the analysis succeeded and store the results in DRB if so.
 788    BB analysis can only fail for bitfield or reversed-storage accesses.  */
 789
 790 bool
 791 dr_analyze_innermost (innermost_loop_behavior *drb, tree ref,
 792                       struct loop *loop)
 793 {
 794   poly_int64 pbitsize, pbitpos;
 795   tree base, poffset;
 796   machine_mode pmode;
 797   int punsignedp, preversep, pvolatilep;
 798   affine_iv base_iv, offset_iv;
 799   tree init, dinit, step;
 800   bool in_loop = (loop && loop->num);
 801
 802   if (dump_file && (dump_flags & TDF_DETAILS))
 803     fprintf (dump_file, "analyze_innermost: ");
 804
 805   base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset, &pmode,
 806                               &punsignedp, &preversep, &pvolatilep);
 807   gcc_assert (base != NULL_TREE);
 808
 809   poly_int64 pbytepos;
 810   if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
 811     {
 812       if (dump_file && (dump_flags & TDF_DETAILS))
 813         fprintf (dump_file, "failed: bit offset alignment.\n");
 814       return false;
 815     }
 816
 817   if (preversep)
 818     {
 819       if (dump_file && (dump_flags & TDF_DETAILS))
 820         fprintf (dump_file, "failed: reverse storage order.\n");
 821       return false;
 822     }
 823
 824   /* Calculate the alignment and misalignment for the inner reference.  */
 825   unsigned int HOST_WIDE_INT bit_base_misalignment;
 826   unsigned int bit_base_alignment;
 827   get_object_alignment_1 (base, &bit_base_alignment, &bit_base_misalignment);
 828
 829   /* There are no bitfield references remaining in BASE, so the values
 830      we got back must be whole bytes.  */
 831   gcc_assert (bit_base_alignment % BITS_PER_UNIT == 0
 832               && bit_base_misalignment % BITS_PER_UNIT == 0);
 833   unsigned int base_alignment = bit_base_alignment / BITS_PER_UNIT;
 834   poly_int64 base_misalignment = bit_base_misalignment / BITS_PER_UNIT;
 835
 836   if (TREE_CODE (base) == MEM_REF)
 837     {
 838       if (!integer_zerop (TREE_OPERAND (base, 1)))
 839         {
 840           /* Subtract MOFF from the base and add it to POFFSET instead.
 841              Adjust the misalignment to reflect the amount we subtracted.  */
 842           poly_offset_int moff = mem_ref_offset (base);
 843           base_misalignment -= moff.force_shwi ();
 844           tree mofft = wide_int_to_tree (sizetype, moff);
 845           if (!poffset)
 846             poffset = mofft;
 847           else
 848             poffset = size_binop (PLUS_EXPR, poffset, mofft);
 849         }
 850       base = TREE_OPERAND (base, 0);
 851     }
 852   else
 853     base = build_fold_addr_expr (base);
 854
 855   if (in_loop)
 856     {
 857       if (!simple_iv (loop, loop, base, &base_iv, true))
 858         {
 859           if (dump_file && (dump_flags & TDF_DETAILS))
 860             fprintf (dump_file, "failed: evolution of base is not affine.\n");
 861           return false;
 862         }
 863     }
 864   else
 865     {
 866       base_iv.base = base;
 867       base_iv.step = ssize_int (0);
 868       base_iv.no_overflow = true;
 869     }
 870
 871   if (!poffset)
 872     {
 873       offset_iv.base = ssize_int (0);
 874       offset_iv.step = ssize_int (0);
 875     }
 876   else
 877     {
 878       if (!in_loop)
 879         {
 880           offset_iv.base = poffset;
 881           offset_iv.step = ssize_int (0);
 882         }
 883       else if (!simple_iv (loop, loop, poffset, &offset_iv, true))
 884         {
 885           if (dump_file && (dump_flags & TDF_DETAILS))
 886             fprintf (dump_file, "failed: evolution of offset is not affine.\n");
 887           return false;
 888         }
 889     }
 890
 891   init = ssize_int (pbytepos);
 892
 893   /* Subtract any constant component from the base and add it to INIT instead.
 894      Adjust the misalignment to reflect the amount we subtracted.  */
 895   split_constant_offset (base_iv.base, &base_iv.base, &dinit);
 896   init = size_binop (PLUS_EXPR, init, dinit);
 897   base_misalignment -= TREE_INT_CST_LOW (dinit);
 898
 899   split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
 900   init = size_binop (PLUS_EXPR, init, dinit);
 901
 902   step = size_binop (PLUS_EXPR,
 903                      fold_convert (ssizetype, base_iv.step),
 904                      fold_convert (ssizetype, offset_iv.step));
 905
 906   base = canonicalize_base_object_address (base_iv.base);
 907
 908   /* See if get_pointer_alignment can guarantee a higher alignment than
 909      the one we calculated above.  */
 910   unsigned int HOST_WIDE_INT alt_misalignment;
 911   unsigned int alt_alignment;
 912   get_pointer_alignment_1 (base, &alt_alignment, &alt_misalignment);
 913
 914   /* As above, these values must be whole bytes.  */
 915   gcc_assert (alt_alignment % BITS_PER_UNIT == 0
 916               && alt_misalignment % BITS_PER_UNIT == 0);
 917   alt_alignment /= BITS_PER_UNIT;
 918   alt_misalignment /= BITS_PER_UNIT;
 919
 920   if (base_alignment < alt_alignment)
 921     {
 922       base_alignment = alt_alignment;
 923       base_misalignment = alt_misalignment;
 924     }
 925
 926   drb->base_address = base;
 927   drb->offset = fold_convert (ssizetype, offset_iv.base);
 928   drb->init = init;
 929   drb->step = step;
 930   if (known_misalignment (base_misalignment, base_alignment,
 931                           &drb->base_misalignment))
 932     drb->base_alignment = base_alignment;
 933   else
 934     {
 935       drb->base_alignment = known_alignment (base_misalignment);
 936       drb->base_misalignment = 0;
 937     }
 938   drb->offset_alignment = highest_pow2_factor (offset_iv.base);
 939   drb->step_alignment = highest_pow2_factor (step);
 940
 941   if (dump_file && (dump_flags & TDF_DETAILS))
 942     fprintf (dump_file, "success.\n");
 943
 944   return true;
 945 }
 946
 947 /* Return true if OP is a valid component reference for a DR access
 948    function.  This accepts a subset of what handled_component_p accepts.  */
 949
 950 static bool
 951 access_fn_component_p (tree op)
 952 {
 953   switch (TREE_CODE (op))
 954     {
 955     case REALPART_EXPR:
 956     case IMAGPART_EXPR:
 957     case ARRAY_REF:
 958       return true;
 959
 960     case COMPONENT_REF:
 961       return TREE_CODE (TREE_TYPE (TREE_OPERAND (op, 0))) == RECORD_TYPE;
 962
 963     default:
 964       return false;
 965     }
 966 }
 967
 968 /* Determines the base object and the list of indices of memory reference
 969    DR, analyzed in LOOP and instantiated before NEST.  */
 970
 971 static void
 972 dr_analyze_indices (struct data_reference *dr, edge nest, loop_p loop)
 973 {
 974   vec<tree> access_fns = vNULL;
 975   tree ref, op;
 976   tree base, off, access_fn;
 977
 978   /* If analyzing a basic-block there are no indices to analyze
 979      and thus no access functions.  */
 980   if (!nest)
 981     {
 982       DR_BASE_OBJECT (dr) = DR_REF (dr);
 983       DR_ACCESS_FNS (dr).create (0);
 984       return;
 985     }
 986
 987   ref = DR_REF (dr);
 988
 989   /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
 990      into a two element array with a constant index.  The base is
 991      then just the immediate underlying object.  */
 992   if (TREE_CODE (ref) == REALPART_EXPR)
 993     {
 994       ref = TREE_OPERAND (ref, 0);
 995       access_fns.safe_push (integer_zero_node);
 996     }
 997   else if (TREE_CODE (ref) == IMAGPART_EXPR)
 998     {
 999       ref = TREE_OPERAND (ref, 0);
1000       access_fns.safe_push (integer_one_node);
1001     }
1002
1003   /* Analyze access functions of dimensions we know to be independent.
1004      The list of component references handled here should be kept in
1005      sync with access_fn_component_p.  */
1006   while (handled_component_p (ref))
1007     {
1008       if (TREE_CODE (ref) == ARRAY_REF)
1009         {
1010           op = TREE_OPERAND (ref, 1);
1011           access_fn = analyze_scalar_evolution (loop, op);
1012           access_fn = instantiate_scev (nest, loop, access_fn);
1013           access_fns.safe_push (access_fn);
1014         }
1015       else if (TREE_CODE (ref) == COMPONENT_REF
1016                && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) == RECORD_TYPE)
1017         {
1018           /* For COMPONENT_REFs of records (but not unions!) use the
1019              FIELD_DECL offset as constant access function so we can
1020              disambiguate a[i].f1 and a[i].f2.  */
1021           tree off = component_ref_field_offset (ref);
1022           off = size_binop (PLUS_EXPR,
1023                             size_binop (MULT_EXPR,
1024                                         fold_convert (bitsizetype, off),
1025                                         bitsize_int (BITS_PER_UNIT)),
1026                             DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref, 1)));
1027           access_fns.safe_push (off);
1028         }
1029       else
1030         /* If we have an unhandled component we could not translate
1031            to an access function stop analyzing.  We have determined
1032            our base object in this case.  */
1033         break;
1034
1035       ref = TREE_OPERAND (ref, 0);
1036     }
1037
1038   /* If the address operand of a MEM_REF base has an evolution in the
1039      analyzed nest, add it as an additional independent access-function.  */
1040   if (TREE_CODE (ref) == MEM_REF)
1041     {
1042       op = TREE_OPERAND (ref, 0);
1043       access_fn = analyze_scalar_evolution (loop, op);
1044       access_fn = instantiate_scev (nest, loop, access_fn);
1045       if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
1046         {
1047           tree orig_type;
1048           tree memoff = TREE_OPERAND (ref, 1);
1049           base = initial_condition (access_fn);
1050           orig_type = TREE_TYPE (base);
1051           STRIP_USELESS_TYPE_CONVERSION (base);
1052           split_constant_offset (base, &base, &off);
1053           STRIP_USELESS_TYPE_CONVERSION (base);
1054           /* Fold the MEM_REF offset into the evolutions initial
1055              value to make more bases comparable.  */
1056           if (!integer_zerop (memoff))
1057             {
1058               off = size_binop (PLUS_EXPR, off,
1059                                 fold_convert (ssizetype, memoff));
1060               memoff = build_int_cst (TREE_TYPE (memoff), 0);
1061             }
1062           /* Adjust the offset so it is a multiple of the access type
1063              size and thus we separate bases that can possibly be used
1064              to produce partial overlaps (which the access_fn machinery
1065              cannot handle).  */
1066           wide_int rem;
1067           if (TYPE_SIZE_UNIT (TREE_TYPE (ref))
1068               && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (ref))) == INTEGER_CST
1069               && !integer_zerop (TYPE_SIZE_UNIT (TREE_TYPE (ref))))
1070             rem = wi::mod_trunc
1071               (wi::to_wide (off),
1072                wi::to_wide (TYPE_SIZE_UNIT (TREE_TYPE (ref))),
1073                SIGNED);
1074           else
1075             /* If we can't compute the remainder simply force the initial
1076                condition to zero.  */
1077             rem = wi::to_wide (off);
1078           off = wide_int_to_tree (ssizetype, wi::to_wide (off) - rem);
1079           memoff = wide_int_to_tree (TREE_TYPE (memoff), rem);
1080           /* And finally replace the initial condition.  */
1081           access_fn = chrec_replace_initial_condition
1082               (access_fn, fold_convert (orig_type, off));
1083           /* ???  This is still not a suitable base object for
1084              dr_may_alias_p - the base object needs to be an
1085              access that covers the object as whole.  With
1086              an evolution in the pointer this cannot be
1087              guaranteed.
1088              As a band-aid, mark the access so we can special-case
1089              it in dr_may_alias_p.  */
1090           tree old = ref;
1091           ref = fold_build2_loc (EXPR_LOCATION (ref),
1092                                  MEM_REF, TREE_TYPE (ref),
1093                                  base, memoff);
1094           MR_DEPENDENCE_CLIQUE (ref) = MR_DEPENDENCE_CLIQUE (old);
1095           MR_DEPENDENCE_BASE (ref) = MR_DEPENDENCE_BASE (old);
1096           DR_UNCONSTRAINED_BASE (dr) = true;
1097           access_fns.safe_push (access_fn);
1098         }
1099     }
1100   else if (DECL_P (ref))
1101     {
1102       /* Canonicalize DR_BASE_OBJECT to MEM_REF form.  */
1103       ref = build2 (MEM_REF, TREE_TYPE (ref),
1104                     build_fold_addr_expr (ref),
1105                     build_int_cst (reference_alias_ptr_type (ref), 0));
1106     }
1107
1108   DR_BASE_OBJECT (dr) = ref;
1109   DR_ACCESS_FNS (dr) = access_fns;
1110 }
1111
1112 /* Extracts the alias analysis information from the memory reference DR.  */
1113
1114 static void
1115 dr_analyze_alias (struct data_reference *dr)
1116 {
1117   tree ref = DR_REF (dr);
1118   tree base = get_base_address (ref), addr;
1119
1120   if (INDIRECT_REF_P (base)
1121       || TREE_CODE (base) == MEM_REF)
1122     {
1123       addr = TREE_OPERAND (base, 0);
1124       if (TREE_CODE (addr) == SSA_NAME)
1125         DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
1126     }
1127 }
1128
1129 /* Frees data reference DR.  */
1130
1131 void
1132 free_data_ref (data_reference_p dr)
1133 {
1134   DR_ACCESS_FNS (dr).release ();
1135   free (dr);
1136 }
1137
1138 /* Analyze memory reference MEMREF, which is accessed in STMT.
1139    The reference is a read if IS_READ is true, otherwise it is a write.
1140    IS_CONDITIONAL_IN_STMT indicates that the reference is conditional
1141    within STMT, i.e. that it might not occur even if STMT is executed
1142    and runs to completion.
1143
1144    Return the data_reference description of MEMREF.  NEST is the outermost
1145    loop in which the reference should be instantiated, LOOP is the loop
1146    in which the data reference should be analyzed.  */
1147
1148 struct data_reference *
1149 create_data_ref (edge nest, loop_p loop, tree memref, gimple *stmt,
1150                  bool is_read, bool is_conditional_in_stmt)
1151 {
1152   struct data_reference *dr;
1153
1154   if (dump_file && (dump_flags & TDF_DETAILS))
1155     {
1156       fprintf (dump_file, "Creating dr for ");
1157       print_generic_expr (dump_file, memref, TDF_SLIM);
1158       fprintf (dump_file, "\n");
1159     }
1160
1161   dr = XCNEW (struct data_reference);
1162   DR_STMT (dr) = stmt;
1163   DR_REF (dr) = memref;
1164   DR_IS_READ (dr) = is_read;
1165   DR_IS_CONDITIONAL_IN_STMT (dr) = is_conditional_in_stmt;
1166
1167   dr_analyze_innermost (&DR_INNERMOST (dr), memref,
1168                         nest != NULL ? loop : NULL);
1169   dr_analyze_indices (dr, nest, loop);
1170   dr_analyze_alias (dr);
1171
1172   if (dump_file && (dump_flags & TDF_DETAILS))
1173     {
1174       unsigned i;
1175       fprintf (dump_file, "\tbase_address: ");
1176       print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
1177       fprintf (dump_file, "\n\toffset from base address: ");
1178       print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
1179       fprintf (dump_file, "\n\tconstant offset from base address: ");
1180       print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
1181       fprintf (dump_file, "\n\tstep: ");
1182       print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
1183       fprintf (dump_file, "\n\tbase alignment: %d", DR_BASE_ALIGNMENT (dr));
1184       fprintf (dump_file, "\n\tbase misalignment: %d",
1185                DR_BASE_MISALIGNMENT (dr));
1186       fprintf (dump_file, "\n\toffset alignment: %d",
1187                DR_OFFSET_ALIGNMENT (dr));
1188       fprintf (dump_file, "\n\tstep alignment: %d", DR_STEP_ALIGNMENT (dr));
1189       fprintf (dump_file, "\n\tbase_object: ");
1190       print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
1191       fprintf (dump_file, "\n");
1192       for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
1193         {
1194           fprintf (dump_file, "\tAccess function %d: ", i);
1195           print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
1196         }
1197     }
1198
1199   return dr;
1200 }
1201
1202 /*  A helper function computes order between two tree epxressions T1 and T2.
1203     This is used in comparator functions sorting objects based on the order
1204     of tree expressions.  The function returns -1, 0, or 1.  */
1205
1206 int
1207 data_ref_compare_tree (tree t1, tree t2)
1208 {
1209   int i, cmp;
1210   enum tree_code code;
1211   char tclass;
1212
1213   if (t1 == t2)
1214     return 0;
1215   if (t1 == NULL)
1216     return -1;
1217   if (t2 == NULL)
1218     return 1;
1219
1220   STRIP_USELESS_TYPE_CONVERSION (t1);
1221   STRIP_USELESS_TYPE_CONVERSION (t2);
1222   if (t1 == t2)
1223     return 0;
1224
1225   if (TREE_CODE (t1) != TREE_CODE (t2)
1226       && ! (CONVERT_EXPR_P (t1) && CONVERT_EXPR_P (t2)))
1227     return TREE_CODE (t1) < TREE_CODE (t2) ? -1 : 1;
1228
1229   code = TREE_CODE (t1);
1230   switch (code)
1231     {
1232     case INTEGER_CST:
1233       return tree_int_cst_compare (t1, t2);
1234
1235     case STRING_CST:
1236       if (TREE_STRING_LENGTH (t1) != TREE_STRING_LENGTH (t2))
1237         return TREE_STRING_LENGTH (t1) < TREE_STRING_LENGTH (t2) ? -1 : 1;
1238       return memcmp (TREE_STRING_POINTER (t1), TREE_STRING_POINTER (t2),
1239                      TREE_STRING_LENGTH (t1));
1240
1241     case SSA_NAME:
1242       if (SSA_NAME_VERSION (t1) != SSA_NAME_VERSION (t2))
1243         return SSA_NAME_VERSION (t1) < SSA_NAME_VERSION (t2) ? -1 : 1;
1244       break;
1245
1246     default:
1247       if (POLY_INT_CST_P (t1))
1248         return compare_sizes_for_sort (wi::to_poly_widest (t1),
1249                                        wi::to_poly_widest (t2));
1250
1251       tclass = TREE_CODE_CLASS (code);
1252
1253       /* For decls, compare their UIDs.  */
1254       if (tclass == tcc_declaration)
1255         {
1256           if (DECL_UID (t1) != DECL_UID (t2))
1257             return DECL_UID (t1) < DECL_UID (t2) ? -1 : 1;
1258           break;
1259         }
1260       /* For expressions, compare their operands recursively.  */
1261       else if (IS_EXPR_CODE_CLASS (tclass))
1262         {
1263           for (i = TREE_OPERAND_LENGTH (t1) - 1; i >= 0; --i)
1264             {
1265               cmp = data_ref_compare_tree (TREE_OPERAND (t1, i),
1266                                            TREE_OPERAND (t2, i));
1267               if (cmp != 0)
1268                 return cmp;
1269             }
1270         }
1271       else
1272         gcc_unreachable ();
1273     }
1274
1275   return 0;
1276 }
1277
1278 /* Return TRUE it's possible to resolve data dependence DDR by runtime alias
1279    check.  */
1280
1281 bool
1282 runtime_alias_check_p (ddr_p ddr, struct loop *loop, bool speed_p)
1283 {
1284   if (dump_enabled_p ())
1285     {
1286       dump_printf (MSG_NOTE, "consider run-time aliasing test between ");
1287       dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (DDR_A (ddr)));
1288       dump_printf (MSG_NOTE,  " and ");
1289       dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (DDR_B (ddr)));
1290       dump_printf (MSG_NOTE, "\n");
1291     }
1292
1293   if (!speed_p)
1294     {
1295       if (dump_enabled_p ())
1296         dump_printf (MSG_MISSED_OPTIMIZATION,
1297                      "runtime alias check not supported when optimizing "
1298                      "for size.\n");
1299       return false;
1300     }
1301
1302   /* FORNOW: We don't support versioning with outer-loop in either
1303      vectorization or loop distribution.  */
1304   if (loop != NULL && loop->inner != NULL)
1305     {
1306       if (dump_enabled_p ())
1307         dump_printf (MSG_MISSED_OPTIMIZATION,
1308                      "runtime alias check not supported for outer loop.\n");
1309       return false;
1310     }
1311
1312   return true;
1313 }
1314
1315 /* Operator == between two dr_with_seg_len objects.
1316
1317    This equality operator is used to make sure two data refs
1318    are the same one so that we will consider to combine the
1319    aliasing checks of those two pairs of data dependent data
1320    refs.  */
1321
1322 static bool
1323 operator == (const dr_with_seg_len& d1,
1324              const dr_with_seg_len& d2)
1325 {
1326   return (operand_equal_p (DR_BASE_ADDRESS (d1.dr),
1327                            DR_BASE_ADDRESS (d2.dr), 0)
1328           && data_ref_compare_tree (DR_OFFSET (d1.dr), DR_OFFSET (d2.dr)) == 0
1329           && data_ref_compare_tree (DR_INIT (d1.dr), DR_INIT (d2.dr)) == 0
1330           && data_ref_compare_tree (d1.seg_len, d2.seg_len) == 0
1331           && known_eq (d1.access_size, d2.access_size)
1332           && d1.align == d2.align);
1333 }
1334
1335 /* Comparison function for sorting objects of dr_with_seg_len_pair_t
1336    so that we can combine aliasing checks in one scan.  */
1337
1338 static int
1339 comp_dr_with_seg_len_pair (const void *pa_, const void *pb_)
1340 {
1341   const dr_with_seg_len_pair_t* pa = (const dr_with_seg_len_pair_t *) pa_;
1342   const dr_with_seg_len_pair_t* pb = (const dr_with_seg_len_pair_t *) pb_;
1343   const dr_with_seg_len &a1 = pa->first, &a2 = pa->second;
1344   const dr_with_seg_len &b1 = pb->first, &b2 = pb->second;
1345
1346   /* For DR pairs (a, b) and (c, d), we only consider to merge the alias checks
1347      if a and c have the same basic address snd step, and b and d have the same
1348      address and step.  Therefore, if any a&c or b&d don't have the same address
1349      and step, we don't care the order of those two pairs after sorting.  */
1350   int comp_res;
1351
1352   if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a1.dr),
1353                                          DR_BASE_ADDRESS (b1.dr))) != 0)
1354     return comp_res;
1355   if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a2.dr),
1356                                          DR_BASE_ADDRESS (b2.dr))) != 0)
1357     return comp_res;
1358   if ((comp_res = data_ref_compare_tree (DR_STEP (a1.dr),
1359                                          DR_STEP (b1.dr))) != 0)
1360     return comp_res;
1361   if ((comp_res = data_ref_compare_tree (DR_STEP (a2.dr),
1362                                          DR_STEP (b2.dr))) != 0)
1363     return comp_res;
1364   if ((comp_res = data_ref_compare_tree (DR_OFFSET (a1.dr),
1365                                          DR_OFFSET (b1.dr))) != 0)
1366     return comp_res;
1367   if ((comp_res = data_ref_compare_tree (DR_INIT (a1.dr),
1368                                          DR_INIT (b1.dr))) != 0)
1369     return comp_res;
1370   if ((comp_res = data_ref_compare_tree (DR_OFFSET (a2.dr),
1371                                          DR_OFFSET (b2.dr))) != 0)
1372     return comp_res;
1373   if ((comp_res = data_ref_compare_tree (DR_INIT (a2.dr),
1374                                          DR_INIT (b2.dr))) != 0)
1375     return comp_res;
1376
1377   return 0;
1378 }
1379
1380 /* Merge alias checks recorded in ALIAS_PAIRS and remove redundant ones.
1381    FACTOR is number of iterations that each data reference is accessed.
1382
1383    Basically, for each pair of dependent data refs store_ptr_0 & load_ptr_0,
1384    we create an expression:
1385
1386    ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
1387    || (load_ptr_0 + load_segment_length_0) <= store_ptr_0))
1388
1389    for aliasing checks.  However, in some cases we can decrease the number
1390    of checks by combining two checks into one.  For example, suppose we have
1391    another pair of data refs store_ptr_0 & load_ptr_1, and if the following
1392    condition is satisfied:
1393
1394    load_ptr_0 < load_ptr_1  &&
1395    load_ptr_1 - load_ptr_0 - load_segment_length_0 < store_segment_length_0
1396
1397    (this condition means, in each iteration of vectorized loop, the accessed
1398    memory of store_ptr_0 cannot be between the memory of load_ptr_0 and
1399    load_ptr_1.)
1400
1401    we then can use only the following expression to finish the alising checks
1402    between store_ptr_0 & load_ptr_0 and store_ptr_0 & load_ptr_1:
1403
1404    ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
1405    || (load_ptr_1 + load_segment_length_1 <= store_ptr_0))
1406
1407    Note that we only consider that load_ptr_0 and load_ptr_1 have the same
1408    basic address.  */
1409
1410 void
1411 prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs,
1412                                poly_uint64)
1413 {
1414   /* Sort the collected data ref pairs so that we can scan them once to
1415      combine all possible aliasing checks.  */
1416   alias_pairs->qsort (comp_dr_with_seg_len_pair);
1417
1418   /* Scan the sorted dr pairs and check if we can combine alias checks
1419      of two neighboring dr pairs.  */
1420   for (size_t i = 1; i < alias_pairs->length (); ++i)
1421     {
1422       /* Deal with two ddrs (dr_a1, dr_b1) and (dr_a2, dr_b2).  */
1423       dr_with_seg_len *dr_a1 = &(*alias_pairs)[i-1].first,
1424                       *dr_b1 = &(*alias_pairs)[i-1].second,
1425                       *dr_a2 = &(*alias_pairs)[i].first,
1426                       *dr_b2 = &(*alias_pairs)[i].second;
1427
1428       /* Remove duplicate data ref pairs.  */
1429       if (*dr_a1 == *dr_a2 && *dr_b1 == *dr_b2)
1430         {
1431           if (dump_enabled_p ())
1432             {
1433               dump_printf (MSG_NOTE, "found equal ranges ");
1434               dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a1->dr));
1435               dump_printf (MSG_NOTE,  ", ");
1436               dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b1->dr));
1437               dump_printf (MSG_NOTE,  " and ");
1438               dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a2->dr));
1439               dump_printf (MSG_NOTE,  ", ");
1440               dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b2->dr));
1441               dump_printf (MSG_NOTE, "\n");
1442             }
1443           alias_pairs->ordered_remove (i--);
1444           continue;
1445         }
1446
1447       if (*dr_a1 == *dr_a2 || *dr_b1 == *dr_b2)
1448         {
1449           /* We consider the case that DR_B1 and DR_B2 are same memrefs,
1450              and DR_A1 and DR_A2 are two consecutive memrefs.  */
1451           if (*dr_a1 == *dr_a2)
1452             {
1453               std::swap (dr_a1, dr_b1);
1454               std::swap (dr_a2, dr_b2);
1455             }
1456
1457           poly_int64 init_a1, init_a2;
1458           /* Only consider cases in which the distance between the initial
1459              DR_A1 and the initial DR_A2 is known at compile time.  */
1460           if (!operand_equal_p (DR_BASE_ADDRESS (dr_a1->dr),
1461                                 DR_BASE_ADDRESS (dr_a2->dr), 0)
1462               || !operand_equal_p (DR_OFFSET (dr_a1->dr),
1463                                    DR_OFFSET (dr_a2->dr), 0)
1464               || !poly_int_tree_p (DR_INIT (dr_a1->dr), &init_a1)
1465               || !poly_int_tree_p (DR_INIT (dr_a2->dr), &init_a2))
1466             continue;
1467
1468           /* Don't combine if we can't tell which one comes first.  */
1469           if (!ordered_p (init_a1, init_a2))
1470             continue;
1471
1472           /* Make sure dr_a1 starts left of dr_a2.  */
1473           if (maybe_gt (init_a1, init_a2))
1474             {
1475               std::swap (*dr_a1, *dr_a2);
1476               std::swap (init_a1, init_a2);
1477             }
1478
1479           /* Work out what the segment length would be if we did combine
1480              DR_A1 and DR_A2:
1481
1482              - If DR_A1 and DR_A2 have equal lengths, that length is
1483                also the combined length.
1484
1485              - If DR_A1 and DR_A2 both have negative "lengths", the combined
1486                length is the lower bound on those lengths.
1487
1488              - If DR_A1 and DR_A2 both have positive lengths, the combined
1489                length is the upper bound on those lengths.
1490
1491              Other cases are unlikely to give a useful combination.
1492
1493              The lengths both have sizetype, so the sign is taken from
1494              the step instead.  */
1495           if (!operand_equal_p (dr_a1->seg_len, dr_a2->seg_len, 0))
1496             {
1497               poly_uint64 seg_len_a1, seg_len_a2;
1498               if (!poly_int_tree_p (dr_a1->seg_len, &seg_len_a1)
1499                   || !poly_int_tree_p (dr_a2->seg_len, &seg_len_a2))
1500                 continue;
1501
1502               tree indicator_a = dr_direction_indicator (dr_a1->dr);
1503               if (TREE_CODE (indicator_a) != INTEGER_CST)
1504                 continue;
1505
1506               tree indicator_b = dr_direction_indicator (dr_a2->dr);
1507               if (TREE_CODE (indicator_b) != INTEGER_CST)
1508                 continue;
1509
1510               int sign_a = tree_int_cst_sgn (indicator_a);
1511               int sign_b = tree_int_cst_sgn (indicator_b);
1512
1513               poly_uint64 new_seg_len;
1514               if (sign_a <= 0 && sign_b <= 0)
1515                 new_seg_len = lower_bound (seg_len_a1, seg_len_a2);
1516               else if (sign_a >= 0 && sign_b >= 0)
1517                 new_seg_len = upper_bound (seg_len_a1, seg_len_a2);
1518               else
1519                 continue;
1520
1521               dr_a1->seg_len = build_int_cst (TREE_TYPE (dr_a1->seg_len),
1522                                               new_seg_len);
1523               dr_a1->align = MIN (dr_a1->align, known_alignment (new_seg_len));
1524             }
1525
1526           /* This is always positive due to the swap above.  */
1527           poly_uint64 diff = init_a2 - init_a1;
1528
1529           /* The new check will start at DR_A1.  Make sure that its access
1530              size encompasses the initial DR_A2.  */
1531           if (maybe_lt (dr_a1->access_size, diff + dr_a2->access_size))
1532             {
1533               dr_a1->access_size = upper_bound (dr_a1->access_size,
1534                                                 diff + dr_a2->access_size);
1535               unsigned int new_align = known_alignment (dr_a1->access_size);
1536               dr_a1->align = MIN (dr_a1->align, new_align);
1537             }
1538           if (dump_enabled_p ())
1539             {
1540               dump_printf (MSG_NOTE, "merging ranges for ");
1541               dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a1->dr));
1542               dump_printf (MSG_NOTE,  ", ");
1543               dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b1->dr));
1544               dump_printf (MSG_NOTE,  " and ");
1545               dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a2->dr));
1546               dump_printf (MSG_NOTE,  ", ");
1547               dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b2->dr));
1548               dump_printf (MSG_NOTE, "\n");
1549             }
1550           alias_pairs->ordered_remove (i);
1551           i--;
1552         }
1553     }
1554 }
1555
1556 /* Given LOOP's two data references and segment lengths described by DR_A
1557    and DR_B, create expression checking if the two addresses ranges intersect
1558    with each other based on index of the two addresses.  This can only be
1559    done if DR_A and DR_B referring to the same (array) object and the index
1560    is the only difference.  For example:
1561
1562                        DR_A                           DR_B
1563       data-ref         arr[i]                         arr[j]
1564       base_object      arr                            arr
1565       index            {i_0, +, 1}_loop               {j_0, +, 1}_loop
1566
1567    The addresses and their index are like:
1568
1569         |<- ADDR_A    ->|          |<- ADDR_B    ->|
1570      ------------------------------------------------------->
1571         |   |   |   |   |          |   |   |   |   |
1572      ------------------------------------------------------->
1573         i_0 ...         i_0+4      j_0 ...         j_0+4
1574
1575    We can create expression based on index rather than address:
1576
1577      (i_0 + 4 < j_0 || j_0 + 4 < i_0)
1578
1579    Note evolution step of index needs to be considered in comparison.  */
1580
1581 static bool
1582 create_intersect_range_checks_index (struct loop *loop, tree *cond_expr,
1583                                      const dr_with_seg_len& dr_a,
1584                                      const dr_with_seg_len& dr_b)
1585 {
1586   if (integer_zerop (DR_STEP (dr_a.dr))
1587       || integer_zerop (DR_STEP (dr_b.dr))
1588       || DR_NUM_DIMENSIONS (dr_a.dr) != DR_NUM_DIMENSIONS (dr_b.dr))
1589     return false;
1590
1591   poly_uint64 seg_len1, seg_len2;
1592   if (!poly_int_tree_p (dr_a.seg_len, &seg_len1)
1593       || !poly_int_tree_p (dr_b.seg_len, &seg_len2))
1594     return false;
1595
1596   if (!tree_fits_shwi_p (DR_STEP (dr_a.dr)))
1597     return false;
1598
1599   if (!operand_equal_p (DR_BASE_OBJECT (dr_a.dr), DR_BASE_OBJECT (dr_b.dr), 0))
1600     return false;
1601
1602   if (!operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0))
1603     return false;
1604
1605   gcc_assert (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST);
1606
1607   bool neg_step = tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0;
1608   unsigned HOST_WIDE_INT abs_step = tree_to_shwi (DR_STEP (dr_a.dr));
1609   if (neg_step)
1610     {
1611       abs_step = -abs_step;
1612       seg_len1 = -seg_len1;
1613       seg_len2 = -seg_len2;
1614     }
1615   else
1616     {
1617       /* Include the access size in the length, so that we only have one
1618          tree addition below.  */
1619       seg_len1 += dr_a.access_size;
1620       seg_len2 += dr_b.access_size;
1621     }
1622
1623   /* Infer the number of iterations with which the memory segment is accessed
1624      by DR.  In other words, alias is checked if memory segment accessed by
1625      DR_A in some iterations intersect with memory segment accessed by DR_B
1626      in the same amount iterations.
1627      Note segnment length is a linear function of number of iterations with
1628      DR_STEP as the coefficient.  */
1629   poly_uint64 niter_len1, niter_len2;
1630   if (!can_div_trunc_p (seg_len1 + abs_step - 1, abs_step, &niter_len1)
1631       || !can_div_trunc_p (seg_len2 + abs_step - 1, abs_step, &niter_len2))
1632     return false;
1633
1634   poly_uint64 niter_access1 = 0, niter_access2 = 0;
1635   if (neg_step)
1636     {
1637       /* Divide each access size by the byte step, rounding up.  */
1638       if (!can_div_trunc_p (dr_a.access_size - abs_step - 1,
1639                             abs_step, &niter_access1)
1640           || !can_div_trunc_p (dr_b.access_size + abs_step - 1,
1641                                abs_step, &niter_access2))
1642         return false;
1643     }
1644
1645   unsigned int i;
1646   for (i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
1647     {
1648       tree access1 = DR_ACCESS_FN (dr_a.dr, i);
1649       tree access2 = DR_ACCESS_FN (dr_b.dr, i);
1650       /* Two indices must be the same if they are not scev, or not scev wrto
1651          current loop being vecorized.  */
1652       if (TREE_CODE (access1) != POLYNOMIAL_CHREC
1653           || TREE_CODE (access2) != POLYNOMIAL_CHREC
1654           || CHREC_VARIABLE (access1) != (unsigned)loop->num
1655           || CHREC_VARIABLE (access2) != (unsigned)loop->num)
1656         {
1657           if (operand_equal_p (access1, access2, 0))
1658             continue;
1659
1660           return false;
1661         }
1662       /* The two indices must have the same step.  */
1663       if (!operand_equal_p (CHREC_RIGHT (access1), CHREC_RIGHT (access2), 0))
1664         return false;
1665
1666       tree idx_step = CHREC_RIGHT (access1);
1667       /* Index must have const step, otherwise DR_STEP won't be constant.  */
1668       gcc_assert (TREE_CODE (idx_step) == INTEGER_CST);
1669       /* Index must evaluate in the same direction as DR.  */
1670       gcc_assert (!neg_step || tree_int_cst_sign_bit (idx_step) == 1);
1671
1672       tree min1 = CHREC_LEFT (access1);
1673       tree min2 = CHREC_LEFT (access2);
1674       if (!types_compatible_p (TREE_TYPE (min1), TREE_TYPE (min2)))
1675         return false;
1676
1677       /* Ideally, alias can be checked against loop's control IV, but we
1678          need to prove linear mapping between control IV and reference
1679          index.  Although that should be true, we check against (array)
1680          index of data reference.  Like segment length, index length is
1681          linear function of the number of iterations with index_step as
1682          the coefficient, i.e, niter_len * idx_step.  */
1683       tree idx_len1 = fold_build2 (MULT_EXPR, TREE_TYPE (min1), idx_step,
1684                                    build_int_cst (TREE_TYPE (min1),
1685                                                   niter_len1));
1686       tree idx_len2 = fold_build2 (MULT_EXPR, TREE_TYPE (min2), idx_step,
1687                                    build_int_cst (TREE_TYPE (min2),
1688                                                   niter_len2));
1689       tree max1 = fold_build2 (PLUS_EXPR, TREE_TYPE (min1), min1, idx_len1);
1690       tree max2 = fold_build2 (PLUS_EXPR, TREE_TYPE (min2), min2, idx_len2);
1691       /* Adjust ranges for negative step.  */
1692       if (neg_step)
1693         {
1694           /* IDX_LEN1 and IDX_LEN2 are negative in this case.  */
1695           std::swap (min1, max1);
1696           std::swap (min2, max2);
1697
1698           /* As with the lengths just calculated, we've measured the access
1699              sizes in iterations, so multiply them by the index step.  */
1700           tree idx_access1
1701             = fold_build2 (MULT_EXPR, TREE_TYPE (min1), idx_step,
1702                            build_int_cst (TREE_TYPE (min1), niter_access1));
1703           tree idx_access2
1704             = fold_build2 (MULT_EXPR, TREE_TYPE (min2), idx_step,
1705                            build_int_cst (TREE_TYPE (min2), niter_access2));
1706
1707           /* MINUS_EXPR because the above values are negative.  */
1708           max1 = fold_build2 (MINUS_EXPR, TREE_TYPE (max1), max1, idx_access1);
1709           max2 = fold_build2 (MINUS_EXPR, TREE_TYPE (max2), max2, idx_access2);
1710         }
1711       tree part_cond_expr
1712         = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1713             fold_build2 (LE_EXPR, boolean_type_node, max1, min2),
1714             fold_build2 (LE_EXPR, boolean_type_node, max2, min1));
1715       if (*cond_expr)
1716         *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1717                                   *cond_expr, part_cond_expr);
1718       else
1719         *cond_expr = part_cond_expr;
1720     }
1721   return true;
1722 }
1723
1724 /* If ALIGN is nonzero, set up *SEQ_MIN_OUT and *SEQ_MAX_OUT so that for
1725    every address ADDR accessed by D:
1726
1727      *SEQ_MIN_OUT <= ADDR (== ADDR & -ALIGN) <= *SEQ_MAX_OUT
1728
1729    In this case, every element accessed by D is aligned to at least
1730    ALIGN bytes.
1731
1732    If ALIGN is zero then instead set *SEG_MAX_OUT so that:
1733
1734      *SEQ_MIN_OUT <= ADDR < *SEQ_MAX_OUT.  */
1735
1736 static void
1737 get_segment_min_max (const dr_with_seg_len &d, tree *seg_min_out,
1738                      tree *seg_max_out, HOST_WIDE_INT align)
1739 {
1740   /* Each access has the following pattern:
1741
1742           <- |seg_len| ->
1743           <--- A: -ve step --->
1744           +-----+-------+-----+-------+-----+
1745           | n-1 | ,.... |  0  | ..... | n-1 |
1746           +-----+-------+-----+-------+-----+
1747                         <--- B: +ve step --->
1748                         <- |seg_len| ->
1749                         |
1750                    base address
1751
1752      where "n" is the number of scalar iterations covered by the segment.
1753      (This should be VF for a particular pair if we know that both steps
1754      are the same, otherwise it will be the full number of scalar loop
1755      iterations.)
1756
1757      A is the range of bytes accessed when the step is negative,
1758      B is the range when the step is positive.
1759
1760      If the access size is "access_size" bytes, the lowest addressed byte is:
1761
1762          base + (step < 0 ? seg_len : 0)   [LB]
1763
1764      and the highest addressed byte is always below:
1765
1766          base + (step < 0 ? 0 : seg_len) + access_size   [UB]
1767
1768      Thus:
1769
1770          LB <= ADDR < UB
1771
1772      If ALIGN is nonzero, all three values are aligned to at least ALIGN
1773      bytes, so:
1774
1775          LB <= ADDR <= UB - ALIGN
1776
1777      where "- ALIGN" folds naturally with the "+ access_size" and often
1778      cancels it out.
1779
1780      We don't try to simplify LB and UB beyond this (e.g. by using
1781      MIN and MAX based on whether seg_len rather than the stride is
1782      negative) because it is possible for the absolute size of the
1783      segment to overflow the range of a ssize_t.
1784
1785      Keeping the pointer_plus outside of the cond_expr should allow
1786      the cond_exprs to be shared with other alias checks.  */
1787   tree indicator = dr_direction_indicator (d.dr);
1788   tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
1789                                fold_convert (ssizetype, indicator),
1790                                ssize_int (0));
1791   tree addr_base = fold_build_pointer_plus (DR_BASE_ADDRESS (d.dr),
1792                                             DR_OFFSET (d.dr));
1793   addr_base = fold_build_pointer_plus (addr_base, DR_INIT (d.dr));
1794   tree seg_len
1795     = fold_convert (sizetype, rewrite_to_non_trapping_overflow (d.seg_len));
1796
1797   tree min_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
1798                                 seg_len, size_zero_node);
1799   tree max_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
1800                                 size_zero_node, seg_len);
1801   max_reach = fold_build2 (PLUS_EXPR, sizetype, max_reach,
1802                            size_int (d.access_size - align));
1803
1804   *seg_min_out = fold_build_pointer_plus (addr_base, min_reach);
1805   *seg_max_out = fold_build_pointer_plus (addr_base, max_reach);
1806 }
1807
1808 /* Given two data references and segment lengths described by DR_A and DR_B,
1809    create expression checking if the two addresses ranges intersect with
1810    each other:
1811
1812      ((DR_A_addr_0 + DR_A_segment_length_0) <= DR_B_addr_0)
1813      || (DR_B_addr_0 + DER_B_segment_length_0) <= DR_A_addr_0))  */
1814
1815 static void
1816 create_intersect_range_checks (struct loop *loop, tree *cond_expr,
1817                                const dr_with_seg_len& dr_a,
1818                                const dr_with_seg_len& dr_b)
1819 {
1820   *cond_expr = NULL_TREE;
1821   if (create_intersect_range_checks_index (loop, cond_expr, dr_a, dr_b))
1822     return;
1823
1824   unsigned HOST_WIDE_INT min_align;
1825   tree_code cmp_code;
1826   if (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST
1827       && TREE_CODE (DR_STEP (dr_b.dr)) == INTEGER_CST)
1828     {
1829       /* In this case adding access_size to seg_len is likely to give
1830          a simple X * step, where X is either the number of scalar
1831          iterations or the vectorization factor.  We're better off
1832          keeping that, rather than subtracting an alignment from it.
1833
1834          In this case the maximum values are exclusive and so there is
1835          no alias if the maximum of one segment equals the minimum
1836          of another.  */
1837       min_align = 0;
1838       cmp_code = LE_EXPR;
1839     }
1840   else
1841     {
1842       /* Calculate the minimum alignment shared by all four pointers,
1843          then arrange for this alignment to be subtracted from the
1844          exclusive maximum values to get inclusive maximum values.
1845          This "- min_align" is cumulative with a "+ access_size"
1846          in the calculation of the maximum values.  In the best
1847          (and common) case, the two cancel each other out, leaving
1848          us with an inclusive bound based only on seg_len.  In the
1849          worst case we're simply adding a smaller number than before.
1850
1851          Because the maximum values are inclusive, there is an alias
1852          if the maximum value of one segment is equal to the minimum
1853          value of the other.  */
1854       min_align = MIN (dr_a.align, dr_b.align);
1855       cmp_code = LT_EXPR;
1856     }
1857
1858   tree seg_a_min, seg_a_max, seg_b_min, seg_b_max;
1859   get_segment_min_max (dr_a, &seg_a_min, &seg_a_max, min_align);
1860   get_segment_min_max (dr_b, &seg_b_min, &seg_b_max, min_align);
1861
1862   *cond_expr
1863     = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1864         fold_build2 (cmp_code, boolean_type_node, seg_a_max, seg_b_min),
1865         fold_build2 (cmp_code, boolean_type_node, seg_b_max, seg_a_min));
1866 }
1867
1868 /* Create a conditional expression that represents the run-time checks for
1869    overlapping of address ranges represented by a list of data references
1870    pairs passed in ALIAS_PAIRS.  Data references are in LOOP.  The returned
1871    COND_EXPR is the conditional expression to be used in the if statement
1872    that controls which version of the loop gets executed at runtime.  */
1873
1874 void
1875 create_runtime_alias_checks (struct loop *loop,
1876                              vec<dr_with_seg_len_pair_t> *alias_pairs,
1877                              tree * cond_expr)
1878 {
1879   tree part_cond_expr;
1880
1881   for (size_t i = 0, s = alias_pairs->length (); i < s; ++i)
1882     {
1883       const dr_with_seg_len& dr_a = (*alias_pairs)[i].first;
1884       const dr_with_seg_len& dr_b = (*alias_pairs)[i].second;
1885
1886       if (dump_enabled_p ())
1887         {
1888           dump_printf (MSG_NOTE, "create runtime check for data references ");
1889           dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a.dr));
1890           dump_printf (MSG_NOTE, " and ");
1891           dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b.dr));
1892           dump_printf (MSG_NOTE, "\n");
1893         }
1894
1895       /* Create condition expression for each pair data references.  */
1896       create_intersect_range_checks (loop, &part_cond_expr, dr_a, dr_b);
1897       if (*cond_expr)
1898         *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1899                                   *cond_expr, part_cond_expr);
1900       else
1901         *cond_expr = part_cond_expr;
1902     }
1903 }
1904
1905 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
1906    expressions.  */
1907 static bool
1908 dr_equal_offsets_p1 (tree offset1, tree offset2)
1909 {
1910   bool res;
1911
1912   STRIP_NOPS (offset1);
1913   STRIP_NOPS (offset2);
1914
1915   if (offset1 == offset2)
1916     return true;
1917
1918   if (TREE_CODE (offset1) != TREE_CODE (offset2)
1919       || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
1920     return false;
1921
1922   res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
1923                              TREE_OPERAND (offset2, 0));
1924
1925   if (!res || !BINARY_CLASS_P (offset1))
1926     return res;
1927
1928   res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
1929                              TREE_OPERAND (offset2, 1));
1930
1931   return res;
1932 }
1933
1934 /* Check if DRA and DRB have equal offsets.  */
1935 bool
1936 dr_equal_offsets_p (struct data_reference *dra,
1937                     struct data_reference *drb)
1938 {
1939   tree offset1, offset2;
1940
1941   offset1 = DR_OFFSET (dra);
1942   offset2 = DR_OFFSET (drb);
1943
1944   return dr_equal_offsets_p1 (offset1, offset2);
1945 }
1946
1947 /* Returns true if FNA == FNB.  */
1948
1949 static bool
1950 affine_function_equal_p (affine_fn fna, affine_fn fnb)
1951 {
1952   unsigned i, n = fna.length ();
1953
1954   if (n != fnb.length ())
1955     return false;
1956
1957   for (i = 0; i < n; i++)
1958     if (!operand_equal_p (fna[i], fnb[i], 0))
1959       return false;
1960
1961   return true;
1962 }
1963
1964 /* If all the functions in CF are the same, returns one of them,
1965    otherwise returns NULL.  */
1966
1967 static affine_fn
1968 common_affine_function (conflict_function *cf)
1969 {
1970   unsigned i;
1971   affine_fn comm;
1972
1973   if (!CF_NONTRIVIAL_P (cf))
1974     return affine_fn ();
1975
1976   comm = cf->fns[0];
1977
1978   for (i = 1; i < cf->n; i++)
1979     if (!affine_function_equal_p (comm, cf->fns[i]))
1980       return affine_fn ();
1981
1982   return comm;
1983 }
1984
1985 /* Returns the base of the affine function FN.  */
1986
1987 static tree
1988 affine_function_base (affine_fn fn)
1989 {
1990   return fn[0];
1991 }
1992
1993 /* Returns true if FN is a constant.  */
1994
1995 static bool
1996 affine_function_constant_p (affine_fn fn)
1997 {
1998   unsigned i;
1999   tree coef;
2000
2001   for (i = 1; fn.iterate (i, &coef); i++)
2002     if (!integer_zerop (coef))
2003       return false;
2004
2005   return true;
2006 }
2007
2008 /* Returns true if FN is the zero constant function.  */
2009
2010 static bool
2011 affine_function_zero_p (affine_fn fn)
2012 {
2013   return (integer_zerop (affine_function_base (fn))
2014           && affine_function_constant_p (fn));
2015 }
2016
2017 /* Returns a signed integer type with the largest precision from TA
2018    and TB.  */
2019
2020 static tree
2021 signed_type_for_types (tree ta, tree tb)
2022 {
2023   if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
2024     return signed_type_for (ta);
2025   else
2026     return signed_type_for (tb);
2027 }
2028
2029 /* Applies operation OP on affine functions FNA and FNB, and returns the
2030    result.  */
2031
2032 static affine_fn
2033 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
2034 {
2035   unsigned i, n, m;
2036   affine_fn ret;
2037   tree coef;
2038
2039   if (fnb.length () > fna.length ())
2040     {
2041       n = fna.length ();
2042       m = fnb.length ();
2043     }
2044   else
2045     {
2046       n = fnb.length ();
2047       m = fna.length ();
2048     }
2049
2050   ret.create (m);
2051   for (i = 0; i < n; i++)
2052     {
2053       tree type = signed_type_for_types (TREE_TYPE (fna[i]),
2054                                          TREE_TYPE (fnb[i]));
2055       ret.quick_push (fold_build2 (op, type, fna[i], fnb[i]));
2056     }
2057
2058   for (; fna.iterate (i, &coef); i++)
2059     ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2060                                  coef, integer_zero_node));
2061   for (; fnb.iterate (i, &coef); i++)
2062     ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2063                                  integer_zero_node, coef));
2064
2065   return ret;
2066 }
2067
2068 /* Returns the sum of affine functions FNA and FNB.  */
2069
2070 static affine_fn
2071 affine_fn_plus (affine_fn fna, affine_fn fnb)
2072 {
2073   return affine_fn_op (PLUS_EXPR, fna, fnb);
2074 }
2075
2076 /* Returns the difference of affine functions FNA and FNB.  */
2077
2078 static affine_fn
2079 affine_fn_minus (affine_fn fna, affine_fn fnb)
2080 {
2081   return affine_fn_op (MINUS_EXPR, fna, fnb);
2082 }
2083
2084 /* Frees affine function FN.  */
2085
2086 static void
2087 affine_fn_free (affine_fn fn)
2088 {
2089   fn.release ();
2090 }
2091
2092 /* Determine for each subscript in the data dependence relation DDR
2093    the distance.  */
2094
2095 static void
2096 compute_subscript_distance (struct data_dependence_relation *ddr)
2097 {
2098   conflict_function *cf_a, *cf_b;
2099   affine_fn fn_a, fn_b, diff;
2100
2101   if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
2102     {
2103       unsigned int i;
2104
2105       for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2106         {
2107           struct subscript *subscript;
2108
2109           subscript = DDR_SUBSCRIPT (ddr, i);
2110           cf_a = SUB_CONFLICTS_IN_A (subscript);
2111           cf_b = SUB_CONFLICTS_IN_B (subscript);
2112
2113           fn_a = common_affine_function (cf_a);
2114           fn_b = common_affine_function (cf_b);
2115           if (!fn_a.exists () || !fn_b.exists ())
2116             {
2117               SUB_DISTANCE (subscript) = chrec_dont_know;
2118               return;
2119             }
2120           diff = affine_fn_minus (fn_a, fn_b);
2121
2122           if (affine_function_constant_p (diff))
2123             SUB_DISTANCE (subscript) = affine_function_base (diff);
2124           else
2125             SUB_DISTANCE (subscript) = chrec_dont_know;
2126
2127           affine_fn_free (diff);
2128         }
2129     }
2130 }
2131
2132 /* Returns the conflict function for "unknown".  */
2133
2134 static conflict_function *
2135 conflict_fn_not_known (void)
2136 {
2137   conflict_function *fn = XCNEW (conflict_function);
2138   fn->n = NOT_KNOWN;
2139
2140   return fn;
2141 }
2142
2143 /* Returns the conflict function for "independent".  */
2144
2145 static conflict_function *
2146 conflict_fn_no_dependence (void)
2147 {
2148   conflict_function *fn = XCNEW (conflict_function);
2149   fn->n = NO_DEPENDENCE;
2150
2151   return fn;
2152 }
2153
2154 /* Returns true if the address of OBJ is invariant in LOOP.  */
2155
2156 static bool
2157 object_address_invariant_in_loop_p (const struct loop *loop, const_tree obj)
2158 {
2159   while (handled_component_p (obj))
2160     {
2161       if (TREE_CODE (obj) == ARRAY_REF)
2162         {
2163           /* Index of the ARRAY_REF was zeroed in analyze_indices, thus we only
2164              need to check the stride and the lower bound of the reference.  */
2165           if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
2166                                                       loop->num)
2167               || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 3),
2168                                                          loop->num))
2169             return false;
2170         }
2171       else if (TREE_CODE (obj) == COMPONENT_REF)
2172         {
2173           if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
2174                                                       loop->num))
2175             return false;
2176         }
2177       obj = TREE_OPERAND (obj, 0);
2178     }
2179
2180   if (!INDIRECT_REF_P (obj)
2181       && TREE_CODE (obj) != MEM_REF)
2182     return true;
2183
2184   return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
2185                                                   loop->num);
2186 }
2187
2188 /* Returns false if we can prove that data references A and B do not alias,
2189    true otherwise.  If LOOP_NEST is false no cross-iteration aliases are
2190    considered.  */
2191
2192 bool
2193 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
2194                 bool loop_nest)
2195 {
2196   tree addr_a = DR_BASE_OBJECT (a);
2197   tree addr_b = DR_BASE_OBJECT (b);
2198
2199   /* If we are not processing a loop nest but scalar code we
2200      do not need to care about possible cross-iteration dependences
2201      and thus can process the full original reference.  Do so,
2202      similar to how loop invariant motion applies extra offset-based
2203      disambiguation.  */
2204   if (!loop_nest)
2205     {
2206       aff_tree off1, off2;
2207       poly_widest_int size1, size2;
2208       get_inner_reference_aff (DR_REF (a), &off1, &size1);
2209       get_inner_reference_aff (DR_REF (b), &off2, &size2);
2210       aff_combination_scale (&off1, -1);
2211       aff_combination_add (&off2, &off1);
2212       if (aff_comb_cannot_overlap_p (&off2, size1, size2))
2213         return false;
2214     }
2215
2216   if ((TREE_CODE (addr_a) == MEM_REF || TREE_CODE (addr_a) == TARGET_MEM_REF)
2217       && (TREE_CODE (addr_b) == MEM_REF || TREE_CODE (addr_b) == TARGET_MEM_REF)
2218       && MR_DEPENDENCE_CLIQUE (addr_a) == MR_DEPENDENCE_CLIQUE (addr_b)
2219       && MR_DEPENDENCE_BASE (addr_a) != MR_DEPENDENCE_BASE (addr_b))
2220     return false;
2221
2222   /* If we had an evolution in a pointer-based MEM_REF BASE_OBJECT we
2223      do not know the size of the base-object.  So we cannot do any
2224      offset/overlap based analysis but have to rely on points-to
2225      information only.  */
2226   if (TREE_CODE (addr_a) == MEM_REF
2227       && (DR_UNCONSTRAINED_BASE (a)
2228           || TREE_CODE (TREE_OPERAND (addr_a, 0)) == SSA_NAME))
2229     {
2230       /* For true dependences we can apply TBAA.  */
2231       if (flag_strict_aliasing
2232           && DR_IS_WRITE (a) && DR_IS_READ (b)
2233           && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
2234                                      get_alias_set (DR_REF (b))))
2235         return false;
2236       if (TREE_CODE (addr_b) == MEM_REF)
2237         return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
2238                                        TREE_OPERAND (addr_b, 0));
2239       else
2240         return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
2241                                        build_fold_addr_expr (addr_b));
2242     }
2243   else if (TREE_CODE (addr_b) == MEM_REF
2244            && (DR_UNCONSTRAINED_BASE (b)
2245                || TREE_CODE (TREE_OPERAND (addr_b, 0)) == SSA_NAME))
2246     {
2247       /* For true dependences we can apply TBAA.  */
2248       if (flag_strict_aliasing
2249           && DR_IS_WRITE (a) && DR_IS_READ (b)
2250           && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
2251                                      get_alias_set (DR_REF (b))))
2252         return false;
2253       if (TREE_CODE (addr_a) == MEM_REF)
2254         return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
2255                                        TREE_OPERAND (addr_b, 0));
2256       else
2257         return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
2258                                        TREE_OPERAND (addr_b, 0));
2259     }
2260
2261   /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
2262      that is being subsetted in the loop nest.  */
2263   if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
2264     return refs_output_dependent_p (addr_a, addr_b);
2265   else if (DR_IS_READ (a) && DR_IS_WRITE (b))
2266     return refs_anti_dependent_p (addr_a, addr_b);
2267   return refs_may_alias_p (addr_a, addr_b);
2268 }
2269
2270 /* REF_A and REF_B both satisfy access_fn_component_p.  Return true
2271    if it is meaningful to compare their associated access functions
2272    when checking for dependencies.  */
2273
2274 static bool
2275 access_fn_components_comparable_p (tree ref_a, tree ref_b)
2276 {
2277   /* Allow pairs of component refs from the following sets:
2278
2279        { REALPART_EXPR, IMAGPART_EXPR }
2280        { COMPONENT_REF }
2281        { ARRAY_REF }.  */
2282   tree_code code_a = TREE_CODE (ref_a);
2283   tree_code code_b = TREE_CODE (ref_b);
2284   if (code_a == IMAGPART_EXPR)
2285     code_a = REALPART_EXPR;
2286   if (code_b == IMAGPART_EXPR)
2287     code_b = REALPART_EXPR;
2288   if (code_a != code_b)
2289     return false;
2290
2291   if (TREE_CODE (ref_a) == COMPONENT_REF)
2292     /* ??? We cannot simply use the type of operand #0 of the refs here as
2293        the Fortran compiler smuggles type punning into COMPONENT_REFs.
2294        Use the DECL_CONTEXT of the FIELD_DECLs instead.  */
2295     return (DECL_CONTEXT (TREE_OPERAND (ref_a, 1))
2296             == DECL_CONTEXT (TREE_OPERAND (ref_b, 1)));
2297
2298   return types_compatible_p (TREE_TYPE (TREE_OPERAND (ref_a, 0)),
2299                              TREE_TYPE (TREE_OPERAND (ref_b, 0)));
2300 }
2301
2302 /* Initialize a data dependence relation between data accesses A and
2303    B.  NB_LOOPS is the number of loops surrounding the references: the
2304    size of the classic distance/direction vectors.  */
2305
2306 struct data_dependence_relation *
2307 initialize_data_dependence_relation (struct data_reference *a,
2308                                      struct data_reference *b,
2309                                      vec<loop_p> loop_nest)
2310 {
2311   struct data_dependence_relation *res;
2312   unsigned int i;
2313
2314   res = XCNEW (struct data_dependence_relation);
2315   DDR_A (res) = a;
2316   DDR_B (res) = b;
2317   DDR_LOOP_NEST (res).create (0);
2318   DDR_SUBSCRIPTS (res).create (0);
2319   DDR_DIR_VECTS (res).create (0);
2320   DDR_DIST_VECTS (res).create (0);
2321
2322   if (a == NULL || b == NULL)
2323     {
2324       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
2325       return res;
2326     }
2327
2328   /* If the data references do not alias, then they are independent.  */
2329   if (!dr_may_alias_p (a, b, loop_nest.exists ()))
2330     {
2331       DDR_ARE_DEPENDENT (res) = chrec_known;
2332       return res;
2333     }
2334
2335   unsigned int num_dimensions_a = DR_NUM_DIMENSIONS (a);
2336   unsigned int num_dimensions_b = DR_NUM_DIMENSIONS (b);
2337   if (num_dimensions_a == 0 || num_dimensions_b == 0)
2338     {
2339       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
2340       return res;
2341     }
2342
2343   /* For unconstrained bases, the root (highest-indexed) subscript
2344      describes a variation in the base of the original DR_REF rather
2345      than a component access.  We have no type that accurately describes
2346      the new DR_BASE_OBJECT (whose TREE_TYPE describes the type *after*
2347      applying this subscript) so limit the search to the last real
2348      component access.
2349
2350      E.g. for:
2351
2352         void
2353         f (int a[][8], int b[][8])
2354         {
2355           for (int i = 0; i < 8; ++i)
2356             a[i * 2][0] = b[i][0];
2357         }
2358
2359      the a and b accesses have a single ARRAY_REF component reference [0]
2360      but have two subscripts.  */
2361   if (DR_UNCONSTRAINED_BASE (a))
2362     num_dimensions_a -= 1;
2363   if (DR_UNCONSTRAINED_BASE (b))
2364     num_dimensions_b -= 1;
2365
2366   /* These structures describe sequences of component references in
2367      DR_REF (A) and DR_REF (B).  Each component reference is tied to a
2368      specific access function.  */
2369   struct {
2370     /* The sequence starts at DR_ACCESS_FN (A, START_A) of A and
2371        DR_ACCESS_FN (B, START_B) of B (inclusive) and extends to higher
2372        indices.  In C notation, these are the indices of the rightmost
2373        component references; e.g. for a sequence .b.c.d, the start
2374        index is for .d.  */
2375     unsigned int start_a;
2376     unsigned int start_b;
2377
2378     /* The sequence contains LENGTH consecutive access functions from
2379        each DR.  */
2380     unsigned int length;
2381
2382     /* The enclosing objects for the A and B sequences respectively,
2383        i.e. the objects to which DR_ACCESS_FN (A, START_A + LENGTH - 1)
2384        and DR_ACCESS_FN (B, START_B + LENGTH - 1) are applied.  */
2385     tree object_a;
2386     tree object_b;
2387   } full_seq = {}, struct_seq = {};
2388
2389   /* Before each iteration of the loop:
2390
2391      - REF_A is what you get after applying DR_ACCESS_FN (A, INDEX_A) and
2392      - REF_B is what you get after applying DR_ACCESS_FN (B, INDEX_B).  */
2393   unsigned int index_a = 0;
2394   unsigned int index_b = 0;
2395   tree ref_a = DR_REF (a);
2396   tree ref_b = DR_REF (b);
2397
2398   /* Now walk the component references from the final DR_REFs back up to
2399      the enclosing base objects.  Each component reference corresponds
2400      to one access function in the DR, with access function 0 being for
2401      the final DR_REF and the highest-indexed access function being the
2402      one that is applied to the base of the DR.
2403
2404      Look for a sequence of component references whose access functions
2405      are comparable (see access_fn_components_comparable_p).  If more
2406      than one such sequence exists, pick the one nearest the base
2407      (which is the leftmost sequence in C notation).  Store this sequence
2408      in FULL_SEQ.
2409
2410      For example, if we have:
2411
2412         struct foo { struct bar s; ... } (*a)[10], (*b)[10];
2413
2414         A: a[0][i].s.c.d
2415         B: __real b[0][i].s.e[i].f
2416
2417      (where d is the same type as the real component of f) then the access
2418      functions would be:
2419
2420                          0   1   2   3
2421         A:              .d  .c  .s [i]
2422
2423                  0   1   2   3   4   5
2424         B:  __real  .f [i]  .e  .s [i]
2425
2426      The A0/B2 column isn't comparable, since .d is a COMPONENT_REF
2427      and [i] is an ARRAY_REF.  However, the A1/B3 column contains two
2428      COMPONENT_REF accesses for struct bar, so is comparable.  Likewise
2429      the A2/B4 column contains two COMPONENT_REF accesses for struct foo,
2430      so is comparable.  The A3/B5 column contains two ARRAY_REFs that
2431      index foo[10] arrays, so is again comparable.  The sequence is
2432      therefore:
2433
2434         A: [1, 3]  (i.e. [i].s.c)
2435         B: [3, 5]  (i.e. [i].s.e)
2436
2437      Also look for sequences of component references whose access
2438      functions are comparable and whose enclosing objects have the same
2439      RECORD_TYPE.  Store this sequence in STRUCT_SEQ.  In the above
2440      example, STRUCT_SEQ would be:
2441
2442         A: [1, 2]  (i.e. s.c)
2443         B: [3, 4]  (i.e. s.e)  */
2444   while (index_a < num_dimensions_a && index_b < num_dimensions_b)
2445     {
2446       /* REF_A and REF_B must be one of the component access types
2447          allowed by dr_analyze_indices.  */
2448       gcc_checking_assert (access_fn_component_p (ref_a));
2449       gcc_checking_assert (access_fn_component_p (ref_b));
2450
2451       /* Get the immediately-enclosing objects for REF_A and REF_B,
2452          i.e. the references *before* applying DR_ACCESS_FN (A, INDEX_A)
2453          and DR_ACCESS_FN (B, INDEX_B).  */
2454       tree object_a = TREE_OPERAND (ref_a, 0);
2455       tree object_b = TREE_OPERAND (ref_b, 0);
2456
2457       tree type_a = TREE_TYPE (object_a);
2458       tree type_b = TREE_TYPE (object_b);
2459       if (access_fn_components_comparable_p (ref_a, ref_b))
2460         {
2461           /* This pair of component accesses is comparable for dependence
2462              analysis, so we can include DR_ACCESS_FN (A, INDEX_A) and
2463              DR_ACCESS_FN (B, INDEX_B) in the sequence.  */
2464           if (full_seq.start_a + full_seq.length != index_a
2465               || full_seq.start_b + full_seq.length != index_b)
2466             {
2467               /* The accesses don't extend the current sequence,
2468                  so start a new one here.  */
2469               full_seq.start_a = index_a;
2470               full_seq.start_b = index_b;
2471               full_seq.length = 0;
2472             }
2473
2474           /* Add this pair of references to the sequence.  */
2475           full_seq.length += 1;
2476           full_seq.object_a = object_a;
2477           full_seq.object_b = object_b;
2478
2479           /* If the enclosing objects are structures (and thus have the
2480              same RECORD_TYPE), record the new sequence in STRUCT_SEQ.  */
2481           if (TREE_CODE (type_a) == RECORD_TYPE)
2482             struct_seq = full_seq;
2483
2484           /* Move to the next containing reference for both A and B.  */
2485           ref_a = object_a;
2486           ref_b = object_b;
2487           index_a += 1;
2488           index_b += 1;
2489           continue;
2490         }
2491
2492       /* Try to approach equal type sizes.  */
2493       if (!COMPLETE_TYPE_P (type_a)
2494           || !COMPLETE_TYPE_P (type_b)
2495           || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_a))
2496           || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_b)))
2497         break;
2498
2499       unsigned HOST_WIDE_INT size_a = tree_to_uhwi (TYPE_SIZE_UNIT (type_a));
2500       unsigned HOST_WIDE_INT size_b = tree_to_uhwi (TYPE_SIZE_UNIT (type_b));
2501       if (size_a <= size_b)
2502         {
2503           index_a += 1;
2504           ref_a = object_a;
2505         }
2506       if (size_b <= size_a)
2507         {
2508           index_b += 1;
2509           ref_b = object_b;
2510         }
2511     }
2512
2513   /* See whether FULL_SEQ ends at the base and whether the two bases
2514      are equal.  We do not care about TBAA or alignment info so we can
2515      use OEP_ADDRESS_OF to avoid false negatives.  */
2516   tree base_a = DR_BASE_OBJECT (a);
2517   tree base_b = DR_BASE_OBJECT (b);
2518   bool same_base_p = (full_seq.start_a + full_seq.length == num_dimensions_a
2519                       && full_seq.start_b + full_seq.length == num_dimensions_b
2520                       && DR_UNCONSTRAINED_BASE (a) == DR_UNCONSTRAINED_BASE (b)
2521                       && operand_equal_p (base_a, base_b, OEP_ADDRESS_OF)
2522                       && types_compatible_p (TREE_TYPE (base_a),
2523                                              TREE_TYPE (base_b))
2524                       && (!loop_nest.exists ()
2525                           || (object_address_invariant_in_loop_p
2526                               (loop_nest[0], base_a))));
2527
2528   /* If the bases are the same, we can include the base variation too.
2529      E.g. the b accesses in:
2530
2531        for (int i = 0; i < n; ++i)
2532          b[i + 4][0] = b[i][0];
2533
2534      have a definite dependence distance of 4, while for:
2535
2536        for (int i = 0; i < n; ++i)
2537          a[i + 4][0] = b[i][0];
2538
2539      the dependence distance depends on the gap between a and b.
2540
2541      If the bases are different then we can only rely on the sequence
2542      rooted at a structure access, since arrays are allowed to overlap
2543      arbitrarily and change shape arbitrarily.  E.g. we treat this as
2544      valid code:
2545
2546        int a[256];
2547        ...
2548        ((int (*)[4][3]) &a[1])[i][0] += ((int (*)[4][3]) &a[2])[i][0];
2549
2550      where two lvalues with the same int[4][3] type overlap, and where
2551      both lvalues are distinct from the object's declared type.  */
2552   if (same_base_p)
2553     {
2554       if (DR_UNCONSTRAINED_BASE (a))
2555         full_seq.length += 1;
2556     }
2557   else
2558     full_seq = struct_seq;
2559
2560   /* Punt if we didn't find a suitable sequence.  */
2561   if (full_seq.length == 0)
2562     {
2563       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
2564       return res;
2565     }
2566
2567   if (!same_base_p)
2568     {
2569       /* Partial overlap is possible for different bases when strict aliasing
2570          is not in effect.  It's also possible if either base involves a union
2571          access; e.g. for:
2572
2573            struct s1 { int a[2]; };
2574            struct s2 { struct s1 b; int c; };
2575            struct s3 { int d; struct s1 e; };
2576            union u { struct s2 f; struct s3 g; } *p, *q;
2577
2578          the s1 at "p->f.b" (base "p->f") partially overlaps the s1 at
2579          "p->g.e" (base "p->g") and might partially overlap the s1 at
2580          "q->g.e" (base "q->g").  */
2581       if (!flag_strict_aliasing
2582           || ref_contains_union_access_p (full_seq.object_a)
2583           || ref_contains_union_access_p (full_seq.object_b))
2584         {
2585           DDR_ARE_DEPENDENT (res) = chrec_dont_know;
2586           return res;
2587         }
2588
2589       DDR_COULD_BE_INDEPENDENT_P (res) = true;
2590       if (!loop_nest.exists ()
2591           || (object_address_invariant_in_loop_p (loop_nest[0],
2592                                                   full_seq.object_a)
2593               && object_address_invariant_in_loop_p (loop_nest[0],
2594                                                      full_seq.object_b)))
2595         {
2596           DDR_OBJECT_A (res) = full_seq.object_a;
2597           DDR_OBJECT_B (res) = full_seq.object_b;
2598         }
2599     }
2600
2601   DDR_AFFINE_P (res) = true;
2602   DDR_ARE_DEPENDENT (res) = NULL_TREE;
2603   DDR_SUBSCRIPTS (res).create (full_seq.length);
2604   DDR_LOOP_NEST (res) = loop_nest;
2605   DDR_INNER_LOOP (res) = 0;
2606   DDR_SELF_REFERENCE (res) = false;
2607
2608   for (i = 0; i < full_seq.length; ++i)
2609     {
2610       struct subscript *subscript;
2611
2612       subscript = XNEW (struct subscript);
2613       SUB_ACCESS_FN (subscript, 0) = DR_ACCESS_FN (a, full_seq.start_a + i);
2614       SUB_ACCESS_FN (subscript, 1) = DR_ACCESS_FN (b, full_seq.start_b + i);
2615       SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
2616       SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
2617       SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
2618       SUB_DISTANCE (subscript) = chrec_dont_know;
2619       DDR_SUBSCRIPTS (res).safe_push (subscript);
2620     }
2621
2622   return res;
2623 }
2624
2625 /* Frees memory used by the conflict function F.  */
2626
2627 static void
2628 free_conflict_function (conflict_function *f)
2629 {
2630   unsigned i;
2631
2632   if (CF_NONTRIVIAL_P (f))
2633     {
2634       for (i = 0; i < f->n; i++)
2635         affine_fn_free (f->fns[i]);
2636     }
2637   free (f);
2638 }
2639
2640 /* Frees memory used by SUBSCRIPTS.  */
2641
2642 static void
2643 free_subscripts (vec<subscript_p> subscripts)
2644 {
2645   unsigned i;
2646   subscript_p s;
2647
2648   FOR_EACH_VEC_ELT (subscripts, i, s)
2649     {
2650       free_conflict_function (s->conflicting_iterations_in_a);
2651       free_conflict_function (s->conflicting_iterations_in_b);
2652       free (s);
2653     }
2654   subscripts.release ();
2655 }
2656
2657 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
2658    description.  */
2659
2660 static inline void
2661 finalize_ddr_dependent (struct data_dependence_relation *ddr,
2662                         tree chrec)
2663 {
2664   DDR_ARE_DEPENDENT (ddr) = chrec;
2665   free_subscripts (DDR_SUBSCRIPTS (ddr));
2666   DDR_SUBSCRIPTS (ddr).create (0);
2667 }
2668
2669 /* The dependence relation DDR cannot be represented by a distance
2670    vector.  */
2671
2672 static inline void
2673 non_affine_dependence_relation (struct data_dependence_relation *ddr)
2674 {
2675   if (dump_file && (dump_flags & TDF_DETAILS))
2676     fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
2677
2678   DDR_AFFINE_P (ddr) = false;
2679 }
2680
2681 \f
2682
2683 /* This section contains the classic Banerjee tests.  */
2684
2685 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
2686    variables, i.e., if the ZIV (Zero Index Variable) test is true.  */
2687
2688 static inline bool
2689 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
2690 {
2691   return (evolution_function_is_constant_p (chrec_a)
2692           && evolution_function_is_constant_p (chrec_b));
2693 }
2694
2695 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
2696    variable, i.e., if the SIV (Single Index Variable) test is true.  */
2697
2698 static bool
2699 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
2700 {
2701   if ((evolution_function_is_constant_p (chrec_a)
2702        && evolution_function_is_univariate_p (chrec_b))
2703       || (evolution_function_is_constant_p (chrec_b)
2704           && evolution_function_is_univariate_p (chrec_a)))
2705     return true;
2706
2707   if (evolution_function_is_univariate_p (chrec_a)
2708       && evolution_function_is_univariate_p (chrec_b))
2709     {
2710       switch (TREE_CODE (chrec_a))
2711         {
2712         case POLYNOMIAL_CHREC:
2713           switch (TREE_CODE (chrec_b))
2714             {
2715             case POLYNOMIAL_CHREC:
2716               if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
2717                 return false;
2718               /* FALLTHRU */
2719
2720             default:
2721               return true;
2722             }
2723
2724         default:
2725           return true;
2726         }
2727     }
2728
2729   return false;
2730 }
2731
2732 /* Creates a conflict function with N dimensions.  The affine functions
2733    in each dimension follow.  */
2734
2735 static conflict_function *
2736 conflict_fn (unsigned n, ...)
2737 {
2738   unsigned i;
2739   conflict_function *ret = XCNEW (conflict_function);
2740   va_list ap;
2741
2742   gcc_assert (n > 0 && n <= MAX_DIM);
2743   va_start (ap, n);
2744
2745   ret->n = n;
2746   for (i = 0; i < n; i++)
2747     ret->fns[i] = va_arg (ap, affine_fn);
2748   va_end (ap);
2749
2750   return ret;
2751 }
2752
2753 /* Returns constant affine function with value CST.  */
2754
2755 static affine_fn
2756 affine_fn_cst (tree cst)
2757 {
2758   affine_fn fn;
2759   fn.create (1);
2760   fn.quick_push (cst);
2761   return fn;
2762 }
2763
2764 /* Returns affine function with single variable, CST + COEF * x_DIM.  */
2765
2766 static affine_fn
2767 affine_fn_univar (tree cst, unsigned dim, tree coef)
2768 {
2769   affine_fn fn;
2770   fn.create (dim + 1);
2771   unsigned i;
2772
2773   gcc_assert (dim > 0);
2774   fn.quick_push (cst);
2775   for (i = 1; i < dim; i++)
2776     fn.quick_push (integer_zero_node);
2777   fn.quick_push (coef);
2778   return fn;
2779 }
2780
2781 /* Analyze a ZIV (Zero Index Variable) subscript.  *OVERLAPS_A and
2782    *OVERLAPS_B are initialized to the functions that describe the
2783    relation between the elements accessed twice by CHREC_A and
2784    CHREC_B.  For k >= 0, the following property is verified:
2785
2786    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
2787
2788 static void
2789 analyze_ziv_subscript (tree chrec_a,
2790                        tree chrec_b,
2791                        conflict_function **overlaps_a,
2792                        conflict_function **overlaps_b,
2793                        tree *last_conflicts)
2794 {
2795   tree type, difference;
2796   dependence_stats.num_ziv++;
2797
2798   if (dump_file && (dump_flags & TDF_DETAILS))
2799     fprintf (dump_file, "(analyze_ziv_subscript \n");
2800
2801   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
2802   chrec_a = chrec_convert (type, chrec_a, NULL);
2803   chrec_b = chrec_convert (type, chrec_b, NULL);
2804   difference = chrec_fold_minus (type, chrec_a, chrec_b);
2805
2806   switch (TREE_CODE (difference))
2807     {
2808     case INTEGER_CST:
2809       if (integer_zerop (difference))
2810         {
2811           /* The difference is equal to zero: the accessed index
2812              overlaps for each iteration in the loop.  */
2813           *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2814           *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2815           *last_conflicts = chrec_dont_know;
2816           dependence_stats.num_ziv_dependent++;
2817         }
2818       else
2819         {
2820           /* The accesses do not overlap.  */
2821           *overlaps_a = conflict_fn_no_dependence ();
2822           *overlaps_b = conflict_fn_no_dependence ();
2823           *last_conflicts = integer_zero_node;
2824           dependence_stats.num_ziv_independent++;
2825         }
2826       break;
2827
2828     default:
2829       /* We're not sure whether the indexes overlap.  For the moment,
2830          conservatively answer "don't know".  */
2831       if (dump_file && (dump_flags & TDF_DETAILS))
2832         fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
2833
2834       *overlaps_a = conflict_fn_not_known ();
2835       *overlaps_b = conflict_fn_not_known ();
2836       *last_conflicts = chrec_dont_know;
2837       dependence_stats.num_ziv_unimplemented++;
2838       break;
2839     }
2840
2841   if (dump_file && (dump_flags & TDF_DETAILS))
2842     fprintf (dump_file, ")\n");
2843 }
2844
2845 /* Similar to max_stmt_executions_int, but returns the bound as a tree,
2846    and only if it fits to the int type.  If this is not the case, or the
2847    bound  on the number of iterations of LOOP could not be derived, returns
2848    chrec_dont_know.  */
2849
2850 static tree
2851 max_stmt_executions_tree (struct loop *loop)
2852 {
2853   widest_int nit;
2854
2855   if (!max_stmt_executions (loop, &nit))
2856     return chrec_dont_know;
2857
2858   if (!wi::fits_to_tree_p (nit, unsigned_type_node))
2859     return chrec_dont_know;
2860
2861   return wide_int_to_tree (unsigned_type_node, nit);
2862 }
2863
2864 /* Determine whether the CHREC is always positive/negative.  If the expression
2865    cannot be statically analyzed, return false, otherwise set the answer into
2866    VALUE.  */
2867
2868 static bool
2869 chrec_is_positive (tree chrec, bool *value)
2870 {
2871   bool value0, value1, value2;
2872   tree end_value, nb_iter;
2873
2874   switch (TREE_CODE (chrec))
2875     {
2876     case POLYNOMIAL_CHREC:
2877       if (!chrec_is_positive (CHREC_LEFT (chrec), &value0)
2878           || !chrec_is_positive (CHREC_RIGHT (chrec), &value1))
2879         return false;
2880
2881       /* FIXME -- overflows.  */
2882       if (value0 == value1)
2883         {
2884           *value = value0;
2885           return true;
2886         }
2887
2888       /* Otherwise the chrec is under the form: "{-197, +, 2}_1",
2889          and the proof consists in showing that the sign never
2890          changes during the execution of the loop, from 0 to
2891          loop->nb_iterations.  */
2892       if (!evolution_function_is_affine_p (chrec))
2893         return false;
2894
2895       nb_iter = number_of_latch_executions (get_chrec_loop (chrec));
2896       if (chrec_contains_undetermined (nb_iter))
2897         return false;
2898
2899 #if 0
2900       /* TODO -- If the test is after the exit, we may decrease the number of
2901          iterations by one.  */
2902       if (after_exit)
2903         nb_iter = chrec_fold_minus (type, nb_iter, build_int_cst (type, 1));
2904 #endif
2905
2906       end_value = chrec_apply (CHREC_VARIABLE (chrec), chrec, nb_iter);
2907
2908       if (!chrec_is_positive (end_value, &value2))
2909         return false;
2910
2911       *value = value0;
2912       return value0 == value1;
2913
2914     case INTEGER_CST:
2915       switch (tree_int_cst_sgn (chrec))
2916         {
2917         case -1:
2918           *value = false;
2919           break;
2920         case 1:
2921           *value = true;
2922           break;
2923         default:
2924           return false;
2925         }
2926       return true;
2927
2928     default:
2929       return false;
2930     }
2931 }
2932
2933
2934 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
2935    constant, and CHREC_B is an affine function.  *OVERLAPS_A and
2936    *OVERLAPS_B are initialized to the functions that describe the
2937    relation between the elements accessed twice by CHREC_A and
2938    CHREC_B.  For k >= 0, the following property is verified:
2939
2940    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
2941
2942 static void
2943 analyze_siv_subscript_cst_affine (tree chrec_a,
2944                                   tree chrec_b,
2945                                   conflict_function **overlaps_a,
2946                                   conflict_function **overlaps_b,
2947                                   tree *last_conflicts)
2948 {
2949   bool value0, value1, value2;
2950   tree type, difference, tmp;
2951
2952   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
2953   chrec_a = chrec_convert (type, chrec_a, NULL);
2954   chrec_b = chrec_convert (type, chrec_b, NULL);
2955   difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
2956
2957   /* Special case overlap in the first iteration.  */
2958   if (integer_zerop (difference))
2959     {
2960       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2961       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2962       *last_conflicts = integer_one_node;
2963       return;
2964     }
2965
2966   if (!chrec_is_positive (initial_condition (difference), &value0))
2967     {
2968       if (dump_file && (dump_flags & TDF_DETAILS))
2969         fprintf (dump_file, "siv test failed: chrec is not positive.\n");
2970
2971       dependence_stats.num_siv_unimplemented++;
2972       *overlaps_a = conflict_fn_not_known ();
2973       *overlaps_b = conflict_fn_not_known ();
2974       *last_conflicts = chrec_dont_know;
2975       return;
2976     }
2977   else
2978     {
2979       if (value0 == false)
2980         {
2981           if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
2982             {
2983               if (dump_file && (dump_flags & TDF_DETAILS))
2984                 fprintf (dump_file, "siv test failed: chrec not positive.\n");
2985
2986               *overlaps_a = conflict_fn_not_known ();
2987               *overlaps_b = conflict_fn_not_known ();
2988               *last_conflicts = chrec_dont_know;
2989               dependence_stats.num_siv_unimplemented++;
2990               return;
2991             }
2992           else
2993             {
2994               if (value1 == true)
2995                 {
2996                   /* Example:
2997                      chrec_a = 12
2998                      chrec_b = {10, +, 1}
2999                   */
3000
3001                   if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
3002                     {
3003                       HOST_WIDE_INT numiter;
3004                       struct loop *loop = get_chrec_loop (chrec_b);
3005
3006                       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3007                       tmp = fold_build2 (EXACT_DIV_EXPR, type,
3008                                          fold_build1 (ABS_EXPR, type, difference),
3009                                          CHREC_RIGHT (chrec_b));
3010                       *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
3011                       *last_conflicts = integer_one_node;
3012
3013
3014                       /* Perform weak-zero siv test to see if overlap is
3015                          outside the loop bounds.  */
3016                       numiter = max_stmt_executions_int (loop);
3017
3018                       if (numiter >= 0
3019                           && compare_tree_int (tmp, numiter) > 0)
3020                         {
3021                           free_conflict_function (*overlaps_a);
3022                           free_conflict_function (*overlaps_b);
3023                           *overlaps_a = conflict_fn_no_dependence ();
3024                           *overlaps_b = conflict_fn_no_dependence ();
3025                           *last_conflicts = integer_zero_node;
3026                           dependence_stats.num_siv_independent++;
3027                           return;
3028                         }
3029                       dependence_stats.num_siv_dependent++;
3030                       return;
3031                     }
3032
3033                   /* When the step does not divide the difference, there are
3034                      no overlaps.  */
3035                   else
3036                     {
3037                       *overlaps_a = conflict_fn_no_dependence ();
3038                       *overlaps_b = conflict_fn_no_dependence ();
3039                       *last_conflicts = integer_zero_node;
3040                       dependence_stats.num_siv_independent++;
3041                       return;
3042                     }
3043                 }
3044
3045               else
3046                 {
3047                   /* Example:
3048                      chrec_a = 12
3049                      chrec_b = {10, +, -1}
3050
3051                      In this case, chrec_a will not overlap with chrec_b.  */
3052                   *overlaps_a = conflict_fn_no_dependence ();
3053                   *overlaps_b = conflict_fn_no_dependence ();
3054                   *last_conflicts = integer_zero_node;
3055                   dependence_stats.num_siv_independent++;
3056                   return;
3057                 }
3058             }
3059         }
3060       else
3061         {
3062           if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
3063             {
3064               if (dump_file && (dump_flags & TDF_DETAILS))
3065                 fprintf (dump_file, "siv test failed: chrec not positive.\n");
3066
3067               *overlaps_a = conflict_fn_not_known ();
3068               *overlaps_b = conflict_fn_not_known ();
3069               *last_conflicts = chrec_dont_know;
3070               dependence_stats.num_siv_unimplemented++;
3071               return;
3072             }
3073           else
3074             {
3075               if (value2 == false)
3076                 {
3077                   /* Example:
3078                      chrec_a = 3
3079                      chrec_b = {10, +, -1}
3080                   */
3081                   if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
3082                     {
3083                       HOST_WIDE_INT numiter;
3084                       struct loop *loop = get_chrec_loop (chrec_b);
3085
3086                       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3087                       tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
3088                                          CHREC_RIGHT (chrec_b));
3089                       *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
3090                       *last_conflicts = integer_one_node;
3091
3092                       /* Perform weak-zero siv test to see if overlap is
3093                          outside the loop bounds.  */
3094                       numiter = max_stmt_executions_int (loop);
3095
3096                       if (numiter >= 0
3097                           && compare_tree_int (tmp, numiter) > 0)
3098                         {
3099                           free_conflict_function (*overlaps_a);
3100                           free_conflict_function (*overlaps_b);
3101                           *overlaps_a = conflict_fn_no_dependence ();
3102                           *overlaps_b = conflict_fn_no_dependence ();
3103                           *last_conflicts = integer_zero_node;
3104                           dependence_stats.num_siv_independent++;
3105                           return;
3106                         }
3107                       dependence_stats.num_siv_dependent++;
3108                       return;
3109                     }
3110
3111                   /* When the step does not divide the difference, there
3112                      are no overlaps.  */
3113                   else
3114                     {
3115                       *overlaps_a = conflict_fn_no_dependence ();
3116                       *overlaps_b = conflict_fn_no_dependence ();
3117                       *last_conflicts = integer_zero_node;
3118                       dependence_stats.num_siv_independent++;
3119                       return;
3120                     }
3121                 }
3122               else
3123                 {
3124                   /* Example:
3125                      chrec_a = 3
3126                      chrec_b = {4, +, 1}
3127
3128                      In this case, chrec_a will not overlap with chrec_b.  */
3129                   *overlaps_a = conflict_fn_no_dependence ();
3130                   *overlaps_b = conflict_fn_no_dependence ();
3131                   *last_conflicts = integer_zero_node;
3132                   dependence_stats.num_siv_independent++;
3133                   return;
3134                 }
3135             }
3136         }
3137     }
3138 }
3139
3140 /* Helper recursive function for initializing the matrix A.  Returns
3141    the initial value of CHREC.  */
3142
3143 static tree
3144 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
3145 {
3146   gcc_assert (chrec);
3147
3148   switch (TREE_CODE (chrec))
3149     {
3150     case POLYNOMIAL_CHREC:
3151       A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
3152       return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
3153
3154     case PLUS_EXPR:
3155     case MULT_EXPR:
3156     case MINUS_EXPR:
3157       {
3158         tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
3159         tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
3160
3161         return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
3162       }
3163
3164     CASE_CONVERT:
3165       {
3166         tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
3167         return chrec_convert (chrec_type (chrec), op, NULL);
3168       }
3169
3170     case BIT_NOT_EXPR:
3171       {
3172         /* Handle ~X as -1 - X.  */
3173         tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
3174         return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
3175                               build_int_cst (TREE_TYPE (chrec), -1), op);
3176       }
3177
3178     case INTEGER_CST:
3179       return chrec;
3180
3181     default:
3182       gcc_unreachable ();
3183       return NULL_TREE;
3184     }
3185 }
3186
3187 #define FLOOR_DIV(x,y) ((x) / (y))
3188
3189 /* Solves the special case of the Diophantine equation:
3190    | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
3191
3192    Computes the descriptions OVERLAPS_A and OVERLAPS_B.  NITER is the
3193    number of iterations that loops X and Y run.  The overlaps will be
3194    constructed as evolutions in dimension DIM.  */
3195
3196 static void
3197 compute_overlap_steps_for_affine_univar (HOST_WIDE_INT niter,
3198                                          HOST_WIDE_INT step_a,
3199                                          HOST_WIDE_INT step_b,
3200                                          affine_fn *overlaps_a,
3201                                          affine_fn *overlaps_b,
3202                                          tree *last_conflicts, int dim)
3203 {
3204   if (((step_a > 0 && step_b > 0)
3205        || (step_a < 0 && step_b < 0)))
3206     {
3207       HOST_WIDE_INT step_overlaps_a, step_overlaps_b;
3208       HOST_WIDE_INT gcd_steps_a_b, last_conflict, tau2;
3209
3210       gcd_steps_a_b = gcd (step_a, step_b);
3211       step_overlaps_a = step_b / gcd_steps_a_b;
3212       step_overlaps_b = step_a / gcd_steps_a_b;
3213
3214       if (niter > 0)
3215         {
3216           tau2 = FLOOR_DIV (niter, step_overlaps_a);
3217           tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
3218           last_conflict = tau2;
3219           *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
3220         }
3221       else
3222         *last_conflicts = chrec_dont_know;
3223
3224       *overlaps_a = affine_fn_univar (integer_zero_node, dim,
3225                                       build_int_cst (NULL_TREE,
3226                                                      step_overlaps_a));
3227       *overlaps_b = affine_fn_univar (integer_zero_node, dim,
3228                                       build_int_cst (NULL_TREE,
3229                                                      step_overlaps_b));
3230     }
3231
3232   else
3233     {
3234       *overlaps_a = affine_fn_cst (integer_zero_node);
3235       *overlaps_b = affine_fn_cst (integer_zero_node);
3236       *last_conflicts = integer_zero_node;
3237     }
3238 }
3239
3240 /* Solves the special case of a Diophantine equation where CHREC_A is
3241    an affine bivariate function, and CHREC_B is an affine univariate
3242    function.  For example,
3243
3244    | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
3245
3246    has the following overlapping functions:
3247
3248    | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
3249    | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
3250    | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
3251
3252    FORNOW: This is a specialized implementation for a case occurring in
3253    a common benchmark.  Implement the general algorithm.  */
3254
3255 static void
3256 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
3257                                       conflict_function **overlaps_a,
3258                                       conflict_function **overlaps_b,
3259                                       tree *last_conflicts)
3260 {
3261   bool xz_p, yz_p, xyz_p;
3262   HOST_WIDE_INT step_x, step_y, step_z;
3263   HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
3264   affine_fn overlaps_a_xz, overlaps_b_xz;
3265   affine_fn overlaps_a_yz, overlaps_b_yz;
3266   affine_fn overlaps_a_xyz, overlaps_b_xyz;
3267   affine_fn ova1, ova2, ovb;
3268   tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
3269
3270   step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
3271   step_y = int_cst_value (CHREC_RIGHT (chrec_a));
3272   step_z = int_cst_value (CHREC_RIGHT (chrec_b));
3273
3274   niter_x = max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)));
3275   niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a));
3276   niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b));
3277
3278   if (niter_x < 0 || niter_y < 0 || niter_z < 0)
3279     {
3280       if (dump_file && (dump_flags & TDF_DETAILS))
3281         fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
3282
3283       *overlaps_a = conflict_fn_not_known ();
3284       *overlaps_b = conflict_fn_not_known ();
3285       *last_conflicts = chrec_dont_know;
3286       return;
3287     }
3288
3289   niter = MIN (niter_x, niter_z);
3290   compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
3291                                            &overlaps_a_xz,
3292                                            &overlaps_b_xz,
3293                                            &last_conflicts_xz, 1);
3294   niter = MIN (niter_y, niter_z);
3295   compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
3296                                            &overlaps_a_yz,
3297                                            &overlaps_b_yz,
3298                                            &last_conflicts_yz, 2);
3299   niter = MIN (niter_x, niter_z);
3300   niter = MIN (niter_y, niter);
3301   compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
3302                                            &overlaps_a_xyz,
3303                                            &overlaps_b_xyz,
3304                                            &last_conflicts_xyz, 3);
3305
3306   xz_p = !integer_zerop (last_conflicts_xz);
3307   yz_p = !integer_zerop (last_conflicts_yz);
3308   xyz_p = !integer_zerop (last_conflicts_xyz);
3309
3310   if (xz_p || yz_p || xyz_p)
3311     {
3312       ova1 = affine_fn_cst (integer_zero_node);
3313       ova2 = affine_fn_cst (integer_zero_node);
3314       ovb = affine_fn_cst (integer_zero_node);
3315       if (xz_p)
3316         {
3317           affine_fn t0 = ova1;
3318           affine_fn t2 = ovb;
3319
3320           ova1 = affine_fn_plus (ova1, overlaps_a_xz);
3321           ovb = affine_fn_plus (ovb, overlaps_b_xz);
3322           affine_fn_free (t0);
3323           affine_fn_free (t2);
3324           *last_conflicts = last_conflicts_xz;
3325         }
3326       if (yz_p)
3327         {
3328           affine_fn t0 = ova2;
3329           affine_fn t2 = ovb;
3330
3331           ova2 = affine_fn_plus (ova2, overlaps_a_yz);
3332           ovb = affine_fn_plus (ovb, overlaps_b_yz);
3333           affine_fn_free (t0);
3334           affine_fn_free (t2);
3335           *last_conflicts = last_conflicts_yz;
3336         }
3337       if (xyz_p)
3338         {
3339           affine_fn t0 = ova1;
3340           affine_fn t2 = ova2;
3341           affine_fn t4 = ovb;
3342
3343           ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
3344           ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
3345           ovb = affine_fn_plus (ovb, overlaps_b_xyz);
3346           affine_fn_free (t0);
3347           affine_fn_free (t2);
3348           affine_fn_free (t4);
3349           *last_conflicts = last_conflicts_xyz;
3350         }
3351       *overlaps_a = conflict_fn (2, ova1, ova2);
3352       *overlaps_b = conflict_fn (1, ovb);
3353     }
3354   else
3355     {
3356       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3357       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3358       *last_conflicts = integer_zero_node;
3359     }
3360
3361   affine_fn_free (overlaps_a_xz);
3362   affine_fn_free (overlaps_b_xz);
3363   affine_fn_free (overlaps_a_yz);
3364   affine_fn_free (overlaps_b_yz);
3365   affine_fn_free (overlaps_a_xyz);
3366   affine_fn_free (overlaps_b_xyz);
3367 }
3368
3369 /* Copy the elements of vector VEC1 with length SIZE to VEC2.  */
3370
3371 static void
3372 lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
3373                     int size)
3374 {
3375   memcpy (vec2, vec1, size * sizeof (*vec1));
3376 }
3377
3378 /* Copy the elements of M x N matrix MAT1 to MAT2.  */
3379
3380 static void
3381 lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
3382                     int m, int n)
3383 {
3384   int i;
3385
3386   for (i = 0; i < m; i++)
3387     lambda_vector_copy (mat1[i], mat2[i], n);
3388 }
3389
3390 /* Store the N x N identity matrix in MAT.  */
3391
3392 static void
3393 lambda_matrix_id (lambda_matrix mat, int size)
3394 {
3395   int i, j;
3396
3397   for (i = 0; i < size; i++)
3398     for (j = 0; j < size; j++)
3399       mat[i][j] = (i == j) ? 1 : 0;
3400 }
3401
3402 /* Return the first nonzero element of vector VEC1 between START and N.
3403    We must have START <= N.   Returns N if VEC1 is the zero vector.  */
3404
3405 static int
3406 lambda_vector_first_nz (lambda_vector vec1, int n, int start)
3407 {
3408   int j = start;
3409   while (j < n && vec1[j] == 0)
3410     j++;
3411   return j;
3412 }
3413
3414 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
3415    R2 = R2 + CONST1 * R1.  */
3416
3417 static void
3418 lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2, int const1)
3419 {
3420   int i;
3421
3422   if (const1 == 0)
3423     return;
3424
3425   for (i = 0; i < n; i++)
3426     mat[r2][i] += const1 * mat[r1][i];
3427 }
3428
3429 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
3430    and store the result in VEC2.  */
3431
3432 static void
3433 lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
3434                           int size, int const1)
3435 {
3436   int i;
3437
3438   if (const1 == 0)
3439     lambda_vector_clear (vec2, size);
3440   else
3441     for (i = 0; i < size; i++)
3442       vec2[i] = const1 * vec1[i];
3443 }
3444
3445 /* Negate vector VEC1 with length SIZE and store it in VEC2.  */
3446
3447 static void
3448 lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
3449                       int size)
3450 {
3451   lambda_vector_mult_const (vec1, vec2, size, -1);
3452 }
3453
3454 /* Negate row R1 of matrix MAT which has N columns.  */
3455
3456 static void
3457 lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
3458 {
3459   lambda_vector_negate (mat[r1], mat[r1], n);
3460 }
3461
3462 /* Return true if two vectors are equal.  */
3463
3464 static bool
3465 lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
3466 {
3467   int i;
3468   for (i = 0; i < size; i++)
3469     if (vec1[i] != vec2[i])
3470       return false;
3471   return true;
3472 }
3473
3474 /* Given an M x N integer matrix A, this function determines an M x
3475    M unimodular matrix U, and an M x N echelon matrix S such that
3476    "U.A = S".  This decomposition is also known as "right Hermite".
3477
3478    Ref: Algorithm 2.1 page 33 in "Loop Transformations for
3479    Restructuring Compilers" Utpal Banerjee.  */
3480
3481 static void
3482 lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
3483                              lambda_matrix S, lambda_matrix U)
3484 {
3485   int i, j, i0 = 0;
3486
3487   lambda_matrix_copy (A, S, m, n);
3488   lambda_matrix_id (U, m);
3489
3490   for (j = 0; j < n; j++)
3491     {
3492       if (lambda_vector_first_nz (S[j], m, i0) < m)
3493         {
3494           ++i0;
3495           for (i = m - 1; i >= i0; i--)
3496             {
3497               while (S[i][j] != 0)
3498                 {
3499                   int sigma, factor, a, b;
3500
3501                   a = S[i-1][j];
3502                   b = S[i][j];
3503                   sigma = (a * b < 0) ? -1: 1;
3504                   a = abs (a);
3505                   b = abs (b);
3506                   factor = sigma * (a / b);
3507
3508                   lambda_matrix_row_add (S, n, i, i-1, -factor);
3509                   std::swap (S[i], S[i-1]);
3510
3511                   lambda_matrix_row_add (U, m, i, i-1, -factor);
3512                   std::swap (U[i], U[i-1]);
3513                 }
3514             }
3515         }
3516     }
3517 }
3518
/* Determines the overlapping elements due to accesses CHREC_A and
   CHREC_B, that are affine functions.  This function cannot handle
   symbolic evolution functions, i.e. when initial conditions are
   parameters, because it uses lambda matrices of integers.

   The result is returned through *OVERLAPS_A, *OVERLAPS_B and
   *LAST_CONFLICTS, which are all set on every path.  The temporary
   matrices live on a local obstack that is released at the
   end_analyze_subs_aa label.  */

static void
analyze_subscript_affine_affine (tree chrec_a,
                                 tree chrec_b,
                                 conflict_function **overlaps_a,
                                 conflict_function **overlaps_b,
                                 tree *last_conflicts)
{
  unsigned nb_vars_a, nb_vars_b, dim;
  HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta;
  lambda_matrix A, U, S;
  struct obstack scratch_obstack;

  /* This early exit happens before the obstack is initialized and
     before the opening dump line is printed, so a plain return is
     enough here.  */
  if (eq_evolutions_p (chrec_a, chrec_b))
    {
      /* The accessed index overlaps for each iteration in the
         loop.  */
      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = chrec_dont_know;
      return;
    }
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "(analyze_subscript_affine_affine \n");

  /* For determining the initial intersection, we have to solve a
     Diophantine equation.  This is the most time consuming part.

     For answering to the question: "Is there a dependence?" we have
     to prove that there exists a solution to the Diophantine
     equation, and that the solution is in the iteration domain,
     i.e. the solution is positive or zero, and that the solution
     happens before the upper bound loop.nb_iterations.  Otherwise
     there is no dependence.  This function outputs a description of
     the iterations that hold the intersections.  */

  nb_vars_a = nb_vars_in_chrec (chrec_a);
  nb_vars_b = nb_vars_in_chrec (chrec_b);

  gcc_obstack_init (&scratch_obstack);

  /* A and S are DIM x 1 columns, U is DIM x DIM.  */
  dim = nb_vars_a + nb_vars_b;
  U = lambda_matrix_new (dim, dim, &scratch_obstack);
  A = lambda_matrix_new (dim, 1, &scratch_obstack);
  S = lambda_matrix_new (dim, 1, &scratch_obstack);

  /* NOTE(review): presumably initialize_matrix_A stores the steps of
     CHREC_A in rows [0, NB_VARS_A) of A, and the negated steps of
     CHREC_B in the rows starting at NB_VARS_A, and returns the initial
     value of the chrec -- confirm against its definition.  */
  init_a = int_cst_value (initialize_matrix_A (A, chrec_a, 0, 1));
  init_b = int_cst_value (initialize_matrix_A (A, chrec_b, nb_vars_a, -1));
  gamma = init_b - init_a;

  /* Don't do all the hard work of solving the Diophantine equation
     when we already know the solution: for example,
     | {3, +, 1}_1
     | {3, +, 4}_2
     | gamma = 3 - 3 = 0.
     Then the first overlap occurs during the first iterations:
     | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
  */
  if (gamma == 0)
    {
      if (nb_vars_a == 1 && nb_vars_b == 1)
        {
          HOST_WIDE_INT step_a, step_b;
          HOST_WIDE_INT niter, niter_a, niter_b;
          affine_fn ova, ovb;

          niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a));
          niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b));
          niter = MIN (niter_a, niter_b);
          step_a = int_cst_value (CHREC_RIGHT (chrec_a));
          step_b = int_cst_value (CHREC_RIGHT (chrec_b));

          compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
                                                   &ova, &ovb,
                                                   last_conflicts, 1);
          *overlaps_a = conflict_fn (1, ova);
          *overlaps_b = conflict_fn (1, ovb);
        }

      else if (nb_vars_a == 2 && nb_vars_b == 1)
        compute_overlap_steps_for_affine_1_2
          (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);

      /* Same helper as above, with the roles of A and B swapped.  */
      else if (nb_vars_a == 1 && nb_vars_b == 2)
        compute_overlap_steps_for_affine_1_2
          (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);

      else
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, "affine-affine test failed: too many variables.\n");
          *overlaps_a = conflict_fn_not_known ();
          *overlaps_b = conflict_fn_not_known ();
          *last_conflicts = chrec_dont_know;
        }
      goto end_analyze_subs_aa;
    }

  /* U.A = S */
  lambda_matrix_right_hermite (A, dim, 1, S, U);

  /* Make the leading entry of S nonnegative.  The first row of U is
     negated along with it.  */
  if (S[0][0] < 0)
    {
      S[0][0] *= -1;
      lambda_matrix_row_negate (U, dim, 0);
    }
  gcd_alpha_beta = S[0][0];

  /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
     but that is a quite strange case.  Instead of ICEing, answer
     don't know.  */
  if (gcd_alpha_beta == 0)
    {
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      goto end_analyze_subs_aa;
    }

  /* The classic "gcd-test".  */
  if (!int_divides_p (gcd_alpha_beta, gamma))
    {
      /* The "gcd-test" has determined that there is no integer
         solution, i.e. there is no dependence.  */
      *overlaps_a = conflict_fn_no_dependence ();
      *overlaps_b = conflict_fn_no_dependence ();
      *last_conflicts = integer_zero_node;
    }

  /* Both access functions are univariate.  This includes SIV and MIV cases.  */
  else if (nb_vars_a == 1 && nb_vars_b == 1)
    {
      /* Both functions should have the same evolution sign.  */
      if (((A[0][0] > 0 && -A[1][0] > 0)
           || (A[0][0] < 0 && -A[1][0] < 0)))
        {
          /* The solutions are given by:
             |
             | [GAMMA/GCD_ALPHA_BETA  t].[u11 u12]  = [x0]
             |                           [u21 u22]    [y0]

             For a given integer t.  Using the following variables,

             | i0 = u11 * gamma / gcd_alpha_beta
             | j0 = u12 * gamma / gcd_alpha_beta
             | i1 = u21
             | j1 = u22

             the solutions are:

             | x0 = i0 + i1 * t,
             | y0 = j0 + j1 * t.  */
          HOST_WIDE_INT i0, j0, i1, j1;

          i0 = U[0][0] * gamma / gcd_alpha_beta;
          j0 = U[0][1] * gamma / gcd_alpha_beta;
          i1 = U[1][0];
          j1 = U[1][1];

          /* With a zero coefficient the corresponding iteration number
             is the constant I0 (resp. J0); a negative constant is
             outside of the iteration domain.  */
          if ((i1 == 0 && i0 < 0)
              || (j1 == 0 && j0 < 0))
            {
              /* There is no solution.
                 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
                 falls in here, but for the moment we don't look at the
                 upper bound of the iteration domain.  */
              *overlaps_a = conflict_fn_no_dependence ();
              *overlaps_b = conflict_fn_no_dependence ();
              *last_conflicts = integer_zero_node;
              goto end_analyze_subs_aa;
            }

          if (i1 > 0 && j1 > 0)
            {
              HOST_WIDE_INT niter_a
                = max_stmt_executions_int (get_chrec_loop (chrec_a));
              HOST_WIDE_INT niter_b
                = max_stmt_executions_int (get_chrec_loop (chrec_b));
              HOST_WIDE_INT niter = MIN (niter_a, niter_b);

              /* (X0, Y0) is a solution of the Diophantine equation:
                 "chrec_a (X0) = chrec_b (Y0)".  */
              HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
                                        CEIL (-j0, j1));
              HOST_WIDE_INT x0 = i1 * tau1 + i0;
              HOST_WIDE_INT y0 = j1 * tau1 + j0;

              /* (X1, Y1) is the smallest positive solution of the eq
                 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
                 first conflict occurs.  */
              HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
              HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
              HOST_WIDE_INT y1 = y0 - j1 * min_multiple;

              /* The iteration bounds are only used when NITER is
                 positive; otherwise the last conflict is left
                 unknown.  */
              if (niter > 0)
                {
                  HOST_WIDE_INT tau2 = MIN (FLOOR_DIV (niter_a - i0, i1),
                                            FLOOR_DIV (niter_b - j0, j1));
                  HOST_WIDE_INT last_conflict = tau2 - (x1 - i0)/i1;

                  /* If the overlap occurs outside of the bounds of the
                     loop, there is no dependence.  */
                  if (x1 >= niter_a || y1 >= niter_b)
                    {
                      *overlaps_a = conflict_fn_no_dependence ();
                      *overlaps_b = conflict_fn_no_dependence ();
                      *last_conflicts = integer_zero_node;
                      goto end_analyze_subs_aa;
                    }
                  else
                    *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
                }
              else
                *last_conflicts = chrec_dont_know;

              /* The conflicting iterations are described by the affine
                 functions built from (X1, I1) for A and from (Y1, J1)
                 for B.  */
              *overlaps_a
                = conflict_fn (1,
                               affine_fn_univar (build_int_cst (NULL_TREE, x1),
                                                 1,
                                                 build_int_cst (NULL_TREE, i1)));
              *overlaps_b
                = conflict_fn (1,
                               affine_fn_univar (build_int_cst (NULL_TREE, y1),
                                                 1,
                                                 build_int_cst (NULL_TREE, j1)));
            }
          else
            {
              /* FIXME: For the moment, the upper bound of the
                 iteration domain for i and j is not checked.  */
              if (dump_file && (dump_flags & TDF_DETAILS))
                fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
              *overlaps_a = conflict_fn_not_known ();
              *overlaps_b = conflict_fn_not_known ();
              *last_conflicts = chrec_dont_know;
            }
        }
      else
        {
          /* Evolutions of opposite signs, or a zero entry in A, are
             not handled.  */
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
          *overlaps_a = conflict_fn_not_known ();
          *overlaps_b = conflict_fn_not_known ();
          *last_conflicts = chrec_dont_know;
        }
    }
  else
    {
      /* More than one variable in CHREC_A or CHREC_B with a nonzero
         GAMMA is not handled.  */
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
    }

  /* Common exit: release the scratch matrices and close the dump
     record opened above.  */
end_analyze_subs_aa:
  obstack_free (&scratch_obstack, NULL);
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "  (overlaps_a = ");
      dump_conflict_function (dump_file, *overlaps_a);
      fprintf (dump_file, ")\n  (overlaps_b = ");
      dump_conflict_function (dump_file, *overlaps_b);
      fprintf (dump_file, "))\n");
    }
}
3789
3790 /* Returns true when analyze_subscript_affine_affine can be used for
3791    determining the dependence relation between chrec_a and chrec_b,
3792    that contain symbols.  This function modifies chrec_a and chrec_b
3793    such that the analysis result is the same, and such that they don't
3794    contain symbols, and then can safely be passed to the analyzer.
3795
3796    Example: The analysis of the following tuples of evolutions produce
3797    the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
3798    vs. {0, +, 1}_1
3799
3800    {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
3801    {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
3802 */
3803
3804 static bool
3805 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
3806 {
3807   tree diff, type, left_a, left_b, right_b;
3808
3809   if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
3810       || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
3811     /* FIXME: For the moment not handled.  Might be refined later.  */
3812     return false;
3813
3814   type = chrec_type (*chrec_a);
3815   left_a = CHREC_LEFT (*chrec_a);
3816   left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
3817   diff = chrec_fold_minus (type, left_a, left_b);
3818
3819   if (!evolution_function_is_constant_p (diff))
3820     return false;
3821
3822   if (dump_file && (dump_flags & TDF_DETAILS))
3823     fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
3824
3825   *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
3826                                      diff, CHREC_RIGHT (*chrec_a));
3827   right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
3828   *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
3829                                      build_int_cst (type, 0),
3830                                      right_b);
3831   return true;
3832 }
3833
3834 /* Analyze a SIV (Single Index Variable) subscript.  *OVERLAPS_A and
3835    *OVERLAPS_B are initialized to the functions that describe the
3836    relation between the elements accessed twice by CHREC_A and
3837    CHREC_B.  For k >= 0, the following property is verified:
3838
3839    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
3840
3841 static void
3842 analyze_siv_subscript (tree chrec_a,
3843                        tree chrec_b,
3844                        conflict_function **overlaps_a,
3845                        conflict_function **overlaps_b,
3846                        tree *last_conflicts,
3847                        int loop_nest_num)
3848 {
3849   dependence_stats.num_siv++;
3850
3851   if (dump_file && (dump_flags & TDF_DETAILS))
3852     fprintf (dump_file, "(analyze_siv_subscript \n");
3853
3854   if (evolution_function_is_constant_p (chrec_a)
3855       && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
3856     analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
3857                                       overlaps_a, overlaps_b, last_conflicts);
3858
3859   else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
3860            && evolution_function_is_constant_p (chrec_b))
3861     analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
3862                                       overlaps_b, overlaps_a, last_conflicts);
3863
3864   else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
3865            && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
3866     {
3867       if (!chrec_contains_symbols (chrec_a)
3868           && !chrec_contains_symbols (chrec_b))
3869         {
3870           analyze_subscript_affine_affine (chrec_a, chrec_b,
3871                                            overlaps_a, overlaps_b,
3872                                            last_conflicts);
3873
3874           if (CF_NOT_KNOWN_P (*overlaps_a)
3875               || CF_NOT_KNOWN_P (*overlaps_b))
3876             dependence_stats.num_siv_unimplemented++;
3877           else if (CF_NO_DEPENDENCE_P (*overlaps_a)
3878                    || CF_NO_DEPENDENCE_P (*overlaps_b))
3879             dependence_stats.num_siv_independent++;
3880           else
3881             dependence_stats.num_siv_dependent++;
3882         }
3883       else if (can_use_analyze_subscript_affine_affine (&chrec_a,
3884                                                         &chrec_b))
3885         {
3886           analyze_subscript_affine_affine (chrec_a, chrec_b,
3887                                            overlaps_a, overlaps_b,
3888                                            last_conflicts);
3889
3890           if (CF_NOT_KNOWN_P (*overlaps_a)
3891               || CF_NOT_KNOWN_P (*overlaps_b))
3892             dependence_stats.num_siv_unimplemented++;
3893           else if (CF_NO_DEPENDENCE_P (*overlaps_a)
3894                    || CF_NO_DEPENDENCE_P (*overlaps_b))
3895             dependence_stats.num_siv_independent++;
3896           else
3897             dependence_stats.num_siv_dependent++;
3898         }
3899       else
3900         goto siv_subscript_dontknow;
3901     }
3902
3903   else
3904     {
3905     siv_subscript_dontknow:;
3906       if (dump_file && (dump_flags & TDF_DETAILS))
3907         fprintf (dump_file, "  siv test failed: unimplemented");
3908       *overlaps_a = conflict_fn_not_known ();
3909       *overlaps_b = conflict_fn_not_known ();
3910       *last_conflicts = chrec_dont_know;
3911       dependence_stats.num_siv_unimplemented++;
3912     }
3913
3914   if (dump_file && (dump_flags & TDF_DETAILS))
3915     fprintf (dump_file, ")\n");
3916 }
3917
3918 /* Returns false if we can prove that the greatest common divisor of the steps
3919    of CHREC does not divide CST, false otherwise.  */
3920
3921 static bool
3922 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
3923 {
3924   HOST_WIDE_INT cd = 0, val;
3925   tree step;
3926
3927   if (!tree_fits_shwi_p (cst))
3928     return true;
3929   val = tree_to_shwi (cst);
3930
3931   while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
3932     {
3933       step = CHREC_RIGHT (chrec);
3934       if (!tree_fits_shwi_p (step))
3935         return true;
3936       cd = gcd (cd, tree_to_shwi (step));
3937       chrec = CHREC_LEFT (chrec);
3938     }
3939
3940   return val % cd == 0;
3941 }
3942
3943 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
3944    LOOP_NEST.  *OVERLAPS_A and *OVERLAPS_B are initialized to the
3945    functions that describe the relation between the elements accessed
3946    twice by CHREC_A and CHREC_B.  For k >= 0, the following property
3947    is verified:
3948
3949    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
3950
3951 static void
3952 analyze_miv_subscript (tree chrec_a,
3953                        tree chrec_b,
3954                        conflict_function **overlaps_a,
3955                        conflict_function **overlaps_b,
3956                        tree *last_conflicts,
3957                        struct loop *loop_nest)
3958 {
3959   tree type, difference;
3960
3961   dependence_stats.num_miv++;
3962   if (dump_file && (dump_flags & TDF_DETAILS))
3963     fprintf (dump_file, "(analyze_miv_subscript \n");
3964
3965   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
3966   chrec_a = chrec_convert (type, chrec_a, NULL);
3967   chrec_b = chrec_convert (type, chrec_b, NULL);
3968   difference = chrec_fold_minus (type, chrec_a, chrec_b);
3969
3970   if (eq_evolutions_p (chrec_a, chrec_b))
3971     {
3972       /* Access functions are the same: all the elements are accessed
3973          in the same order.  */
3974       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3975       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3976       *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
3977       dependence_stats.num_miv_dependent++;
3978     }
3979
3980   else if (evolution_function_is_constant_p (difference)
3981            && evolution_function_is_affine_multivariate_p (chrec_a,
3982                                                            loop_nest->num)
3983            && !gcd_of_steps_may_divide_p (chrec_a, difference))
3984     {
3985       /* testsuite/.../ssa-chrec-33.c
3986          {{21, +, 2}_1, +, -2}_2  vs.  {{20, +, 2}_1, +, -2}_2
3987
3988          The difference is 1, and all the evolution steps are multiples
3989          of 2, consequently there are no overlapping elements.  */
3990       *overlaps_a = conflict_fn_no_dependence ();
3991       *overlaps_b = conflict_fn_no_dependence ();
3992       *last_conflicts = integer_zero_node;
3993       dependence_stats.num_miv_independent++;
3994     }
3995
3996   else if (evolution_function_is_affine_multivariate_p (chrec_a, loop_nest->num)
3997            && !chrec_contains_symbols (chrec_a)
3998            && evolution_function_is_affine_multivariate_p (chrec_b, loop_nest->num)
3999            && !chrec_contains_symbols (chrec_b))
4000     {
4001       /* testsuite/.../ssa-chrec-35.c
4002          {0, +, 1}_2  vs.  {0, +, 1}_3
4003          the overlapping elements are respectively located at iterations:
4004          {0, +, 1}_x and {0, +, 1}_x,
4005          in other words, we have the equality:
4006          {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
4007
4008          Other examples:
4009          {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
4010          {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
4011
4012          {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
4013          {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
4014       */
4015       analyze_subscript_affine_affine (chrec_a, chrec_b,
4016                                        overlaps_a, overlaps_b, last_conflicts);
4017
4018       if (CF_NOT_KNOWN_P (*overlaps_a)
4019           || CF_NOT_KNOWN_P (*overlaps_b))
4020         dependence_stats.num_miv_unimplemented++;
4021       else if (CF_NO_DEPENDENCE_P (*overlaps_a)
4022                || CF_NO_DEPENDENCE_P (*overlaps_b))
4023         dependence_stats.num_miv_independent++;
4024       else
4025         dependence_stats.num_miv_dependent++;
4026     }
4027
4028   else
4029     {
4030       /* When the analysis is too difficult, answer "don't know".  */
4031       if (dump_file && (dump_flags & TDF_DETAILS))
4032         fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
4033
4034       *overlaps_a = conflict_fn_not_known ();
4035       *overlaps_b = conflict_fn_not_known ();
4036       *last_conflicts = chrec_dont_know;
4037       dependence_stats.num_miv_unimplemented++;
4038     }
4039
4040   if (dump_file && (dump_flags & TDF_DETAILS))
4041     fprintf (dump_file, ")\n");
4042 }
4043
4044 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
4045    with respect to LOOP_NEST.  OVERLAP_ITERATIONS_A and
4046    OVERLAP_ITERATIONS_B are initialized with two functions that
4047    describe the iterations that contain conflicting elements.
4048
4049    Remark: For an integer k >= 0, the following equality is true:
4050
4051    CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
4052 */
4053
4054 static void
4055 analyze_overlapping_iterations (tree chrec_a,
4056                                 tree chrec_b,
4057                                 conflict_function **overlap_iterations_a,
4058                                 conflict_function **overlap_iterations_b,
4059                                 tree *last_conflicts, struct loop *loop_nest)
4060 {
4061   unsigned int lnn = loop_nest->num;
4062
4063   dependence_stats.num_subscript_tests++;
4064
4065   if (dump_file && (dump_flags & TDF_DETAILS))
4066     {
4067       fprintf (dump_file, "(analyze_overlapping_iterations \n");
4068       fprintf (dump_file, "  (chrec_a = ");
4069       print_generic_expr (dump_file, chrec_a);
4070       fprintf (dump_file, ")\n  (chrec_b = ");
4071       print_generic_expr (dump_file, chrec_b);
4072       fprintf (dump_file, ")\n");
4073     }
4074
4075   if (chrec_a == NULL_TREE
4076       || chrec_b == NULL_TREE
4077       || chrec_contains_undetermined (chrec_a)
4078       || chrec_contains_undetermined (chrec_b))
4079     {
4080       dependence_stats.num_subscript_undetermined++;
4081
4082       *overlap_iterations_a = conflict_fn_not_known ();
4083       *overlap_iterations_b = conflict_fn_not_known ();
4084     }
4085
4086   /* If they are the same chrec, and are affine, they overlap
4087      on every iteration.  */
4088   else if (eq_evolutions_p (chrec_a, chrec_b)
4089            && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
4090                || operand_equal_p (chrec_a, chrec_b, 0)))
4091     {
4092       dependence_stats.num_same_subscript_function++;
4093       *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4094       *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4095       *last_conflicts = chrec_dont_know;
4096     }
4097
4098   /* If they aren't the same, and aren't affine, we can't do anything
4099      yet.  */
4100   else if ((chrec_contains_symbols (chrec_a)
4101             || chrec_contains_symbols (chrec_b))
4102            && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
4103                || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
4104     {
4105       dependence_stats.num_subscript_undetermined++;
4106       *overlap_iterations_a = conflict_fn_not_known ();
4107       *overlap_iterations_b = conflict_fn_not_known ();
4108     }
4109
4110   else if (ziv_subscript_p (chrec_a, chrec_b))
4111     analyze_ziv_subscript (chrec_a, chrec_b,
4112                            overlap_iterations_a, overlap_iterations_b,
4113                            last_conflicts);
4114
4115   else if (siv_subscript_p (chrec_a, chrec_b))
4116     analyze_siv_subscript (chrec_a, chrec_b,
4117                            overlap_iterations_a, overlap_iterations_b,
4118                            last_conflicts, lnn);
4119
4120   else
4121     analyze_miv_subscript (chrec_a, chrec_b,
4122                            overlap_iterations_a, overlap_iterations_b,
4123                            last_conflicts, loop_nest);
4124
4125   if (dump_file && (dump_flags & TDF_DETAILS))
4126     {
4127       fprintf (dump_file, "  (overlap_iterations_a = ");
4128       dump_conflict_function (dump_file, *overlap_iterations_a);
4129       fprintf (dump_file, ")\n  (overlap_iterations_b = ");
4130       dump_conflict_function (dump_file, *overlap_iterations_b);
4131       fprintf (dump_file, "))\n");
4132     }
4133 }
4134
4135 /* Helper function for uniquely inserting distance vectors.  */
4136
4137 static void
4138 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
4139 {
4140   unsigned i;
4141   lambda_vector v;
4142
4143   FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, v)
4144     if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
4145       return;
4146
4147   DDR_DIST_VECTS (ddr).safe_push (dist_v);
4148 }
4149
4150 /* Helper function for uniquely inserting direction vectors.  */
4151
4152 static void
4153 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
4154 {
4155   unsigned i;
4156   lambda_vector v;
4157
4158   FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), i, v)
4159     if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
4160       return;
4161
4162   DDR_DIR_VECTS (ddr).safe_push (dir_v);
4163 }
4164
4165 /* Add a distance of 1 on all the loops outer than INDEX.  If we
4166    haven't yet determined a distance for this outer loop, push a new
4167    distance vector composed of the previous distance, and a distance
4168    of 1 for this outer loop.  Example:
4169
4170    | loop_1
4171    |   loop_2
4172    |     A[10]
4173    |   endloop_2
4174    | endloop_1
4175
4176    Saved vectors are of the form (dist_in_1, dist_in_2).  First, we
4177    save (0, 1), then we have to save (1, 0).  */
4178
4179 static void
4180 add_outer_distances (struct data_dependence_relation *ddr,
4181                      lambda_vector dist_v, int index)
4182 {
4183   /* For each outer loop where init_v is not set, the accesses are
4184      in dependence of distance 1 in the loop.  */
4185   while (--index >= 0)
4186     {
4187       lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4188       lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
4189       save_v[index] = 1;
4190       save_dist_v (ddr, save_v);
4191     }
4192 }
4193
4194 /* Return false when fail to represent the data dependence as a
4195    distance vector.  A_INDEX is the index of the first reference
4196    (0 for DDR_A, 1 for DDR_B) and B_INDEX is the index of the
4197    second reference.  INIT_B is set to true when a component has been
4198    added to the distance vector DIST_V.  INDEX_CARRY is then set to
4199    the index in DIST_V that carries the dependence.  */
4200
4201 static bool
4202 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
4203                              unsigned int a_index, unsigned int b_index,
4204                              lambda_vector dist_v, bool *init_b,
4205                              int *index_carry)
4206 {
4207   unsigned i;
4208   lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4209
4210   for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
4211     {
4212       tree access_fn_a, access_fn_b;
4213       struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
4214
4215       if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
4216         {
4217           non_affine_dependence_relation (ddr);
4218           return false;
4219         }
4220
4221       access_fn_a = SUB_ACCESS_FN (subscript, a_index);
4222       access_fn_b = SUB_ACCESS_FN (subscript, b_index);
4223
4224       if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
4225           && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
4226         {
4227           HOST_WIDE_INT dist;
4228           int index;
4229           int var_a = CHREC_VARIABLE (access_fn_a);
4230           int var_b = CHREC_VARIABLE (access_fn_b);
4231
4232           if (var_a != var_b
4233               || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
4234             {
4235               non_affine_dependence_relation (ddr);
4236               return false;
4237             }
4238
4239           dist = int_cst_value (SUB_DISTANCE (subscript));
4240           index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
4241           *index_carry = MIN (index, *index_carry);
4242
4243           /* This is the subscript coupling test.  If we have already
4244              recorded a distance for this loop (a distance coming from
4245              another subscript), it should be the same.  For example,
4246              in the following code, there is no dependence:
4247
4248              | loop i = 0, N, 1
4249              |   T[i+1][i] = ...
4250              |   ... = T[i][i]
4251              | endloop
4252           */
4253           if (init_v[index] != 0 && dist_v[index] != dist)
4254             {
4255               finalize_ddr_dependent (ddr, chrec_known);
4256               return false;
4257             }
4258
4259           dist_v[index] = dist;
4260           init_v[index] = 1;
4261           *init_b = true;
4262         }
4263       else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
4264         {
4265           /* This can be for example an affine vs. constant dependence
4266              (T[i] vs. T[3]) that is not an affine dependence and is
4267              not representable as a distance vector.  */
4268           non_affine_dependence_relation (ddr);
4269           return false;
4270         }
4271     }
4272
4273   return true;
4274 }
4275
4276 /* Return true when the DDR contains only constant access functions.  */
4277
4278 static bool
4279 constant_access_functions (const struct data_dependence_relation *ddr)
4280 {
4281   unsigned i;
4282   subscript *sub;
4283
4284   FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
4285     if (!evolution_function_is_constant_p (SUB_ACCESS_FN (sub, 0))
4286         || !evolution_function_is_constant_p (SUB_ACCESS_FN (sub, 1)))
4287       return false;
4288
4289   return true;
4290 }
4291
4292 /* Helper function for the case where DDR_A and DDR_B are the same
4293    multivariate access function with a constant step.  For an example
4294    see pr34635-1.c.  */
4295
4296 static void
4297 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
4298 {
4299   int x_1, x_2;
4300   tree c_1 = CHREC_LEFT (c_2);
4301   tree c_0 = CHREC_LEFT (c_1);
4302   lambda_vector dist_v;
4303   HOST_WIDE_INT v1, v2, cd;
4304
4305   /* Polynomials with more than 2 variables are not handled yet.  When
4306      the evolution steps are parameters, it is not possible to
4307      represent the dependence using classical distance vectors.  */
4308   if (TREE_CODE (c_0) != INTEGER_CST
4309       || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
4310       || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
4311     {
4312       DDR_AFFINE_P (ddr) = false;
4313       return;
4314     }
4315
4316   x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
4317   x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
4318
4319   /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2).  */
4320   dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4321   v1 = int_cst_value (CHREC_RIGHT (c_1));
4322   v2 = int_cst_value (CHREC_RIGHT (c_2));
4323   cd = gcd (v1, v2);
4324   v1 /= cd;
4325   v2 /= cd;
4326
4327   if (v2 < 0)
4328     {
4329       v2 = -v2;
4330       v1 = -v1;
4331     }
4332
4333   dist_v[x_1] = v2;
4334   dist_v[x_2] = -v1;
4335   save_dist_v (ddr, dist_v);
4336
4337   add_outer_distances (ddr, dist_v, x_1);
4338 }
4339
4340 /* Helper function for the case where DDR_A and DDR_B are the same
4341    access functions.  */
4342
4343 static void
4344 add_other_self_distances (struct data_dependence_relation *ddr)
4345 {
4346   lambda_vector dist_v;
4347   unsigned i;
4348   int index_carry = DDR_NB_LOOPS (ddr);
4349   subscript *sub;
4350
4351   FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
4352     {
4353       tree access_fun = SUB_ACCESS_FN (sub, 0);
4354
4355       if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
4356         {
4357           if (!evolution_function_is_univariate_p (access_fun))
4358             {
4359               if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
4360                 {
4361                   DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
4362                   return;
4363                 }
4364
4365               access_fun = SUB_ACCESS_FN (DDR_SUBSCRIPT (ddr, 0), 0);
4366
4367               if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
4368                 add_multivariate_self_dist (ddr, access_fun);
4369               else
4370                 /* The evolution step is not constant: it varies in
4371                    the outer loop, so this cannot be represented by a
4372                    distance vector.  For example in pr34635.c the
4373                    evolution is {0, +, {0, +, 4}_1}_2.  */
4374                 DDR_AFFINE_P (ddr) = false;
4375
4376               return;
4377             }
4378
4379           index_carry = MIN (index_carry,
4380                              index_in_loop_nest (CHREC_VARIABLE (access_fun),
4381                                                  DDR_LOOP_NEST (ddr)));
4382         }
4383     }
4384
4385   dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4386   add_outer_distances (ddr, dist_v, index_carry);
4387 }
4388
4389 static void
4390 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
4391 {
4392   lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4393
4394   dist_v[DDR_INNER_LOOP (ddr)] = 1;
4395   save_dist_v (ddr, dist_v);
4396 }
4397
4398 /* Adds a unit distance vector to DDR when there is a 0 overlap.  This
4399    is the case for example when access functions are the same and
4400    equal to a constant, as in:
4401
4402    | loop_1
4403    |   A[3] = ...
4404    |   ... = A[3]
4405    | endloop_1
4406
4407    in which case the distance vectors are (0) and (1).  */
4408
4409 static void
4410 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
4411 {
4412   unsigned i, j;
4413
4414   for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
4415     {
4416       subscript_p sub = DDR_SUBSCRIPT (ddr, i);
4417       conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
4418       conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
4419
4420       for (j = 0; j < ca->n; j++)
4421         if (affine_function_zero_p (ca->fns[j]))
4422           {
4423             insert_innermost_unit_dist_vector (ddr);
4424             return;
4425           }
4426
4427       for (j = 0; j < cb->n; j++)
4428         if (affine_function_zero_p (cb->fns[j]))
4429           {
4430             insert_innermost_unit_dist_vector (ddr);
4431             return;
4432           }
4433     }
4434 }
4435
4436 /* Return true when the DDR contains two data references that have the
4437    same access functions.  */
4438
4439 static inline bool
4440 same_access_functions (const struct data_dependence_relation *ddr)
4441 {
4442   unsigned i;
4443   subscript *sub;
4444
4445   FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
4446     if (!eq_evolutions_p (SUB_ACCESS_FN (sub, 0),
4447                           SUB_ACCESS_FN (sub, 1)))
4448       return false;
4449
4450   return true;
4451 }
4452
/* Compute the classic per loop distance vector.  DDR is the data
   dependence relation to build a vector from.  LOOP_NEST is handed to
   subscript_dependence_tester_1 when the subscripts have to be
   analyzed again with the two references swapped.

   Return false when fail to represent the data dependence as a
   distance vector; this is also the answer when
   DDR_ARE_DEPENDENT (DDR) is already set on entry.  Return true once
   the distance vectors have been recorded in DDR.  */

static bool
build_classic_dist_vector (struct data_dependence_relation *ddr,
                           struct loop *loop_nest)
{
  bool init_b = false;
  int index_carry = DDR_NB_LOOPS (ddr);
  lambda_vector dist_v;

  /* The relation has already been decided: there is nothing to
     build.  */
  if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
    return false;

  /* Both references have the same access functions.  This path
     returns before the dump at the end of the function.  */
  if (same_access_functions (ddr))
    {
      /* Save the 0 vector.  */
      dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
      save_dist_v (ddr, dist_v);

      if (constant_access_functions (ddr))
        add_distance_for_zero_overlaps (ddr);

      if (DDR_NB_LOOPS (ddr) > 1)
        add_other_self_distances (ddr);

      return true;
    }

  /* General case: compute the distance from reference 0 (DDR_A) to
     reference 1 (DDR_B).  */
  dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
  if (!build_classic_dist_vector_1 (ddr, 0, 1, dist_v, &init_b, &index_carry))
    return false;

  /* Save the distance vector if we initialized one.  */
  if (init_b)
    {
      /* Verify a basic constraint: classic distance vectors should
         always be lexicographically positive.

         Data references are collected in the order of execution of
         the program, thus for the following loop

         | for (i = 1; i < 100; i++)
         |   for (j = 1; j < 100; j++)
         |     {
         |       t = T[j+1][i-1];  // A
         |       T[j][i] = t + 2;  // B
         |     }

         references are collected following the direction of the wind:
         A then B.  The data dependence tests are performed also
         following this order, such that we're looking at the distance
         separating the elements accessed by A from the elements later
         accessed by B.  But in this example, the distance returned by
         test_dep (A, B) is lexicographically negative (-1, 1), that
         means that the access A occurs later than B with respect to
         the outer loop, ie. we're actually looking upwind.  In this
         case we solve test_dep (B, A) looking downwind to the
         lexicographically positive solution, that returns the
         distance vector (1, -1).  */
      if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
        {
          lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
          /* Analyze the subscripts again with the references swapped
             (B first, then A); this replaces the conflict functions
             stored in the subscripts of DDR.  */
          if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
            return false;
          compute_subscript_distance (ddr);
          if (!build_classic_dist_vector_1 (ddr, 1, 0, save_v, &init_b,
                                            &index_carry))
            return false;
          save_dist_v (ddr, save_v);
          /* Remember that the recorded vector describes the swapped
             pair.  */
          DDR_REVERSED_P (ddr) = true;

          /* In this case there is a dependence forward for all the
             outer loops:

             | for (k = 1; k < 100; k++)
             |  for (i = 1; i < 100; i++)
             |   for (j = 1; j < 100; j++)
             |     {
             |       t = T[j+1][i-1];  // A
             |       T[j][i] = t + 2;  // B
             |     }

             the vectors are:
             (0,  1, -1)
             (1,  1, -1)
             (1, -1,  1)
          */
          if (DDR_NB_LOOPS (ddr) > 1)
            {
              add_outer_distances (ddr, save_v, index_carry);
              add_outer_distances (ddr, dist_v, index_carry);
            }
        }
      else
        {
          /* DIST_V is lexicographically positive: keep a copy of it to
             be saved as it is.  */
          lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
          lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));

          if (DDR_NB_LOOPS (ddr) > 1)
            {
              lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));

              /* Also compute the distance of the swapped pair, which
                 is used below for the outer loops.  As above, this
                 analyzes the subscripts of DDR again.  */
              if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
                return false;
              compute_subscript_distance (ddr);
              if (!build_classic_dist_vector_1 (ddr, 1, 0, opposite_v, &init_b,
                                                &index_carry))
                return false;

              save_dist_v (ddr, save_v);
              add_outer_distances (ddr, dist_v, index_carry);
              add_outer_distances (ddr, opposite_v, index_carry);
            }
          else
            save_dist_v (ddr, save_v);
        }
    }
  else
    {
      /* There is a distance of 1 on all the outer loops: Example:
         there is a dependence of distance 1 on loop_1 for the array A.

         | loop_1
         |   A[5] = ...
         | endloop
      */
      /* No component of DIST_V has been written on this path.
         NOTE(review): what lambda_vector_first_nz returns for a vector
         without a nonzero entry is not visible from here (presumably
         the vector length) -- confirm.  */
      add_outer_distances (ddr, dist_v,
                           lambda_vector_first_nz (dist_v,
                                                   DDR_NB_LOOPS (ddr), 0));
    }

  /* Dump every distance vector recorded in DDR so far.  */
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      unsigned i;

      fprintf (dump_file, "(build_classic_dist_vector\n");
      for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
        {
          fprintf (dump_file, "  dist_vector = (");
          print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
                               DDR_NB_LOOPS (ddr));
          fprintf (dump_file, "  )\n");
        }
      fprintf (dump_file, ")\n");
    }

  return true;
}
4603
4604 /* Return the direction for a given distance.
4605    FIXME: Computing dir this way is suboptimal, since dir can catch
4606    cases that dist is unable to represent.  */
4607
4608 static inline enum data_dependence_direction
4609 dir_from_dist (int dist)
4610 {
4611   if (dist > 0)
4612     return dir_positive;
4613   else if (dist < 0)
4614     return dir_negative;
4615   else
4616     return dir_equal;
4617 }
4618
4619 /* Compute the classic per loop direction vector.  DDR is the data
4620    dependence relation to build a vector from.  */
4621
4622 static void
4623 build_classic_dir_vector (struct data_dependence_relation *ddr)
4624 {
4625   unsigned i, j;
4626   lambda_vector dist_v;
4627
4628   FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
4629     {
4630       lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4631
4632       for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
4633         dir_v[j] = dir_from_dist (dist_v[j]);
4634
4635       save_dir_v (ddr, dir_v);
4636     }
4637 }
4638
4639 /* Helper function.  Returns true when there is a dependence between the
4640    data references.  A_INDEX is the index of the first reference (0 for
4641    DDR_A, 1 for DDR_B) and B_INDEX is the index of the second reference.  */
4642
4643 static bool
4644 subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
4645                                unsigned int a_index, unsigned int b_index,
4646                                struct loop *loop_nest)
4647 {
4648   unsigned int i;
4649   tree last_conflicts;
4650   struct subscript *subscript;
4651   tree res = NULL_TREE;
4652
4653   for (i = 0; DDR_SUBSCRIPTS (ddr).iterate (i, &subscript); i++)
4654     {
4655       conflict_function *overlaps_a, *overlaps_b;
4656
4657       analyze_overlapping_iterations (SUB_ACCESS_FN (subscript, a_index),
4658                                       SUB_ACCESS_FN (subscript, b_index),
4659                                       &overlaps_a, &overlaps_b,
4660                                       &last_conflicts, loop_nest);
4661
4662       if (SUB_CONFLICTS_IN_A (subscript))
4663         free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
4664       if (SUB_CONFLICTS_IN_B (subscript))
4665         free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
4666
4667       SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
4668       SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
4669       SUB_LAST_CONFLICT (subscript) = last_conflicts;
4670
4671       /* If there is any undetermined conflict function we have to
4672          give a conservative answer in case we cannot prove that
4673          no dependence exists when analyzing another subscript.  */
4674       if (CF_NOT_KNOWN_P (overlaps_a)
4675           || CF_NOT_KNOWN_P (overlaps_b))
4676         {
4677           res = chrec_dont_know;
4678           continue;
4679         }
4680
4681       /* When there is a subscript with no dependence we can stop.  */
4682       else if (CF_NO_DEPENDENCE_P (overlaps_a)
4683                || CF_NO_DEPENDENCE_P (overlaps_b))
4684         {
4685           res = chrec_known;
4686           break;
4687         }
4688     }
4689
4690   if (res == NULL_TREE)
4691     return true;
4692
4693   if (res == chrec_known)
4694     dependence_stats.num_dependence_independent++;
4695   else
4696     dependence_stats.num_dependence_undetermined++;
4697   finalize_ddr_dependent (ddr, res);
4698   return false;
4699 }
4700
4701 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR.  */
4702
4703 static void
4704 subscript_dependence_tester (struct data_dependence_relation *ddr,
4705                              struct loop *loop_nest)
4706 {
4707   if (subscript_dependence_tester_1 (ddr, 0, 1, loop_nest))
4708     dependence_stats.num_dependence_dependent++;
4709
4710   compute_subscript_distance (ddr);
4711   if (build_classic_dist_vector (ddr, loop_nest))
4712     build_classic_dir_vector (ddr);
4713 }
4714
4715 /* Returns true when all the access functions of A are affine or
4716    constant with respect to LOOP_NEST.  */
4717
4718 static bool
4719 access_functions_are_affine_or_constant_p (const struct data_reference *a,
4720                                            const struct loop *loop_nest)
4721 {
4722   unsigned int i;
4723   vec<tree> fns = DR_ACCESS_FNS (a);
4724   tree t;
4725
4726   FOR_EACH_VEC_ELT (fns, i, t)
4727     if (!evolution_function_is_invariant_p (t, loop_nest->num)
4728         && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
4729       return false;
4730
4731   return true;
4732 }
4733
4734 /* This computes the affine dependence relation between A and B with
4735    respect to LOOP_NEST.  CHREC_KNOWN is used for representing the
4736    independence between two accesses, while CHREC_DONT_KNOW is used
4737    for representing the unknown relation.
4738
4739    Note that it is possible to stop the computation of the dependence
4740    relation the first time we detect a CHREC_KNOWN element for a given
4741    subscript.  */
4742
4743 void
4744 compute_affine_dependence (struct data_dependence_relation *ddr,
4745                            struct loop *loop_nest)
4746 {
4747   struct data_reference *dra = DDR_A (ddr);
4748   struct data_reference *drb = DDR_B (ddr);
4749
4750   if (dump_file && (dump_flags & TDF_DETAILS))
4751     {
4752       fprintf (dump_file, "(compute_affine_dependence\n");
4753       fprintf (dump_file, "  stmt_a: ");
4754       print_gimple_stmt (dump_file, DR_STMT (dra), 0, TDF_SLIM);
4755       fprintf (dump_file, "  stmt_b: ");
4756       print_gimple_stmt (dump_file, DR_STMT (drb), 0, TDF_SLIM);
4757     }
4758
4759   /* Analyze only when the dependence relation is not yet known.  */
4760   if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4761     {
4762       dependence_stats.num_dependence_tests++;
4763
4764       if (access_functions_are_affine_or_constant_p (dra, loop_nest)
4765           && access_functions_are_affine_or_constant_p (drb, loop_nest))
4766         subscript_dependence_tester (ddr, loop_nest);
4767
4768       /* As a last case, if the dependence cannot be determined, or if
4769          the dependence is considered too difficult to determine, answer
4770          "don't know".  */
4771       else
4772         {
4773           dependence_stats.num_dependence_undetermined++;
4774
4775           if (dump_file && (dump_flags & TDF_DETAILS))
4776             {
4777               fprintf (dump_file, "Data ref a:\n");
4778               dump_data_reference (dump_file, dra);
4779               fprintf (dump_file, "Data ref b:\n");
4780               dump_data_reference (dump_file, drb);
4781               fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
4782             }
4783           finalize_ddr_dependent (ddr, chrec_dont_know);
4784         }
4785     }
4786
4787   if (dump_file && (dump_flags & TDF_DETAILS))
4788     {
4789       if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4790         fprintf (dump_file, ") -> no dependence\n");
4791       else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
4792         fprintf (dump_file, ") -> dependence analysis failed\n");
4793       else
4794         fprintf (dump_file, ")\n");
4795     }
4796 }
4797
4798 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
4799    the data references in DATAREFS, in the LOOP_NEST.  When
4800    COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
4801    relations.  Return true when successful, i.e. data references number
4802    is small enough to be handled.  */
4803
4804 bool
4805 compute_all_dependences (vec<data_reference_p> datarefs,
4806                          vec<ddr_p> *dependence_relations,
4807                          vec<loop_p> loop_nest,
4808                          bool compute_self_and_rr)
4809 {
4810   struct data_dependence_relation *ddr;
4811   struct data_reference *a, *b;
4812   unsigned int i, j;
4813
4814   if ((int) datarefs.length ()
4815       > PARAM_VALUE (PARAM_LOOP_MAX_DATAREFS_FOR_DATADEPS))
4816     {
4817       struct data_dependence_relation *ddr;
4818
4819       /* Insert a single relation into dependence_relations:
4820          chrec_dont_know.  */
4821       ddr = initialize_data_dependence_relation (NULL, NULL, loop_nest);
4822       dependence_relations->safe_push (ddr);
4823       return false;
4824     }
4825
4826   FOR_EACH_VEC_ELT (datarefs, i, a)
4827     for (j = i + 1; datarefs.iterate (j, &b); j++)
4828       if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
4829         {
4830           ddr = initialize_data_dependence_relation (a, b, loop_nest);
4831           dependence_relations->safe_push (ddr);
4832           if (loop_nest.exists ())
4833             compute_affine_dependence (ddr, loop_nest[0]);
4834         }
4835
4836   if (compute_self_and_rr)
4837     FOR_EACH_VEC_ELT (datarefs, i, a)
4838       {
4839         ddr = initialize_data_dependence_relation (a, a, loop_nest);
4840         dependence_relations->safe_push (ddr);
4841         if (loop_nest.exists ())
4842           compute_affine_dependence (ddr, loop_nest[0]);
4843       }
4844
4845   return true;
4846 }
4847
/* Describes a location of a memory reference.  Entries are filled in
   by get_references_in_stmt and read back by
   find_data_references_in_stmt and
   graphite_find_data_references_in_stmt, which pass the three fields
   on to create_data_ref.  */

struct data_ref_loc
{
  /* The memory reference.  */
  tree ref;

  /* True if the memory reference is read.  get_references_in_stmt
     sets it to false for the left-hand side of an assignment or call
     and for the memory accessed by an IFN_MASK_STORE.  */
  bool is_read;

  /* True if the data reference is conditional within the containing
     statement, i.e. if it might not occur even when the statement
     is executed and runs to completion.  get_references_in_stmt only
     sets it for IFN_MASK_LOAD and IFN_MASK_STORE.  */
  bool is_conditional_in_stmt;
};
4863
4864
/* Stores the locations of memory references in STMT to REFERENCES.  Returns
   true if STMT clobbers memory, false otherwise.

   STMT is considered to clobber memory when it is a call without
   ECF_CONST (with the exception of some internal functions, see
   below), or an asm that is volatile or has a virtual use.

   References are only collected from statements that have a virtual
   use, and only from assignments and calls.  REFERENCES is appended
   to; entries already present are left alone.  */

static bool
get_references_in_stmt (gimple *stmt, vec<data_ref_loc, va_heap> *references)
{
  bool clobbers_memory = false;
  data_ref_loc ref;
  tree op0, op1;
  enum gimple_code stmt_code = gimple_code (stmt);

  /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
     As we cannot model data-references to not spelled out
     accesses give up if they may occur.  */
  if (stmt_code == GIMPLE_CALL
      && !(gimple_call_flags (stmt) & ECF_CONST))
    {
      /* Allow IFN_GOMP_SIMD_LANE in their own loops.  */
      if (gimple_call_internal_p (stmt))
        switch (gimple_call_internal_fn (stmt))
          {
          case IFN_GOMP_SIMD_LANE:
            {
              /* Only harmless when the loop holding STMT has the
                 simduid that the first call argument refers to.  */
              struct loop *loop = gimple_bb (stmt)->loop_father;
              tree uid = gimple_call_arg (stmt, 0);
              gcc_assert (TREE_CODE (uid) == SSA_NAME);
              if (loop == NULL
                  || loop->simduid != SSA_NAME_VAR (uid))
                clobbers_memory = true;
              break;
            }
          /* Masked loads and stores are turned into explicit
             references further down.  */
          case IFN_MASK_LOAD:
          case IFN_MASK_STORE:
            break;
          default:
            clobbers_memory = true;
            break;
          }
      else
        clobbers_memory = true;
    }
  else if (stmt_code == GIMPLE_ASM
           && (gimple_asm_volatile_p (as_a <gasm *> (stmt))
               || gimple_vuse (stmt)))
    clobbers_memory = true;

  /* Without a virtual use no reference is collected from STMT.  */
  if (!gimple_vuse (stmt))
    return clobbers_memory;

  if (stmt_code == GIMPLE_ASSIGN)
    {
      tree base;
      op0 = gimple_assign_lhs (stmt);
      op1 = gimple_assign_rhs1 (stmt);

      /* Record the first right-hand side operand as a read when it is
         a declaration, or a reference whose base address is neither
         an SSA name nor a gimple invariant.  */
      if (DECL_P (op1)
          || (REFERENCE_CLASS_P (op1)
              && (base = get_base_address (op1))
              && TREE_CODE (base) != SSA_NAME
              && !is_gimple_min_invariant (base)))
        {
          ref.ref = op1;
          ref.is_read = true;
          ref.is_conditional_in_stmt = false;
          references->safe_push (ref);
        }
    }
  else if (stmt_code == GIMPLE_CALL)
    {
      unsigned i, n;
      tree ptr, type;
      unsigned int align;

      ref.is_read = false;
      if (gimple_call_internal_p (stmt))
        switch (gimple_call_internal_fn (stmt))
          {
          case IFN_MASK_LOAD:
            /* A masked load without a left-hand side leaves the switch
               and is handled like any other call below.  */
            if (gimple_call_lhs (stmt) == NULL_TREE)
              break;
            ref.is_read = true;
            /* FALLTHRU */
          case IFN_MASK_STORE:
            /* Build the MEM_REF for the masked access: the address is
               call argument 0 with a zero offset of the type of call
               argument 1, whose value is used as the alignment.  The
               accessed type is that of the left-hand side for a load
               and that of call argument 3 for a store.  */
            ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)), 0);
            align = tree_to_shwi (gimple_call_arg (stmt, 1));
            if (ref.is_read)
              type = TREE_TYPE (gimple_call_lhs (stmt));
            else
              type = TREE_TYPE (gimple_call_arg (stmt, 3));
            if (TYPE_ALIGN (type) != align)
              type = build_aligned_type (type, align);
            ref.is_conditional_in_stmt = true;
            ref.ref = fold_build2 (MEM_REF, type, gimple_call_arg (stmt, 0),
                                   ptr);
            references->safe_push (ref);
            /* This is the only reference recorded for a masked access,
               and the statement is reported as not clobbering
               memory.  */
            return false;
          default:
            break;
          }

      /* Any other call: every argument that is a declaration, or a
         reference with a base address, is recorded as a read.  */
      op0 = gimple_call_lhs (stmt);
      n = gimple_call_num_args (stmt);
      for (i = 0; i < n; i++)
        {
          op1 = gimple_call_arg (stmt, i);

          if (DECL_P (op1)
              || (REFERENCE_CLASS_P (op1) && get_base_address (op1)))
            {
              ref.ref = op1;
              ref.is_read = true;
              ref.is_conditional_in_stmt = false;
              references->safe_push (ref);
            }
        }
    }
  else
    return clobbers_memory;

  /* Finally record the left-hand side of the assignment or call, when
     there is one and it designates memory, as a write.  */
  if (op0
      && (DECL_P (op0)
          || (REFERENCE_CLASS_P (op0) && get_base_address (op0))))
    {
      ref.ref = op0;
      ref.is_read = false;
      ref.is_conditional_in_stmt = false;
      references->safe_push (ref);
    }
  return clobbers_memory;
}
4995
4996
4997 /* Returns true if the loop-nest has any data reference.  */
4998
4999 bool
5000 loop_nest_has_data_refs (loop_p loop)
5001 {
5002   basic_block *bbs = get_loop_body (loop);
5003   auto_vec<data_ref_loc, 3> references;
5004
5005   for (unsigned i = 0; i < loop->num_nodes; i++)
5006     {
5007       basic_block bb = bbs[i];
5008       gimple_stmt_iterator bsi;
5009
5010       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5011         {
5012           gimple *stmt = gsi_stmt (bsi);
5013           get_references_in_stmt (stmt, &references);
5014           if (references.length ())
5015             {
5016               free (bbs);
5017               return true;
5018             }
5019         }
5020     }
5021   free (bbs);
5022   return false;
5023 }
5024
5025 /* Stores the data references in STMT to DATAREFS.  If there is an unanalyzable
5026    reference, returns false, otherwise returns true.  NEST is the outermost
5027    loop of the loop nest in which the references should be analyzed.  */
5028
5029 bool
5030 find_data_references_in_stmt (struct loop *nest, gimple *stmt,
5031                               vec<data_reference_p> *datarefs)
5032 {
5033   unsigned i;
5034   auto_vec<data_ref_loc, 2> references;
5035   data_ref_loc *ref;
5036   bool ret = true;
5037   data_reference_p dr;
5038
5039   if (get_references_in_stmt (stmt, &references))
5040     return false;
5041
5042   FOR_EACH_VEC_ELT (references, i, ref)
5043     {
5044       dr = create_data_ref (nest ? loop_preheader_edge (nest) : NULL,
5045                             loop_containing_stmt (stmt), ref->ref,
5046                             stmt, ref->is_read, ref->is_conditional_in_stmt);
5047       gcc_assert (dr != NULL);
5048       datarefs->safe_push (dr);
5049     }
5050
5051   return ret;
5052 }
5053
5054 /* Stores the data references in STMT to DATAREFS.  If there is an
5055    unanalyzable reference, returns false, otherwise returns true.
5056    NEST is the outermost loop of the loop nest in which the references
5057    should be instantiated, LOOP is the loop in which the references
5058    should be analyzed.  */
5059
5060 bool
5061 graphite_find_data_references_in_stmt (edge nest, loop_p loop, gimple *stmt,
5062                                        vec<data_reference_p> *datarefs)
5063 {
5064   unsigned i;
5065   auto_vec<data_ref_loc, 2> references;
5066   data_ref_loc *ref;
5067   bool ret = true;
5068   data_reference_p dr;
5069
5070   if (get_references_in_stmt (stmt, &references))
5071     return false;
5072
5073   FOR_EACH_VEC_ELT (references, i, ref)
5074     {
5075       dr = create_data_ref (nest, loop, ref->ref, stmt, ref->is_read,
5076                             ref->is_conditional_in_stmt);
5077       gcc_assert (dr != NULL);
5078       datarefs->safe_push (dr);
5079     }
5080
5081   return ret;
5082 }
5083
5084 /* Search the data references in LOOP, and record the information into
5085    DATAREFS.  Returns chrec_dont_know when failing to analyze a
5086    difficult case, returns NULL_TREE otherwise.  */
5087
5088 tree
5089 find_data_references_in_bb (struct loop *loop, basic_block bb,
5090                             vec<data_reference_p> *datarefs)
5091 {
5092   gimple_stmt_iterator bsi;
5093
5094   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5095     {
5096       gimple *stmt = gsi_stmt (bsi);
5097
5098       if (!find_data_references_in_stmt (loop, stmt, datarefs))
5099         {
5100           struct data_reference *res;
5101           res = XCNEW (struct data_reference);
5102           datarefs->safe_push (res);
5103
5104           return chrec_dont_know;
5105         }
5106     }
5107
5108   return NULL_TREE;
5109 }
5110
5111 /* Search the data references in LOOP, and record the information into
5112    DATAREFS.  Returns chrec_dont_know when failing to analyze a
5113    difficult case, returns NULL_TREE otherwise.
5114
5115    TODO: This function should be made smarter so that it can handle address
5116    arithmetic as if they were array accesses, etc.  */
5117
5118 tree
5119 find_data_references_in_loop (struct loop *loop,
5120                               vec<data_reference_p> *datarefs)
5121 {
5122   basic_block bb, *bbs;
5123   unsigned int i;
5124
5125   bbs = get_loop_body_in_dom_order (loop);
5126
5127   for (i = 0; i < loop->num_nodes; i++)
5128     {
5129       bb = bbs[i];
5130
5131       if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
5132         {
5133           free (bbs);
5134           return chrec_dont_know;
5135         }
5136     }
5137   free (bbs);
5138
5139   return NULL_TREE;
5140 }
5141
5142 /* Return the alignment in bytes that DRB is guaranteed to have at all
5143    times.  */
5144
5145 unsigned int
5146 dr_alignment (innermost_loop_behavior *drb)
5147 {
5148   /* Get the alignment of BASE_ADDRESS + INIT.  */
5149   unsigned int alignment = drb->base_alignment;
5150   unsigned int misalignment = (drb->base_misalignment
5151                                + TREE_INT_CST_LOW (drb->init));
5152   if (misalignment != 0)
5153     alignment = MIN (alignment, misalignment & -misalignment);
5154
5155   /* Cap it to the alignment of OFFSET.  */
5156   if (!integer_zerop (drb->offset))
5157     alignment = MIN (alignment, drb->offset_alignment);
5158
5159   /* Cap it to the alignment of STEP.  */
5160   if (!integer_zerop (drb->step))
5161     alignment = MIN (alignment, drb->step_alignment);
5162
5163   return alignment;
5164 }
5165
5166 /* Recursive helper function.  */
5167
5168 static bool
5169 find_loop_nest_1 (struct loop *loop, vec<loop_p> *loop_nest)
5170 {
5171   /* Inner loops of the nest should not contain siblings.  Example:
5172      when there are two consecutive loops,
5173
5174      | loop_0
5175      |   loop_1
5176      |     A[{0, +, 1}_1]
5177      |   endloop_1
5178      |   loop_2
5179      |     A[{0, +, 1}_2]
5180      |   endloop_2
5181      | endloop_0
5182
5183      the dependence relation cannot be captured by the distance
5184      abstraction.  */
5185   if (loop->next)
5186     return false;
5187
5188   loop_nest->safe_push (loop);
5189   if (loop->inner)
5190     return find_loop_nest_1 (loop->inner, loop_nest);
5191   return true;
5192 }
5193
5194 /* Return false when the LOOP is not well nested.  Otherwise return
5195    true and insert in LOOP_NEST the loops of the nest.  LOOP_NEST will
5196    contain the loops from the outermost to the innermost, as they will
5197    appear in the classic distance vector.  */
5198
5199 bool
5200 find_loop_nest (struct loop *loop, vec<loop_p> *loop_nest)
5201 {
5202   loop_nest->safe_push (loop);
5203   if (loop->inner)
5204     return find_loop_nest_1 (loop->inner, loop_nest);
5205   return true;
5206 }
5207
5208 /* Returns true when the data dependences have been computed, false otherwise.
5209    Given a loop nest LOOP, the following vectors are returned:
5210    DATAREFS is initialized to all the array elements contained in this loop,
5211    DEPENDENCE_RELATIONS contains the relations between the data references.
5212    Compute read-read and self relations if
5213    COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE.  */
5214
5215 bool
5216 compute_data_dependences_for_loop (struct loop *loop,
5217                                    bool compute_self_and_read_read_dependences,
5218                                    vec<loop_p> *loop_nest,
5219                                    vec<data_reference_p> *datarefs,
5220                                    vec<ddr_p> *dependence_relations)
5221 {
5222   bool res = true;
5223
5224   memset (&dependence_stats, 0, sizeof (dependence_stats));
5225
5226   /* If the loop nest is not well formed, or one of the data references
5227      is not computable, give up without spending time to compute other
5228      dependences.  */
5229   if (!loop
5230       || !find_loop_nest (loop, loop_nest)
5231       || find_data_references_in_loop (loop, datarefs) == chrec_dont_know
5232       || !compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
5233                                    compute_self_and_read_read_dependences))
5234     res = false;
5235
5236   if (dump_file && (dump_flags & TDF_STATS))
5237     {
5238       fprintf (dump_file, "Dependence tester statistics:\n");
5239
5240       fprintf (dump_file, "Number of dependence tests: %d\n",
5241                dependence_stats.num_dependence_tests);
5242       fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
5243                dependence_stats.num_dependence_dependent);
5244       fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
5245                dependence_stats.num_dependence_independent);
5246       fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
5247                dependence_stats.num_dependence_undetermined);
5248
5249       fprintf (dump_file, "Number of subscript tests: %d\n",
5250                dependence_stats.num_subscript_tests);
5251       fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
5252                dependence_stats.num_subscript_undetermined);
5253       fprintf (dump_file, "Number of same subscript function: %d\n",
5254                dependence_stats.num_same_subscript_function);
5255
5256       fprintf (dump_file, "Number of ziv tests: %d\n",
5257                dependence_stats.num_ziv);
5258       fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
5259                dependence_stats.num_ziv_dependent);
5260       fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
5261                dependence_stats.num_ziv_independent);
5262       fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
5263                dependence_stats.num_ziv_unimplemented);
5264
5265       fprintf (dump_file, "Number of siv tests: %d\n",
5266                dependence_stats.num_siv);
5267       fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
5268                dependence_stats.num_siv_dependent);
5269       fprintf (dump_file, "Number of siv tests returning independent: %d\n",
5270                dependence_stats.num_siv_independent);
5271       fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
5272                dependence_stats.num_siv_unimplemented);
5273
5274       fprintf (dump_file, "Number of miv tests: %d\n",
5275                dependence_stats.num_miv);
5276       fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
5277                dependence_stats.num_miv_dependent);
5278       fprintf (dump_file, "Number of miv tests returning independent: %d\n",
5279                dependence_stats.num_miv_independent);
5280       fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
5281                dependence_stats.num_miv_unimplemented);
5282     }
5283
5284   return res;
5285 }
5286
5287 /* Free the memory used by a data dependence relation DDR.  */
5288
5289 void
5290 free_dependence_relation (struct data_dependence_relation *ddr)
5291 {
5292   if (ddr == NULL)
5293     return;
5294
5295   if (DDR_SUBSCRIPTS (ddr).exists ())
5296     free_subscripts (DDR_SUBSCRIPTS (ddr));
5297   DDR_DIST_VECTS (ddr).release ();
5298   DDR_DIR_VECTS (ddr).release ();
5299
5300   free (ddr);
5301 }
5302
5303 /* Free the memory used by the data dependence relations from
5304    DEPENDENCE_RELATIONS.  */
5305
5306 void
5307 free_dependence_relations (vec<ddr_p> dependence_relations)
5308 {
5309   unsigned int i;
5310   struct data_dependence_relation *ddr;
5311
5312   FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
5313     if (ddr)
5314       free_dependence_relation (ddr);
5315
5316   dependence_relations.release ();
5317 }
5318
5319 /* Free the memory used by the data references from DATAREFS.  */
5320
5321 void
5322 free_data_refs (vec<data_reference_p> datarefs)
5323 {
5324   unsigned int i;
5325   struct data_reference *dr;
5326
5327   FOR_EACH_VEC_ELT (datarefs, i, dr)
5328     free_data_ref (dr);
5329   datarefs.release ();
5330 }
5331
5332 /* Common routine implementing both dr_direction_indicator and
5333    dr_zero_step_indicator.  Return USEFUL_MIN if the indicator is known
5334    to be >= USEFUL_MIN and -1 if the indicator is known to be negative.
5335    Return the step as the indicator otherwise.  */
5336
5337 static tree
5338 dr_step_indicator (struct data_reference *dr, int useful_min)
5339 {
5340   tree step = DR_STEP (dr);
5341   STRIP_NOPS (step);
5342   /* Look for cases where the step is scaled by a positive constant
5343      integer, which will often be the access size.  If the multiplication
5344      doesn't change the sign (due to overflow effects) then we can
5345      test the unscaled value instead.  */
5346   if (TREE_CODE (step) == MULT_EXPR
5347       && TREE_CODE (TREE_OPERAND (step, 1)) == INTEGER_CST
5348       && tree_int_cst_sgn (TREE_OPERAND (step, 1)) > 0)
5349     {
5350       tree factor = TREE_OPERAND (step, 1);
5351       step = TREE_OPERAND (step, 0);
5352
5353       /* Strip widening and truncating conversions as well as nops.  */
5354       if (CONVERT_EXPR_P (step)
5355           && INTEGRAL_TYPE_P (TREE_TYPE (TREE_OPERAND (step, 0))))
5356         step = TREE_OPERAND (step, 0);
5357       tree type = TREE_TYPE (step);
5358
5359       /* Get the range of step values that would not cause overflow.  */
5360       widest_int minv = (wi::to_widest (TYPE_MIN_VALUE (ssizetype))
5361                          / wi::to_widest (factor));
5362       widest_int maxv = (wi::to_widest (TYPE_MAX_VALUE (ssizetype))
5363                          / wi::to_widest (factor));
5364
5365       /* Get the range of values that the unconverted step actually has.  */
5366       wide_int step_min, step_max;
5367       if (TREE_CODE (step) != SSA_NAME
5368           || get_range_info (step, &step_min, &step_max) != VR_RANGE)
5369         {
5370           step_min = wi::to_wide (TYPE_MIN_VALUE (type));
5371           step_max = wi::to_wide (TYPE_MAX_VALUE (type));
5372         }
5373
5374       /* Check whether the unconverted step has an acceptable range.  */
5375       signop sgn = TYPE_SIGN (type);
5376       if (wi::les_p (minv, widest_int::from (step_min, sgn))
5377           && wi::ges_p (maxv, widest_int::from (step_max, sgn)))
5378         {
5379           if (wi::ge_p (step_min, useful_min, sgn))
5380             return ssize_int (useful_min);
5381           else if (wi::lt_p (step_max, 0, sgn))
5382             return ssize_int (-1);
5383           else
5384             return fold_convert (ssizetype, step);
5385         }
5386     }
5387   return DR_STEP (dr);
5388 }
5389
5390 /* Return a value that is negative iff DR has a negative step.  */
5391
5392 tree
5393 dr_direction_indicator (struct data_reference *dr)
5394 {
5395   return dr_step_indicator (dr, 0);
5396 }
5397
5398 /* Return a value that is zero iff DR has a zero step.  */
5399
5400 tree
5401 dr_zero_step_indicator (struct data_reference *dr)
5402 {
5403   return dr_step_indicator (dr, 1);
5404 }
5405
5406 /* Return true if DR is known to have a nonnegative (but possibly zero)
5407    step.  */
5408
5409 bool
5410 dr_known_forward_stride_p (struct data_reference *dr)
5411 {
5412   tree indicator = dr_direction_indicator (dr);
5413   tree neg_step_val = fold_binary (LT_EXPR, boolean_type_node,
5414                                    fold_convert (ssizetype, indicator),
5415                                    ssize_int (0));
5416   return neg_step_val && integer_zerop (neg_step_val);
5417 }