poly_int: decode_addr_const
[official-gcc.git] / gcc / tree-data-ref.c
blob86a587d04c0559eced40438c1649fa52a7bcd20a
1 /* Data references and dependences detectors.
2 Copyright (C) 2003-2017 Free Software Foundation, Inc.
3 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* This pass walks a given loop structure searching for array
22 references. The information about the array accesses is recorded
23 in DATA_REFERENCE structures.
25 The basic test for determining the dependences is:
26 given two access functions chrec1 and chrec2 to a same array, and
27 x and y two vectors from the iteration domain, the same element of
28 the array is accessed twice at iterations x and y if and only if:
29 | chrec1 (x) == chrec2 (y).
31 The goals of this analysis are:
33 - to determine the independence: the relation between two
34 independent accesses is qualified with the chrec_known (this
35 information allows a loop parallelization),
37 - when two data references access the same data, to qualify the
38 dependence relation with classic dependence representations:
40 - distance vectors
41 - direction vectors
42 - loop carried level dependence
43 - polyhedron dependence
44 or with the chains of recurrences based representation,
46 - to define a knowledge base for storing the data dependence
47 information,
49 - to define an interface to access this data.
52 Definitions:
54 - subscript: given two array accesses a subscript is the tuple
55 composed of the access functions for a given dimension. Example:
56 Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
57 (f1, g1), (f2, g2), (f3, g3).
59 - Diophantine equation: an equation whose coefficients and
60 solutions are integer constants, for example the equation
61 | 3*x + 2*y = 1
62 has an integer solution x = 1 and y = -1.
64 References:
66 - "Advanced Compilation for High Performance Computing" by Randy
67 Allen and Ken Kennedy.
68 http://citeseer.ist.psu.edu/goff91practical.html
70 - "Loop Transformations for Restructuring Compilers - The Foundations"
71 by Utpal Banerjee.
76 #include "config.h"
77 #include "system.h"
78 #include "coretypes.h"
79 #include "backend.h"
80 #include "rtl.h"
81 #include "tree.h"
82 #include "gimple.h"
83 #include "gimple-pretty-print.h"
84 #include "alias.h"
85 #include "fold-const.h"
86 #include "expr.h"
87 #include "gimple-iterator.h"
88 #include "tree-ssa-loop-niter.h"
89 #include "tree-ssa-loop.h"
90 #include "tree-ssa.h"
91 #include "cfgloop.h"
92 #include "tree-data-ref.h"
93 #include "tree-scalar-evolution.h"
94 #include "dumpfile.h"
95 #include "tree-affine.h"
96 #include "params.h"
97 #include "builtins.h"
/* Counters summarizing the outcome of every dependence, subscript and
   ZIV/SIV/MIV test run by this pass; dumped for statistics.  */

static struct datadep_stats
{
  int num_dependence_tests;
  int num_dependence_dependent;
  int num_dependence_independent;
  int num_dependence_undetermined;

  int num_subscript_tests;
  int num_subscript_undetermined;
  int num_same_subscript_function;

  int num_ziv;
  int num_ziv_independent;
  int num_ziv_dependent;
  int num_ziv_unimplemented;

  int num_siv;
  int num_siv_independent;
  int num_siv_dependent;
  int num_siv_unimplemented;

  int num_miv;
  int num_miv_independent;
  int num_miv_dependent;
  int num_miv_unimplemented;
} dependence_stats;
126 static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
127 unsigned int, unsigned int,
128 struct loop *);
129 /* Returns true iff A divides B. */
131 static inline bool
132 tree_fold_divides_p (const_tree a, const_tree b)
134 gcc_assert (TREE_CODE (a) == INTEGER_CST);
135 gcc_assert (TREE_CODE (b) == INTEGER_CST);
136 return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
/* Returns true iff A divides B.  A == 0 divides only B == 0; checking
   this explicitly avoids the undefined behavior of B % 0.  */

static inline bool
int_divides_p (int a, int b)
{
  if (a == 0)
    return b == 0;
  return ((b % a) == 0);
}
147 /* Return true if reference REF contains a union access. */
149 static bool
150 ref_contains_union_access_p (tree ref)
152 while (handled_component_p (ref))
154 ref = TREE_OPERAND (ref, 0);
155 if (TREE_CODE (TREE_TYPE (ref)) == UNION_TYPE
156 || TREE_CODE (TREE_TYPE (ref)) == QUAL_UNION_TYPE)
157 return true;
159 return false;
164 /* Dump into FILE all the data references from DATAREFS. */
166 static void
167 dump_data_references (FILE *file, vec<data_reference_p> datarefs)
169 unsigned int i;
170 struct data_reference *dr;
172 FOR_EACH_VEC_ELT (datarefs, i, dr)
173 dump_data_reference (file, dr);
176 /* Unified dump into FILE all the data references from DATAREFS. */
178 DEBUG_FUNCTION void
179 debug (vec<data_reference_p> &ref)
181 dump_data_references (stderr, ref);
184 DEBUG_FUNCTION void
185 debug (vec<data_reference_p> *ptr)
187 if (ptr)
188 debug (*ptr);
189 else
190 fprintf (stderr, "<nil>\n");
194 /* Dump into STDERR all the data references from DATAREFS. */
196 DEBUG_FUNCTION void
197 debug_data_references (vec<data_reference_p> datarefs)
199 dump_data_references (stderr, datarefs);
202 /* Print to STDERR the data_reference DR. */
204 DEBUG_FUNCTION void
205 debug_data_reference (struct data_reference *dr)
207 dump_data_reference (stderr, dr);
210 /* Dump function for a DATA_REFERENCE structure. */
212 void
213 dump_data_reference (FILE *outf,
214 struct data_reference *dr)
216 unsigned int i;
218 fprintf (outf, "#(Data Ref: \n");
219 fprintf (outf, "# bb: %d \n", gimple_bb (DR_STMT (dr))->index);
220 fprintf (outf, "# stmt: ");
221 print_gimple_stmt (outf, DR_STMT (dr), 0);
222 fprintf (outf, "# ref: ");
223 print_generic_stmt (outf, DR_REF (dr));
224 fprintf (outf, "# base_object: ");
225 print_generic_stmt (outf, DR_BASE_OBJECT (dr));
227 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
229 fprintf (outf, "# Access function %d: ", i);
230 print_generic_stmt (outf, DR_ACCESS_FN (dr, i));
232 fprintf (outf, "#)\n");
235 /* Unified dump function for a DATA_REFERENCE structure. */
237 DEBUG_FUNCTION void
238 debug (data_reference &ref)
240 dump_data_reference (stderr, &ref);
243 DEBUG_FUNCTION void
244 debug (data_reference *ptr)
246 if (ptr)
247 debug (*ptr);
248 else
249 fprintf (stderr, "<nil>\n");
253 /* Dumps the affine function described by FN to the file OUTF. */
255 DEBUG_FUNCTION void
256 dump_affine_function (FILE *outf, affine_fn fn)
258 unsigned i;
259 tree coef;
261 print_generic_expr (outf, fn[0], TDF_SLIM);
262 for (i = 1; fn.iterate (i, &coef); i++)
264 fprintf (outf, " + ");
265 print_generic_expr (outf, coef, TDF_SLIM);
266 fprintf (outf, " * x_%u", i);
270 /* Dumps the conflict function CF to the file OUTF. */
272 DEBUG_FUNCTION void
273 dump_conflict_function (FILE *outf, conflict_function *cf)
275 unsigned i;
277 if (cf->n == NO_DEPENDENCE)
278 fprintf (outf, "no dependence");
279 else if (cf->n == NOT_KNOWN)
280 fprintf (outf, "not known");
281 else
283 for (i = 0; i < cf->n; i++)
285 if (i != 0)
286 fprintf (outf, " ");
287 fprintf (outf, "[");
288 dump_affine_function (outf, cf->fns[i]);
289 fprintf (outf, "]");
294 /* Dump function for a SUBSCRIPT structure. */
296 DEBUG_FUNCTION void
297 dump_subscript (FILE *outf, struct subscript *subscript)
299 conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
301 fprintf (outf, "\n (subscript \n");
302 fprintf (outf, " iterations_that_access_an_element_twice_in_A: ");
303 dump_conflict_function (outf, cf);
304 if (CF_NONTRIVIAL_P (cf))
306 tree last_iteration = SUB_LAST_CONFLICT (subscript);
307 fprintf (outf, "\n last_conflict: ");
308 print_generic_expr (outf, last_iteration);
311 cf = SUB_CONFLICTS_IN_B (subscript);
312 fprintf (outf, "\n iterations_that_access_an_element_twice_in_B: ");
313 dump_conflict_function (outf, cf);
314 if (CF_NONTRIVIAL_P (cf))
316 tree last_iteration = SUB_LAST_CONFLICT (subscript);
317 fprintf (outf, "\n last_conflict: ");
318 print_generic_expr (outf, last_iteration);
321 fprintf (outf, "\n (Subscript distance: ");
322 print_generic_expr (outf, SUB_DISTANCE (subscript));
323 fprintf (outf, " ))\n");
326 /* Print the classic direction vector DIRV to OUTF. */
328 DEBUG_FUNCTION void
329 print_direction_vector (FILE *outf,
330 lambda_vector dirv,
331 int length)
333 int eq;
335 for (eq = 0; eq < length; eq++)
337 enum data_dependence_direction dir = ((enum data_dependence_direction)
338 dirv[eq]);
340 switch (dir)
342 case dir_positive:
343 fprintf (outf, " +");
344 break;
345 case dir_negative:
346 fprintf (outf, " -");
347 break;
348 case dir_equal:
349 fprintf (outf, " =");
350 break;
351 case dir_positive_or_equal:
352 fprintf (outf, " +=");
353 break;
354 case dir_positive_or_negative:
355 fprintf (outf, " +-");
356 break;
357 case dir_negative_or_equal:
358 fprintf (outf, " -=");
359 break;
360 case dir_star:
361 fprintf (outf, " *");
362 break;
363 default:
364 fprintf (outf, "indep");
365 break;
368 fprintf (outf, "\n");
371 /* Print a vector of direction vectors. */
373 DEBUG_FUNCTION void
374 print_dir_vectors (FILE *outf, vec<lambda_vector> dir_vects,
375 int length)
377 unsigned j;
378 lambda_vector v;
380 FOR_EACH_VEC_ELT (dir_vects, j, v)
381 print_direction_vector (outf, v, length);
384 /* Print out a vector VEC of length N to OUTFILE. */
386 DEBUG_FUNCTION void
387 print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
389 int i;
391 for (i = 0; i < n; i++)
392 fprintf (outfile, "%3d ", vector[i]);
393 fprintf (outfile, "\n");
396 /* Print a vector of distance vectors. */
398 DEBUG_FUNCTION void
399 print_dist_vectors (FILE *outf, vec<lambda_vector> dist_vects,
400 int length)
402 unsigned j;
403 lambda_vector v;
405 FOR_EACH_VEC_ELT (dist_vects, j, v)
406 print_lambda_vector (outf, v, length);
409 /* Dump function for a DATA_DEPENDENCE_RELATION structure. */
411 DEBUG_FUNCTION void
412 dump_data_dependence_relation (FILE *outf,
413 struct data_dependence_relation *ddr)
415 struct data_reference *dra, *drb;
417 fprintf (outf, "(Data Dep: \n");
419 if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
421 if (ddr)
423 dra = DDR_A (ddr);
424 drb = DDR_B (ddr);
425 if (dra)
426 dump_data_reference (outf, dra);
427 else
428 fprintf (outf, " (nil)\n");
429 if (drb)
430 dump_data_reference (outf, drb);
431 else
432 fprintf (outf, " (nil)\n");
434 fprintf (outf, " (don't know)\n)\n");
435 return;
438 dra = DDR_A (ddr);
439 drb = DDR_B (ddr);
440 dump_data_reference (outf, dra);
441 dump_data_reference (outf, drb);
443 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
444 fprintf (outf, " (no dependence)\n");
446 else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
448 unsigned int i;
449 struct loop *loopi;
451 subscript *sub;
452 FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
454 fprintf (outf, " access_fn_A: ");
455 print_generic_stmt (outf, SUB_ACCESS_FN (sub, 0));
456 fprintf (outf, " access_fn_B: ");
457 print_generic_stmt (outf, SUB_ACCESS_FN (sub, 1));
458 dump_subscript (outf, sub);
461 fprintf (outf, " inner loop index: %d\n", DDR_INNER_LOOP (ddr));
462 fprintf (outf, " loop nest: (");
463 FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr), i, loopi)
464 fprintf (outf, "%d ", loopi->num);
465 fprintf (outf, ")\n");
467 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
469 fprintf (outf, " distance_vector: ");
470 print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
471 DDR_NB_LOOPS (ddr));
474 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
476 fprintf (outf, " direction_vector: ");
477 print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
478 DDR_NB_LOOPS (ddr));
482 fprintf (outf, ")\n");
485 /* Debug version. */
487 DEBUG_FUNCTION void
488 debug_data_dependence_relation (struct data_dependence_relation *ddr)
490 dump_data_dependence_relation (stderr, ddr);
493 /* Dump into FILE all the dependence relations from DDRS. */
495 DEBUG_FUNCTION void
496 dump_data_dependence_relations (FILE *file,
497 vec<ddr_p> ddrs)
499 unsigned int i;
500 struct data_dependence_relation *ddr;
502 FOR_EACH_VEC_ELT (ddrs, i, ddr)
503 dump_data_dependence_relation (file, ddr);
506 DEBUG_FUNCTION void
507 debug (vec<ddr_p> &ref)
509 dump_data_dependence_relations (stderr, ref);
512 DEBUG_FUNCTION void
513 debug (vec<ddr_p> *ptr)
515 if (ptr)
516 debug (*ptr);
517 else
518 fprintf (stderr, "<nil>\n");
522 /* Dump to STDERR all the dependence relations from DDRS. */
524 DEBUG_FUNCTION void
525 debug_data_dependence_relations (vec<ddr_p> ddrs)
527 dump_data_dependence_relations (stderr, ddrs);
530 /* Dumps the distance and direction vectors in FILE. DDRS contains
531 the dependence relations, and VECT_SIZE is the size of the
532 dependence vectors, or in other words the number of loops in the
533 considered nest. */
535 DEBUG_FUNCTION void
536 dump_dist_dir_vectors (FILE *file, vec<ddr_p> ddrs)
538 unsigned int i, j;
539 struct data_dependence_relation *ddr;
540 lambda_vector v;
542 FOR_EACH_VEC_ELT (ddrs, i, ddr)
543 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
545 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), j, v)
547 fprintf (file, "DISTANCE_V (");
548 print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
549 fprintf (file, ")\n");
552 FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), j, v)
554 fprintf (file, "DIRECTION_V (");
555 print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
556 fprintf (file, ")\n");
560 fprintf (file, "\n\n");
563 /* Dumps the data dependence relations DDRS in FILE. */
565 DEBUG_FUNCTION void
566 dump_ddrs (FILE *file, vec<ddr_p> ddrs)
568 unsigned int i;
569 struct data_dependence_relation *ddr;
571 FOR_EACH_VEC_ELT (ddrs, i, ddr)
572 dump_data_dependence_relation (file, ddr);
574 fprintf (file, "\n\n");
577 DEBUG_FUNCTION void
578 debug_ddrs (vec<ddr_p> ddrs)
580 dump_ddrs (stderr, ddrs);
583 /* Helper function for split_constant_offset. Expresses OP0 CODE OP1
584 (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
585 constant of type ssizetype, and returns true. If we cannot do this
586 with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
587 is returned. */
589 static bool
590 split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
591 tree *var, tree *off)
593 tree var0, var1;
594 tree off0, off1;
595 enum tree_code ocode = code;
597 *var = NULL_TREE;
598 *off = NULL_TREE;
600 switch (code)
602 case INTEGER_CST:
603 *var = build_int_cst (type, 0);
604 *off = fold_convert (ssizetype, op0);
605 return true;
607 case POINTER_PLUS_EXPR:
608 ocode = PLUS_EXPR;
609 /* FALLTHROUGH */
610 case PLUS_EXPR:
611 case MINUS_EXPR:
612 split_constant_offset (op0, &var0, &off0);
613 split_constant_offset (op1, &var1, &off1);
614 *var = fold_build2 (code, type, var0, var1);
615 *off = size_binop (ocode, off0, off1);
616 return true;
618 case MULT_EXPR:
619 if (TREE_CODE (op1) != INTEGER_CST)
620 return false;
622 split_constant_offset (op0, &var0, &off0);
623 *var = fold_build2 (MULT_EXPR, type, var0, op1);
624 *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
625 return true;
627 case ADDR_EXPR:
629 tree base, poffset;
630 poly_int64 pbitsize, pbitpos, pbytepos;
631 machine_mode pmode;
632 int punsignedp, preversep, pvolatilep;
634 op0 = TREE_OPERAND (op0, 0);
635 base
636 = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset, &pmode,
637 &punsignedp, &preversep, &pvolatilep);
639 if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
640 return false;
641 base = build_fold_addr_expr (base);
642 off0 = ssize_int (pbytepos);
644 if (poffset)
646 split_constant_offset (poffset, &poffset, &off1);
647 off0 = size_binop (PLUS_EXPR, off0, off1);
648 if (POINTER_TYPE_P (TREE_TYPE (base)))
649 base = fold_build_pointer_plus (base, poffset);
650 else
651 base = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base,
652 fold_convert (TREE_TYPE (base), poffset));
655 var0 = fold_convert (type, base);
657 /* If variable length types are involved, punt, otherwise casts
658 might be converted into ARRAY_REFs in gimplify_conversion.
659 To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
660 possibly no longer appears in current GIMPLE, might resurface.
661 This perhaps could run
662 if (CONVERT_EXPR_P (var0))
664 gimplify_conversion (&var0);
665 // Attempt to fill in any within var0 found ARRAY_REF's
666 // element size from corresponding op embedded ARRAY_REF,
667 // if unsuccessful, just punt.
668 } */
669 while (POINTER_TYPE_P (type))
670 type = TREE_TYPE (type);
671 if (int_size_in_bytes (type) < 0)
672 return false;
674 *var = var0;
675 *off = off0;
676 return true;
679 case SSA_NAME:
681 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op0))
682 return false;
684 gimple *def_stmt = SSA_NAME_DEF_STMT (op0);
685 enum tree_code subcode;
687 if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
688 return false;
690 var0 = gimple_assign_rhs1 (def_stmt);
691 subcode = gimple_assign_rhs_code (def_stmt);
692 var1 = gimple_assign_rhs2 (def_stmt);
694 return split_constant_offset_1 (type, var0, subcode, var1, var, off);
696 CASE_CONVERT:
698 /* We must not introduce undefined overflow, and we must not change the value.
699 Hence we're okay if the inner type doesn't overflow to start with
700 (pointer or signed), the outer type also is an integer or pointer
701 and the outer precision is at least as large as the inner. */
702 tree itype = TREE_TYPE (op0);
703 if ((POINTER_TYPE_P (itype)
704 || (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_UNDEFINED (itype)))
705 && TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
706 && (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
708 split_constant_offset (op0, &var0, off);
709 *var = fold_convert (type, var0);
710 return true;
712 return false;
715 default:
716 return false;
720 /* Expresses EXP as VAR + OFF, where off is a constant. The type of OFF
721 will be ssizetype. */
723 void
724 split_constant_offset (tree exp, tree *var, tree *off)
726 tree type = TREE_TYPE (exp), otype, op0, op1, e, o;
727 enum tree_code code;
729 *var = exp;
730 *off = ssize_int (0);
731 STRIP_NOPS (exp);
733 if (tree_is_chrec (exp)
734 || get_gimple_rhs_class (TREE_CODE (exp)) == GIMPLE_TERNARY_RHS)
735 return;
737 otype = TREE_TYPE (exp);
738 code = TREE_CODE (exp);
739 extract_ops_from_tree (exp, &code, &op0, &op1);
740 if (split_constant_offset_1 (otype, op0, code, op1, &e, &o))
742 *var = fold_convert (type, e);
743 *off = o;
747 /* Returns the address ADDR of an object in a canonical shape (without nop
748 casts, and with type of pointer to the object). */
750 static tree
751 canonicalize_base_object_address (tree addr)
753 tree orig = addr;
755 STRIP_NOPS (addr);
757 /* The base address may be obtained by casting from integer, in that case
758 keep the cast. */
759 if (!POINTER_TYPE_P (TREE_TYPE (addr)))
760 return orig;
762 if (TREE_CODE (addr) != ADDR_EXPR)
763 return addr;
765 return build_fold_addr_expr (TREE_OPERAND (addr, 0));
768 /* Analyze the behavior of memory reference REF. There are two modes:
770 - BB analysis. In this case we simply split the address into base,
771 init and offset components, without reference to any containing loop.
772 The resulting base and offset are general expressions and they can
773 vary arbitrarily from one iteration of the containing loop to the next.
774 The step is always zero.
776 - loop analysis. In this case we analyze the reference both wrt LOOP
777 and on the basis that the reference occurs (is "used") in LOOP;
778 see the comment above analyze_scalar_evolution_in_loop for more
779 information about this distinction. The base, init, offset and
780 step fields are all invariant in LOOP.
782 Perform BB analysis if LOOP is null, or if LOOP is the function's
783 dummy outermost loop. In other cases perform loop analysis.
785 Return true if the analysis succeeded and store the results in DRB if so.
786 BB analysis can only fail for bitfield or reversed-storage accesses. */
788 bool
789 dr_analyze_innermost (innermost_loop_behavior *drb, tree ref,
790 struct loop *loop)
792 poly_int64 pbitsize, pbitpos;
793 tree base, poffset;
794 machine_mode pmode;
795 int punsignedp, preversep, pvolatilep;
796 affine_iv base_iv, offset_iv;
797 tree init, dinit, step;
798 bool in_loop = (loop && loop->num);
800 if (dump_file && (dump_flags & TDF_DETAILS))
801 fprintf (dump_file, "analyze_innermost: ");
803 base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset, &pmode,
804 &punsignedp, &preversep, &pvolatilep);
805 gcc_assert (base != NULL_TREE);
807 poly_int64 pbytepos;
808 if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
810 if (dump_file && (dump_flags & TDF_DETAILS))
811 fprintf (dump_file, "failed: bit offset alignment.\n");
812 return false;
815 if (preversep)
817 if (dump_file && (dump_flags & TDF_DETAILS))
818 fprintf (dump_file, "failed: reverse storage order.\n");
819 return false;
822 /* Calculate the alignment and misalignment for the inner reference. */
823 unsigned int HOST_WIDE_INT base_misalignment;
824 unsigned int base_alignment;
825 get_object_alignment_1 (base, &base_alignment, &base_misalignment);
827 /* There are no bitfield references remaining in BASE, so the values
828 we got back must be whole bytes. */
829 gcc_assert (base_alignment % BITS_PER_UNIT == 0
830 && base_misalignment % BITS_PER_UNIT == 0);
831 base_alignment /= BITS_PER_UNIT;
832 base_misalignment /= BITS_PER_UNIT;
834 if (TREE_CODE (base) == MEM_REF)
836 if (!integer_zerop (TREE_OPERAND (base, 1)))
838 /* Subtract MOFF from the base and add it to POFFSET instead.
839 Adjust the misalignment to reflect the amount we subtracted. */
840 offset_int moff = mem_ref_offset (base);
841 base_misalignment -= moff.to_short_addr ();
842 tree mofft = wide_int_to_tree (sizetype, moff);
843 if (!poffset)
844 poffset = mofft;
845 else
846 poffset = size_binop (PLUS_EXPR, poffset, mofft);
848 base = TREE_OPERAND (base, 0);
850 else
851 base = build_fold_addr_expr (base);
853 if (in_loop)
855 if (!simple_iv (loop, loop, base, &base_iv, true))
857 if (dump_file && (dump_flags & TDF_DETAILS))
858 fprintf (dump_file, "failed: evolution of base is not affine.\n");
859 return false;
862 else
864 base_iv.base = base;
865 base_iv.step = ssize_int (0);
866 base_iv.no_overflow = true;
869 if (!poffset)
871 offset_iv.base = ssize_int (0);
872 offset_iv.step = ssize_int (0);
874 else
876 if (!in_loop)
878 offset_iv.base = poffset;
879 offset_iv.step = ssize_int (0);
881 else if (!simple_iv (loop, loop, poffset, &offset_iv, true))
883 if (dump_file && (dump_flags & TDF_DETAILS))
884 fprintf (dump_file, "failed: evolution of offset is not affine.\n");
885 return false;
889 init = ssize_int (pbytepos);
891 /* Subtract any constant component from the base and add it to INIT instead.
892 Adjust the misalignment to reflect the amount we subtracted. */
893 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
894 init = size_binop (PLUS_EXPR, init, dinit);
895 base_misalignment -= TREE_INT_CST_LOW (dinit);
897 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
898 init = size_binop (PLUS_EXPR, init, dinit);
900 step = size_binop (PLUS_EXPR,
901 fold_convert (ssizetype, base_iv.step),
902 fold_convert (ssizetype, offset_iv.step));
904 base = canonicalize_base_object_address (base_iv.base);
906 /* See if get_pointer_alignment can guarantee a higher alignment than
907 the one we calculated above. */
908 unsigned int HOST_WIDE_INT alt_misalignment;
909 unsigned int alt_alignment;
910 get_pointer_alignment_1 (base, &alt_alignment, &alt_misalignment);
912 /* As above, these values must be whole bytes. */
913 gcc_assert (alt_alignment % BITS_PER_UNIT == 0
914 && alt_misalignment % BITS_PER_UNIT == 0);
915 alt_alignment /= BITS_PER_UNIT;
916 alt_misalignment /= BITS_PER_UNIT;
918 if (base_alignment < alt_alignment)
920 base_alignment = alt_alignment;
921 base_misalignment = alt_misalignment;
924 drb->base_address = base;
925 drb->offset = fold_convert (ssizetype, offset_iv.base);
926 drb->init = init;
927 drb->step = step;
928 drb->base_alignment = base_alignment;
929 drb->base_misalignment = base_misalignment & (base_alignment - 1);
930 drb->offset_alignment = highest_pow2_factor (offset_iv.base);
931 drb->step_alignment = highest_pow2_factor (step);
933 if (dump_file && (dump_flags & TDF_DETAILS))
934 fprintf (dump_file, "success.\n");
936 return true;
939 /* Return true if OP is a valid component reference for a DR access
940 function. This accepts a subset of what handled_component_p accepts. */
942 static bool
943 access_fn_component_p (tree op)
945 switch (TREE_CODE (op))
947 case REALPART_EXPR:
948 case IMAGPART_EXPR:
949 case ARRAY_REF:
950 return true;
952 case COMPONENT_REF:
953 return TREE_CODE (TREE_TYPE (TREE_OPERAND (op, 0))) == RECORD_TYPE;
955 default:
956 return false;
960 /* Determines the base object and the list of indices of memory reference
961 DR, analyzed in LOOP and instantiated before NEST. */
963 static void
964 dr_analyze_indices (struct data_reference *dr, edge nest, loop_p loop)
966 vec<tree> access_fns = vNULL;
967 tree ref, op;
968 tree base, off, access_fn;
970 /* If analyzing a basic-block there are no indices to analyze
971 and thus no access functions. */
972 if (!nest)
974 DR_BASE_OBJECT (dr) = DR_REF (dr);
975 DR_ACCESS_FNS (dr).create (0);
976 return;
979 ref = DR_REF (dr);
981 /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
982 into a two element array with a constant index. The base is
983 then just the immediate underlying object. */
984 if (TREE_CODE (ref) == REALPART_EXPR)
986 ref = TREE_OPERAND (ref, 0);
987 access_fns.safe_push (integer_zero_node);
989 else if (TREE_CODE (ref) == IMAGPART_EXPR)
991 ref = TREE_OPERAND (ref, 0);
992 access_fns.safe_push (integer_one_node);
995 /* Analyze access functions of dimensions we know to be independent.
996 The list of component references handled here should be kept in
997 sync with access_fn_component_p. */
998 while (handled_component_p (ref))
1000 if (TREE_CODE (ref) == ARRAY_REF)
1002 op = TREE_OPERAND (ref, 1);
1003 access_fn = analyze_scalar_evolution (loop, op);
1004 access_fn = instantiate_scev (nest, loop, access_fn);
1005 access_fns.safe_push (access_fn);
1007 else if (TREE_CODE (ref) == COMPONENT_REF
1008 && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) == RECORD_TYPE)
1010 /* For COMPONENT_REFs of records (but not unions!) use the
1011 FIELD_DECL offset as constant access function so we can
1012 disambiguate a[i].f1 and a[i].f2. */
1013 tree off = component_ref_field_offset (ref);
1014 off = size_binop (PLUS_EXPR,
1015 size_binop (MULT_EXPR,
1016 fold_convert (bitsizetype, off),
1017 bitsize_int (BITS_PER_UNIT)),
1018 DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref, 1)));
1019 access_fns.safe_push (off);
1021 else
1022 /* If we have an unhandled component we could not translate
1023 to an access function stop analyzing. We have determined
1024 our base object in this case. */
1025 break;
1027 ref = TREE_OPERAND (ref, 0);
1030 /* If the address operand of a MEM_REF base has an evolution in the
1031 analyzed nest, add it as an additional independent access-function. */
1032 if (TREE_CODE (ref) == MEM_REF)
1034 op = TREE_OPERAND (ref, 0);
1035 access_fn = analyze_scalar_evolution (loop, op);
1036 access_fn = instantiate_scev (nest, loop, access_fn);
1037 if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
1039 tree orig_type;
1040 tree memoff = TREE_OPERAND (ref, 1);
1041 base = initial_condition (access_fn);
1042 orig_type = TREE_TYPE (base);
1043 STRIP_USELESS_TYPE_CONVERSION (base);
1044 split_constant_offset (base, &base, &off);
1045 STRIP_USELESS_TYPE_CONVERSION (base);
1046 /* Fold the MEM_REF offset into the evolutions initial
1047 value to make more bases comparable. */
1048 if (!integer_zerop (memoff))
1050 off = size_binop (PLUS_EXPR, off,
1051 fold_convert (ssizetype, memoff));
1052 memoff = build_int_cst (TREE_TYPE (memoff), 0);
1054 /* Adjust the offset so it is a multiple of the access type
1055 size and thus we separate bases that can possibly be used
1056 to produce partial overlaps (which the access_fn machinery
1057 cannot handle). */
1058 wide_int rem;
1059 if (TYPE_SIZE_UNIT (TREE_TYPE (ref))
1060 && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (ref))) == INTEGER_CST
1061 && !integer_zerop (TYPE_SIZE_UNIT (TREE_TYPE (ref))))
1062 rem = wi::mod_trunc
1063 (wi::to_wide (off),
1064 wi::to_wide (TYPE_SIZE_UNIT (TREE_TYPE (ref))),
1065 SIGNED);
1066 else
1067 /* If we can't compute the remainder simply force the initial
1068 condition to zero. */
1069 rem = wi::to_wide (off);
1070 off = wide_int_to_tree (ssizetype, wi::to_wide (off) - rem);
1071 memoff = wide_int_to_tree (TREE_TYPE (memoff), rem);
1072 /* And finally replace the initial condition. */
1073 access_fn = chrec_replace_initial_condition
1074 (access_fn, fold_convert (orig_type, off));
1075 /* ??? This is still not a suitable base object for
1076 dr_may_alias_p - the base object needs to be an
1077 access that covers the object as whole. With
1078 an evolution in the pointer this cannot be
1079 guaranteed.
1080 As a band-aid, mark the access so we can special-case
1081 it in dr_may_alias_p. */
1082 tree old = ref;
1083 ref = fold_build2_loc (EXPR_LOCATION (ref),
1084 MEM_REF, TREE_TYPE (ref),
1085 base, memoff);
1086 MR_DEPENDENCE_CLIQUE (ref) = MR_DEPENDENCE_CLIQUE (old);
1087 MR_DEPENDENCE_BASE (ref) = MR_DEPENDENCE_BASE (old);
1088 DR_UNCONSTRAINED_BASE (dr) = true;
1089 access_fns.safe_push (access_fn);
1092 else if (DECL_P (ref))
1094 /* Canonicalize DR_BASE_OBJECT to MEM_REF form. */
1095 ref = build2 (MEM_REF, TREE_TYPE (ref),
1096 build_fold_addr_expr (ref),
1097 build_int_cst (reference_alias_ptr_type (ref), 0));
1100 DR_BASE_OBJECT (dr) = ref;
1101 DR_ACCESS_FNS (dr) = access_fns;
1104 /* Extracts the alias analysis information from the memory reference DR. */
1106 static void
1107 dr_analyze_alias (struct data_reference *dr)
1109 tree ref = DR_REF (dr);
1110 tree base = get_base_address (ref), addr;
1112 if (INDIRECT_REF_P (base)
1113 || TREE_CODE (base) == MEM_REF)
1115 addr = TREE_OPERAND (base, 0);
1116 if (TREE_CODE (addr) == SSA_NAME)
1117 DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
1121 /* Frees data reference DR. */
1123 void
1124 free_data_ref (data_reference_p dr)
1126 DR_ACCESS_FNS (dr).release ();
1127 free (dr);
1130 /* Analyze memory reference MEMREF, which is accessed in STMT.
1131 The reference is a read if IS_READ is true, otherwise it is a write.
1132 IS_CONDITIONAL_IN_STMT indicates that the reference is conditional
1133 within STMT, i.e. that it might not occur even if STMT is executed
1134 and runs to completion.
1136 Return the data_reference description of MEMREF. NEST is the outermost
1137 loop in which the reference should be instantiated, LOOP is the loop
1138 in which the data reference should be analyzed. */
1140 struct data_reference *
1141 create_data_ref (edge nest, loop_p loop, tree memref, gimple *stmt,
1142 bool is_read, bool is_conditional_in_stmt)
1144 struct data_reference *dr;
1146 if (dump_file && (dump_flags & TDF_DETAILS))
1148 fprintf (dump_file, "Creating dr for ");
1149 print_generic_expr (dump_file, memref, TDF_SLIM);
1150 fprintf (dump_file, "\n");
1153 dr = XCNEW (struct data_reference);
1154 DR_STMT (dr) = stmt;
1155 DR_REF (dr) = memref;
1156 DR_IS_READ (dr) = is_read;
1157 DR_IS_CONDITIONAL_IN_STMT (dr) = is_conditional_in_stmt;
1159 dr_analyze_innermost (&DR_INNERMOST (dr), memref,
1160 nest != NULL ? loop : NULL);
1161 dr_analyze_indices (dr, nest, loop);
1162 dr_analyze_alias (dr);
1164 if (dump_file && (dump_flags & TDF_DETAILS))
1166 unsigned i;
1167 fprintf (dump_file, "\tbase_address: ");
1168 print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
1169 fprintf (dump_file, "\n\toffset from base address: ");
1170 print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
1171 fprintf (dump_file, "\n\tconstant offset from base address: ");
1172 print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
1173 fprintf (dump_file, "\n\tstep: ");
1174 print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
1175 fprintf (dump_file, "\n\tbase alignment: %d", DR_BASE_ALIGNMENT (dr));
1176 fprintf (dump_file, "\n\tbase misalignment: %d",
1177 DR_BASE_MISALIGNMENT (dr));
1178 fprintf (dump_file, "\n\toffset alignment: %d",
1179 DR_OFFSET_ALIGNMENT (dr));
1180 fprintf (dump_file, "\n\tstep alignment: %d", DR_STEP_ALIGNMENT (dr));
1181 fprintf (dump_file, "\n\tbase_object: ");
1182 print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
1183 fprintf (dump_file, "\n");
1184 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
1186 fprintf (dump_file, "\tAccess function %d: ", i);
1187 print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
1191 return dr;
1194 /* A helper function computes order between two tree epxressions T1 and T2.
1195 This is used in comparator functions sorting objects based on the order
1196 of tree expressions. The function returns -1, 0, or 1. */
1199 data_ref_compare_tree (tree t1, tree t2)
1201 int i, cmp;
1202 enum tree_code code;
1203 char tclass;
1205 if (t1 == t2)
1206 return 0;
1207 if (t1 == NULL)
1208 return -1;
1209 if (t2 == NULL)
1210 return 1;
1212 STRIP_USELESS_TYPE_CONVERSION (t1);
1213 STRIP_USELESS_TYPE_CONVERSION (t2);
1214 if (t1 == t2)
1215 return 0;
1217 if (TREE_CODE (t1) != TREE_CODE (t2)
1218 && ! (CONVERT_EXPR_P (t1) && CONVERT_EXPR_P (t2)))
1219 return TREE_CODE (t1) < TREE_CODE (t2) ? -1 : 1;
1221 code = TREE_CODE (t1);
1222 switch (code)
1224 case INTEGER_CST:
1225 return tree_int_cst_compare (t1, t2);
1227 case STRING_CST:
1228 if (TREE_STRING_LENGTH (t1) != TREE_STRING_LENGTH (t2))
1229 return TREE_STRING_LENGTH (t1) < TREE_STRING_LENGTH (t2) ? -1 : 1;
1230 return memcmp (TREE_STRING_POINTER (t1), TREE_STRING_POINTER (t2),
1231 TREE_STRING_LENGTH (t1));
1233 case SSA_NAME:
1234 if (SSA_NAME_VERSION (t1) != SSA_NAME_VERSION (t2))
1235 return SSA_NAME_VERSION (t1) < SSA_NAME_VERSION (t2) ? -1 : 1;
1236 break;
1238 default:
1239 if (POLY_INT_CST_P (t1))
1240 return compare_sizes_for_sort (wi::to_poly_widest (t1),
1241 wi::to_poly_widest (t2));
1243 tclass = TREE_CODE_CLASS (code);
1245 /* For decls, compare their UIDs. */
1246 if (tclass == tcc_declaration)
1248 if (DECL_UID (t1) != DECL_UID (t2))
1249 return DECL_UID (t1) < DECL_UID (t2) ? -1 : 1;
1250 break;
1252 /* For expressions, compare their operands recursively. */
1253 else if (IS_EXPR_CODE_CLASS (tclass))
1255 for (i = TREE_OPERAND_LENGTH (t1) - 1; i >= 0; --i)
1257 cmp = data_ref_compare_tree (TREE_OPERAND (t1, i),
1258 TREE_OPERAND (t2, i));
1259 if (cmp != 0)
1260 return cmp;
1263 else
1264 gcc_unreachable ();
1267 return 0;
1270 /* Return TRUE it's possible to resolve data dependence DDR by runtime alias
1271 check. */
1273 bool
1274 runtime_alias_check_p (ddr_p ddr, struct loop *loop, bool speed_p)
1276 if (dump_enabled_p ())
1278 dump_printf (MSG_NOTE, "consider run-time aliasing test between ");
1279 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (DDR_A (ddr)));
1280 dump_printf (MSG_NOTE, " and ");
1281 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (DDR_B (ddr)));
1282 dump_printf (MSG_NOTE, "\n");
1285 if (!speed_p)
1287 if (dump_enabled_p ())
1288 dump_printf (MSG_MISSED_OPTIMIZATION,
1289 "runtime alias check not supported when optimizing "
1290 "for size.\n");
1291 return false;
1294 /* FORNOW: We don't support versioning with outer-loop in either
1295 vectorization or loop distribution. */
1296 if (loop != NULL && loop->inner != NULL)
1298 if (dump_enabled_p ())
1299 dump_printf (MSG_MISSED_OPTIMIZATION,
1300 "runtime alias check not supported for outer loop.\n");
1301 return false;
1304 /* FORNOW: We don't support creating runtime alias tests for non-constant
1305 step. */
1306 if (TREE_CODE (DR_STEP (DDR_A (ddr))) != INTEGER_CST
1307 || TREE_CODE (DR_STEP (DDR_B (ddr))) != INTEGER_CST)
1309 if (dump_enabled_p ())
1310 dump_printf (MSG_MISSED_OPTIMIZATION,
1311 "runtime alias check not supported for non-constant "
1312 "step\n");
1313 return false;
1316 return true;
1319 /* Operator == between two dr_with_seg_len objects.
1321 This equality operator is used to make sure two data refs
1322 are the same one so that we will consider to combine the
1323 aliasing checks of those two pairs of data dependent data
1324 refs. */
1326 static bool
1327 operator == (const dr_with_seg_len& d1,
1328 const dr_with_seg_len& d2)
1330 return operand_equal_p (DR_BASE_ADDRESS (d1.dr),
1331 DR_BASE_ADDRESS (d2.dr), 0)
1332 && data_ref_compare_tree (DR_OFFSET (d1.dr), DR_OFFSET (d2.dr)) == 0
1333 && data_ref_compare_tree (DR_INIT (d1.dr), DR_INIT (d2.dr)) == 0
1334 && data_ref_compare_tree (d1.seg_len, d2.seg_len) == 0;
1337 /* Comparison function for sorting objects of dr_with_seg_len_pair_t
1338 so that we can combine aliasing checks in one scan. */
1340 static int
1341 comp_dr_with_seg_len_pair (const void *pa_, const void *pb_)
1343 const dr_with_seg_len_pair_t* pa = (const dr_with_seg_len_pair_t *) pa_;
1344 const dr_with_seg_len_pair_t* pb = (const dr_with_seg_len_pair_t *) pb_;
1345 const dr_with_seg_len &a1 = pa->first, &a2 = pa->second;
1346 const dr_with_seg_len &b1 = pb->first, &b2 = pb->second;
1348 /* For DR pairs (a, b) and (c, d), we only consider to merge the alias checks
1349 if a and c have the same basic address snd step, and b and d have the same
1350 address and step. Therefore, if any a&c or b&d don't have the same address
1351 and step, we don't care the order of those two pairs after sorting. */
1352 int comp_res;
1354 if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a1.dr),
1355 DR_BASE_ADDRESS (b1.dr))) != 0)
1356 return comp_res;
1357 if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a2.dr),
1358 DR_BASE_ADDRESS (b2.dr))) != 0)
1359 return comp_res;
1360 if ((comp_res = data_ref_compare_tree (DR_STEP (a1.dr),
1361 DR_STEP (b1.dr))) != 0)
1362 return comp_res;
1363 if ((comp_res = data_ref_compare_tree (DR_STEP (a2.dr),
1364 DR_STEP (b2.dr))) != 0)
1365 return comp_res;
1366 if ((comp_res = data_ref_compare_tree (DR_OFFSET (a1.dr),
1367 DR_OFFSET (b1.dr))) != 0)
1368 return comp_res;
1369 if ((comp_res = data_ref_compare_tree (DR_INIT (a1.dr),
1370 DR_INIT (b1.dr))) != 0)
1371 return comp_res;
1372 if ((comp_res = data_ref_compare_tree (DR_OFFSET (a2.dr),
1373 DR_OFFSET (b2.dr))) != 0)
1374 return comp_res;
1375 if ((comp_res = data_ref_compare_tree (DR_INIT (a2.dr),
1376 DR_INIT (b2.dr))) != 0)
1377 return comp_res;
1379 return 0;
1382 /* Merge alias checks recorded in ALIAS_PAIRS and remove redundant ones.
1383 FACTOR is number of iterations that each data reference is accessed.
1385 Basically, for each pair of dependent data refs store_ptr_0 & load_ptr_0,
1386 we create an expression:
1388 ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
1389 || (load_ptr_0 + load_segment_length_0) <= store_ptr_0))
1391 for aliasing checks. However, in some cases we can decrease the number
1392 of checks by combining two checks into one. For example, suppose we have
1393 another pair of data refs store_ptr_0 & load_ptr_1, and if the following
1394 condition is satisfied:
1396 load_ptr_0 < load_ptr_1 &&
1397 load_ptr_1 - load_ptr_0 - load_segment_length_0 < store_segment_length_0
1399 (this condition means, in each iteration of vectorized loop, the accessed
1400 memory of store_ptr_0 cannot be between the memory of load_ptr_0 and
1401 load_ptr_1.)
1403 we then can use only the following expression to finish the alising checks
1404 between store_ptr_0 & load_ptr_0 and store_ptr_0 & load_ptr_1:
1406 ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
1407 || (load_ptr_1 + load_segment_length_1 <= store_ptr_0))
1409 Note that we only consider that load_ptr_0 and load_ptr_1 have the same
1410 basic address. */
1412 void
1413 prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs,
1414 unsigned HOST_WIDE_INT factor)
1416 /* Sort the collected data ref pairs so that we can scan them once to
1417 combine all possible aliasing checks. */
1418 alias_pairs->qsort (comp_dr_with_seg_len_pair);
1420 /* Scan the sorted dr pairs and check if we can combine alias checks
1421 of two neighboring dr pairs. */
1422 for (size_t i = 1; i < alias_pairs->length (); ++i)
1424 /* Deal with two ddrs (dr_a1, dr_b1) and (dr_a2, dr_b2). */
1425 dr_with_seg_len *dr_a1 = &(*alias_pairs)[i-1].first,
1426 *dr_b1 = &(*alias_pairs)[i-1].second,
1427 *dr_a2 = &(*alias_pairs)[i].first,
1428 *dr_b2 = &(*alias_pairs)[i].second;
1430 /* Remove duplicate data ref pairs. */
1431 if (*dr_a1 == *dr_a2 && *dr_b1 == *dr_b2)
1433 if (dump_enabled_p ())
1435 dump_printf (MSG_NOTE, "found equal ranges ");
1436 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a1->dr));
1437 dump_printf (MSG_NOTE, ", ");
1438 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b1->dr));
1439 dump_printf (MSG_NOTE, " and ");
1440 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a2->dr));
1441 dump_printf (MSG_NOTE, ", ");
1442 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b2->dr));
1443 dump_printf (MSG_NOTE, "\n");
1445 alias_pairs->ordered_remove (i--);
1446 continue;
1449 if (*dr_a1 == *dr_a2 || *dr_b1 == *dr_b2)
1451 /* We consider the case that DR_B1 and DR_B2 are same memrefs,
1452 and DR_A1 and DR_A2 are two consecutive memrefs. */
1453 if (*dr_a1 == *dr_a2)
1455 std::swap (dr_a1, dr_b1);
1456 std::swap (dr_a2, dr_b2);
1459 if (!operand_equal_p (DR_BASE_ADDRESS (dr_a1->dr),
1460 DR_BASE_ADDRESS (dr_a2->dr), 0)
1461 || !operand_equal_p (DR_OFFSET (dr_a1->dr),
1462 DR_OFFSET (dr_a2->dr), 0)
1463 || !tree_fits_shwi_p (DR_INIT (dr_a1->dr))
1464 || !tree_fits_shwi_p (DR_INIT (dr_a2->dr)))
1465 continue;
1467 /* Only merge const step data references. */
1468 if (TREE_CODE (DR_STEP (dr_a1->dr)) != INTEGER_CST
1469 || TREE_CODE (DR_STEP (dr_a2->dr)) != INTEGER_CST)
1470 continue;
1472 /* DR_A1 and DR_A2 must goes in the same direction. */
1473 if (tree_int_cst_compare (DR_STEP (dr_a1->dr), size_zero_node)
1474 != tree_int_cst_compare (DR_STEP (dr_a2->dr), size_zero_node))
1475 continue;
1477 bool neg_step
1478 = (tree_int_cst_compare (DR_STEP (dr_a1->dr), size_zero_node) < 0);
1480 /* We need to compute merged segment length at compilation time for
1481 dr_a1 and dr_a2, which is impossible if either one has non-const
1482 segment length. */
1483 if ((!tree_fits_uhwi_p (dr_a1->seg_len)
1484 || !tree_fits_uhwi_p (dr_a2->seg_len))
1485 && tree_int_cst_compare (DR_STEP (dr_a1->dr),
1486 DR_STEP (dr_a2->dr)) != 0)
1487 continue;
1489 /* Make sure dr_a1 starts left of dr_a2. */
1490 if (tree_int_cst_lt (DR_INIT (dr_a2->dr), DR_INIT (dr_a1->dr)))
1491 std::swap (*dr_a1, *dr_a2);
1493 bool do_remove = false;
1494 wide_int diff = (wi::to_wide (DR_INIT (dr_a2->dr))
1495 - wi::to_wide (DR_INIT (dr_a1->dr)));
1496 wide_int min_seg_len_b;
1497 tree new_seg_len;
1499 if (TREE_CODE (dr_b1->seg_len) == INTEGER_CST)
1500 min_seg_len_b = wi::abs (wi::to_wide (dr_b1->seg_len));
1501 else
1502 min_seg_len_b
1503 = factor * wi::abs (wi::to_wide (DR_STEP (dr_b1->dr)));
1505 /* Now we try to merge alias check dr_a1 & dr_b and dr_a2 & dr_b.
1507 Case A:
1508 check if the following condition is satisfied:
1510 DIFF - SEGMENT_LENGTH_A < SEGMENT_LENGTH_B
1512 where DIFF = DR_A2_INIT - DR_A1_INIT. However,
1513 SEGMENT_LENGTH_A or SEGMENT_LENGTH_B may not be constant so we
1514 have to make a best estimation. We can get the minimum value
1515 of SEGMENT_LENGTH_B as a constant, represented by MIN_SEG_LEN_B,
1516 then either of the following two conditions can guarantee the
1517 one above:
1519 1: DIFF <= MIN_SEG_LEN_B
1520 2: DIFF - SEGMENT_LENGTH_A < MIN_SEG_LEN_B
1521 Because DIFF - SEGMENT_LENGTH_A is done in sizetype, we need
1522 to take care of wrapping behavior in it.
1524 Case B:
1525 If the left segment does not extend beyond the start of the
1526 right segment the new segment length is that of the right
1527 plus the segment distance. The condition is like:
1529 DIFF >= SEGMENT_LENGTH_A ;SEGMENT_LENGTH_A is a constant.
1531 Note 1: Case A.2 and B combined together effectively merges every
1532 dr_a1 & dr_b and dr_a2 & dr_b when SEGMENT_LENGTH_A is const.
1534 Note 2: Above description is based on positive DR_STEP, we need to
1535 take care of negative DR_STEP for wrapping behavior. See PR80815
1536 for more information. */
1537 if (neg_step)
1539 /* Adjust diff according to access size of both references. */
1540 tree size_a1 = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_a1->dr)));
1541 tree size_a2 = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_a2->dr)));
1542 diff += wi::to_wide (size_a2) - wi::to_wide (size_a1);
1543 /* Case A.1. */
1544 if (wi::leu_p (diff, min_seg_len_b)
1545 /* Case A.2 and B combined. */
1546 || (tree_fits_uhwi_p (dr_a2->seg_len)))
1548 if (tree_fits_uhwi_p (dr_a1->seg_len)
1549 && tree_fits_uhwi_p (dr_a2->seg_len))
1551 wide_int min_len
1552 = wi::umin (wi::to_wide (dr_a1->seg_len) - diff,
1553 wi::to_wide (dr_a2->seg_len));
1554 new_seg_len = wide_int_to_tree (sizetype, min_len);
1556 else
1557 new_seg_len
1558 = size_binop (MINUS_EXPR, dr_a2->seg_len,
1559 wide_int_to_tree (sizetype, diff));
1561 dr_a2->seg_len = new_seg_len;
1562 do_remove = true;
1565 else
1567 /* Case A.1. */
1568 if (wi::leu_p (diff, min_seg_len_b)
1569 /* Case A.2 and B combined. */
1570 || (tree_fits_uhwi_p (dr_a1->seg_len)))
1572 if (tree_fits_uhwi_p (dr_a1->seg_len)
1573 && tree_fits_uhwi_p (dr_a2->seg_len))
1575 wide_int max_len
1576 = wi::umax (wi::to_wide (dr_a2->seg_len) + diff,
1577 wi::to_wide (dr_a1->seg_len));
1578 new_seg_len = wide_int_to_tree (sizetype, max_len);
1580 else
1581 new_seg_len
1582 = size_binop (PLUS_EXPR, dr_a2->seg_len,
1583 wide_int_to_tree (sizetype, diff));
1585 dr_a1->seg_len = new_seg_len;
1586 do_remove = true;
1590 if (do_remove)
1592 if (dump_enabled_p ())
1594 dump_printf (MSG_NOTE, "merging ranges for ");
1595 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a1->dr));
1596 dump_printf (MSG_NOTE, ", ");
1597 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b1->dr));
1598 dump_printf (MSG_NOTE, " and ");
1599 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a2->dr));
1600 dump_printf (MSG_NOTE, ", ");
1601 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b2->dr));
1602 dump_printf (MSG_NOTE, "\n");
1604 alias_pairs->ordered_remove (neg_step ? i - 1 : i);
1605 i--;
1611 /* Given LOOP's two data references and segment lengths described by DR_A
1612 and DR_B, create expression checking if the two addresses ranges intersect
1613 with each other based on index of the two addresses. This can only be
1614 done if DR_A and DR_B referring to the same (array) object and the index
1615 is the only difference. For example:
1617 DR_A DR_B
1618 data-ref arr[i] arr[j]
1619 base_object arr arr
1620 index {i_0, +, 1}_loop {j_0, +, 1}_loop
1622 The addresses and their index are like:
1624 |<- ADDR_A ->| |<- ADDR_B ->|
1625 ------------------------------------------------------->
1626 | | | | | | | | | |
1627 ------------------------------------------------------->
1628 i_0 ... i_0+4 j_0 ... j_0+4
1630 We can create expression based on index rather than address:
1632 (i_0 + 4 < j_0 || j_0 + 4 < i_0)
1634 Note evolution step of index needs to be considered in comparison. */
1636 static bool
1637 create_intersect_range_checks_index (struct loop *loop, tree *cond_expr,
1638 const dr_with_seg_len& dr_a,
1639 const dr_with_seg_len& dr_b)
1641 if (integer_zerop (DR_STEP (dr_a.dr))
1642 || integer_zerop (DR_STEP (dr_b.dr))
1643 || DR_NUM_DIMENSIONS (dr_a.dr) != DR_NUM_DIMENSIONS (dr_b.dr))
1644 return false;
1646 if (!tree_fits_uhwi_p (dr_a.seg_len) || !tree_fits_uhwi_p (dr_b.seg_len))
1647 return false;
1649 if (!tree_fits_shwi_p (DR_STEP (dr_a.dr)))
1650 return false;
1652 if (!operand_equal_p (DR_BASE_OBJECT (dr_a.dr), DR_BASE_OBJECT (dr_b.dr), 0))
1653 return false;
1655 if (!operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0))
1656 return false;
1658 gcc_assert (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST);
1660 bool neg_step = tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0;
1661 unsigned HOST_WIDE_INT abs_step
1662 = absu_hwi (tree_to_shwi (DR_STEP (dr_a.dr)));
1664 unsigned HOST_WIDE_INT seg_len1 = tree_to_uhwi (dr_a.seg_len);
1665 unsigned HOST_WIDE_INT seg_len2 = tree_to_uhwi (dr_b.seg_len);
1666 /* Infer the number of iterations with which the memory segment is accessed
1667 by DR. In other words, alias is checked if memory segment accessed by
1668 DR_A in some iterations intersect with memory segment accessed by DR_B
1669 in the same amount iterations.
1670 Note segnment length is a linear function of number of iterations with
1671 DR_STEP as the coefficient. */
1672 unsigned HOST_WIDE_INT niter_len1 = (seg_len1 + abs_step - 1) / abs_step;
1673 unsigned HOST_WIDE_INT niter_len2 = (seg_len2 + abs_step - 1) / abs_step;
1675 unsigned int i;
1676 for (i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
1678 tree access1 = DR_ACCESS_FN (dr_a.dr, i);
1679 tree access2 = DR_ACCESS_FN (dr_b.dr, i);
1680 /* Two indices must be the same if they are not scev, or not scev wrto
1681 current loop being vecorized. */
1682 if (TREE_CODE (access1) != POLYNOMIAL_CHREC
1683 || TREE_CODE (access2) != POLYNOMIAL_CHREC
1684 || CHREC_VARIABLE (access1) != (unsigned)loop->num
1685 || CHREC_VARIABLE (access2) != (unsigned)loop->num)
1687 if (operand_equal_p (access1, access2, 0))
1688 continue;
1690 return false;
1692 /* The two indices must have the same step. */
1693 if (!operand_equal_p (CHREC_RIGHT (access1), CHREC_RIGHT (access2), 0))
1694 return false;
1696 tree idx_step = CHREC_RIGHT (access1);
1697 /* Index must have const step, otherwise DR_STEP won't be constant. */
1698 gcc_assert (TREE_CODE (idx_step) == INTEGER_CST);
1699 /* Index must evaluate in the same direction as DR. */
1700 gcc_assert (!neg_step || tree_int_cst_sign_bit (idx_step) == 1);
1702 tree min1 = CHREC_LEFT (access1);
1703 tree min2 = CHREC_LEFT (access2);
1704 if (!types_compatible_p (TREE_TYPE (min1), TREE_TYPE (min2)))
1705 return false;
1707 /* Ideally, alias can be checked against loop's control IV, but we
1708 need to prove linear mapping between control IV and reference
1709 index. Although that should be true, we check against (array)
1710 index of data reference. Like segment length, index length is
1711 linear function of the number of iterations with index_step as
1712 the coefficient, i.e, niter_len * idx_step. */
1713 tree idx_len1 = fold_build2 (MULT_EXPR, TREE_TYPE (min1), idx_step,
1714 build_int_cst (TREE_TYPE (min1),
1715 niter_len1));
1716 tree idx_len2 = fold_build2 (MULT_EXPR, TREE_TYPE (min2), idx_step,
1717 build_int_cst (TREE_TYPE (min2),
1718 niter_len2));
1719 tree max1 = fold_build2 (PLUS_EXPR, TREE_TYPE (min1), min1, idx_len1);
1720 tree max2 = fold_build2 (PLUS_EXPR, TREE_TYPE (min2), min2, idx_len2);
1721 /* Adjust ranges for negative step. */
1722 if (neg_step)
1724 min1 = fold_build2 (MINUS_EXPR, TREE_TYPE (min1), max1, idx_step);
1725 max1 = fold_build2 (MINUS_EXPR, TREE_TYPE (min1),
1726 CHREC_LEFT (access1), idx_step);
1727 min2 = fold_build2 (MINUS_EXPR, TREE_TYPE (min2), max2, idx_step);
1728 max2 = fold_build2 (MINUS_EXPR, TREE_TYPE (min2),
1729 CHREC_LEFT (access2), idx_step);
1731 tree part_cond_expr
1732 = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1733 fold_build2 (LE_EXPR, boolean_type_node, max1, min2),
1734 fold_build2 (LE_EXPR, boolean_type_node, max2, min1));
1735 if (*cond_expr)
1736 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1737 *cond_expr, part_cond_expr);
1738 else
1739 *cond_expr = part_cond_expr;
1741 return true;
1744 /* Given two data references and segment lengths described by DR_A and DR_B,
1745 create expression checking if the two addresses ranges intersect with
1746 each other:
1748 ((DR_A_addr_0 + DR_A_segment_length_0) <= DR_B_addr_0)
1749 || (DR_B_addr_0 + DER_B_segment_length_0) <= DR_A_addr_0)) */
1751 static void
1752 create_intersect_range_checks (struct loop *loop, tree *cond_expr,
1753 const dr_with_seg_len& dr_a,
1754 const dr_with_seg_len& dr_b)
1756 *cond_expr = NULL_TREE;
1757 if (create_intersect_range_checks_index (loop, cond_expr, dr_a, dr_b))
1758 return;
1760 tree segment_length_a = dr_a.seg_len;
1761 tree segment_length_b = dr_b.seg_len;
1762 tree addr_base_a = DR_BASE_ADDRESS (dr_a.dr);
1763 tree addr_base_b = DR_BASE_ADDRESS (dr_b.dr);
1764 tree offset_a = DR_OFFSET (dr_a.dr), offset_b = DR_OFFSET (dr_b.dr);
1766 offset_a = fold_build2 (PLUS_EXPR, TREE_TYPE (offset_a),
1767 offset_a, DR_INIT (dr_a.dr));
1768 offset_b = fold_build2 (PLUS_EXPR, TREE_TYPE (offset_b),
1769 offset_b, DR_INIT (dr_b.dr));
1770 addr_base_a = fold_build_pointer_plus (addr_base_a, offset_a);
1771 addr_base_b = fold_build_pointer_plus (addr_base_b, offset_b);
1773 tree seg_a_min = addr_base_a;
1774 tree seg_a_max = fold_build_pointer_plus (addr_base_a, segment_length_a);
1775 /* For negative step, we need to adjust address range by TYPE_SIZE_UNIT
1776 bytes, e.g., int a[3] -> a[1] range is [a+4, a+16) instead of
1777 [a, a+12) */
1778 if (tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0)
1780 tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_a.dr)));
1781 seg_a_min = fold_build_pointer_plus (seg_a_max, unit_size);
1782 seg_a_max = fold_build_pointer_plus (addr_base_a, unit_size);
1785 tree seg_b_min = addr_base_b;
1786 tree seg_b_max = fold_build_pointer_plus (addr_base_b, segment_length_b);
1787 if (tree_int_cst_compare (DR_STEP (dr_b.dr), size_zero_node) < 0)
1789 tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_b.dr)));
1790 seg_b_min = fold_build_pointer_plus (seg_b_max, unit_size);
1791 seg_b_max = fold_build_pointer_plus (addr_base_b, unit_size);
1793 *cond_expr
1794 = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1795 fold_build2 (LE_EXPR, boolean_type_node, seg_a_max, seg_b_min),
1796 fold_build2 (LE_EXPR, boolean_type_node, seg_b_max, seg_a_min));
1799 /* Create a conditional expression that represents the run-time checks for
1800 overlapping of address ranges represented by a list of data references
1801 pairs passed in ALIAS_PAIRS. Data references are in LOOP. The returned
1802 COND_EXPR is the conditional expression to be used in the if statement
1803 that controls which version of the loop gets executed at runtime. */
1805 void
1806 create_runtime_alias_checks (struct loop *loop,
1807 vec<dr_with_seg_len_pair_t> *alias_pairs,
1808 tree * cond_expr)
1810 tree part_cond_expr;
1812 for (size_t i = 0, s = alias_pairs->length (); i < s; ++i)
1814 const dr_with_seg_len& dr_a = (*alias_pairs)[i].first;
1815 const dr_with_seg_len& dr_b = (*alias_pairs)[i].second;
1817 if (dump_enabled_p ())
1819 dump_printf (MSG_NOTE, "create runtime check for data references ");
1820 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a.dr));
1821 dump_printf (MSG_NOTE, " and ");
1822 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b.dr));
1823 dump_printf (MSG_NOTE, "\n");
1826 /* Create condition expression for each pair data references. */
1827 create_intersect_range_checks (loop, &part_cond_expr, dr_a, dr_b);
1828 if (*cond_expr)
1829 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1830 *cond_expr, part_cond_expr);
1831 else
1832 *cond_expr = part_cond_expr;
1836 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
1837 expressions. */
1838 static bool
1839 dr_equal_offsets_p1 (tree offset1, tree offset2)
1841 bool res;
1843 STRIP_NOPS (offset1);
1844 STRIP_NOPS (offset2);
1846 if (offset1 == offset2)
1847 return true;
1849 if (TREE_CODE (offset1) != TREE_CODE (offset2)
1850 || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
1851 return false;
1853 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
1854 TREE_OPERAND (offset2, 0));
1856 if (!res || !BINARY_CLASS_P (offset1))
1857 return res;
1859 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
1860 TREE_OPERAND (offset2, 1));
1862 return res;
1865 /* Check if DRA and DRB have equal offsets. */
1866 bool
1867 dr_equal_offsets_p (struct data_reference *dra,
1868 struct data_reference *drb)
1870 tree offset1, offset2;
1872 offset1 = DR_OFFSET (dra);
1873 offset2 = DR_OFFSET (drb);
1875 return dr_equal_offsets_p1 (offset1, offset2);
1878 /* Returns true if FNA == FNB. */
1880 static bool
1881 affine_function_equal_p (affine_fn fna, affine_fn fnb)
1883 unsigned i, n = fna.length ();
1885 if (n != fnb.length ())
1886 return false;
1888 for (i = 0; i < n; i++)
1889 if (!operand_equal_p (fna[i], fnb[i], 0))
1890 return false;
1892 return true;
1895 /* If all the functions in CF are the same, returns one of them,
1896 otherwise returns NULL. */
1898 static affine_fn
1899 common_affine_function (conflict_function *cf)
1901 unsigned i;
1902 affine_fn comm;
1904 if (!CF_NONTRIVIAL_P (cf))
1905 return affine_fn ();
1907 comm = cf->fns[0];
1909 for (i = 1; i < cf->n; i++)
1910 if (!affine_function_equal_p (comm, cf->fns[i]))
1911 return affine_fn ();
1913 return comm;
1916 /* Returns the base of the affine function FN. */
1918 static tree
1919 affine_function_base (affine_fn fn)
1921 return fn[0];
1924 /* Returns true if FN is a constant. */
1926 static bool
1927 affine_function_constant_p (affine_fn fn)
1929 unsigned i;
1930 tree coef;
1932 for (i = 1; fn.iterate (i, &coef); i++)
1933 if (!integer_zerop (coef))
1934 return false;
1936 return true;
1939 /* Returns true if FN is the zero constant function. */
1941 static bool
1942 affine_function_zero_p (affine_fn fn)
1944 return (integer_zerop (affine_function_base (fn))
1945 && affine_function_constant_p (fn));
1948 /* Returns a signed integer type with the largest precision from TA
1949 and TB. */
1951 static tree
1952 signed_type_for_types (tree ta, tree tb)
1954 if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
1955 return signed_type_for (ta);
1956 else
1957 return signed_type_for (tb);
1960 /* Applies operation OP on affine functions FNA and FNB, and returns the
1961 result. */
1963 static affine_fn
1964 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
1966 unsigned i, n, m;
1967 affine_fn ret;
1968 tree coef;
1970 if (fnb.length () > fna.length ())
1972 n = fna.length ();
1973 m = fnb.length ();
1975 else
1977 n = fnb.length ();
1978 m = fna.length ();
1981 ret.create (m);
1982 for (i = 0; i < n; i++)
1984 tree type = signed_type_for_types (TREE_TYPE (fna[i]),
1985 TREE_TYPE (fnb[i]));
1986 ret.quick_push (fold_build2 (op, type, fna[i], fnb[i]));
1989 for (; fna.iterate (i, &coef); i++)
1990 ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1991 coef, integer_zero_node));
1992 for (; fnb.iterate (i, &coef); i++)
1993 ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1994 integer_zero_node, coef));
1996 return ret;
1999 /* Returns the sum of affine functions FNA and FNB. */
2001 static affine_fn
2002 affine_fn_plus (affine_fn fna, affine_fn fnb)
2004 return affine_fn_op (PLUS_EXPR, fna, fnb);
2007 /* Returns the difference of affine functions FNA and FNB. */
2009 static affine_fn
2010 affine_fn_minus (affine_fn fna, affine_fn fnb)
2012 return affine_fn_op (MINUS_EXPR, fna, fnb);
2015 /* Frees affine function FN. */
2017 static void
2018 affine_fn_free (affine_fn fn)
2020 fn.release ();
2023 /* Determine for each subscript in the data dependence relation DDR
2024 the distance. */
2026 static void
2027 compute_subscript_distance (struct data_dependence_relation *ddr)
2029 conflict_function *cf_a, *cf_b;
2030 affine_fn fn_a, fn_b, diff;
2032 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
2034 unsigned int i;
2036 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2038 struct subscript *subscript;
2040 subscript = DDR_SUBSCRIPT (ddr, i);
2041 cf_a = SUB_CONFLICTS_IN_A (subscript);
2042 cf_b = SUB_CONFLICTS_IN_B (subscript);
2044 fn_a = common_affine_function (cf_a);
2045 fn_b = common_affine_function (cf_b);
2046 if (!fn_a.exists () || !fn_b.exists ())
2048 SUB_DISTANCE (subscript) = chrec_dont_know;
2049 return;
2051 diff = affine_fn_minus (fn_a, fn_b);
2053 if (affine_function_constant_p (diff))
2054 SUB_DISTANCE (subscript) = affine_function_base (diff);
2055 else
2056 SUB_DISTANCE (subscript) = chrec_dont_know;
2058 affine_fn_free (diff);
2063 /* Returns the conflict function for "unknown". */
2065 static conflict_function *
2066 conflict_fn_not_known (void)
2068 conflict_function *fn = XCNEW (conflict_function);
2069 fn->n = NOT_KNOWN;
2071 return fn;
2074 /* Returns the conflict function for "independent". */
2076 static conflict_function *
2077 conflict_fn_no_dependence (void)
2079 conflict_function *fn = XCNEW (conflict_function);
2080 fn->n = NO_DEPENDENCE;
2082 return fn;
2085 /* Returns true if the address of OBJ is invariant in LOOP. */
2087 static bool
2088 object_address_invariant_in_loop_p (const struct loop *loop, const_tree obj)
2090 while (handled_component_p (obj))
2092 if (TREE_CODE (obj) == ARRAY_REF)
2094 /* Index of the ARRAY_REF was zeroed in analyze_indices, thus we only
2095 need to check the stride and the lower bound of the reference. */
2096 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
2097 loop->num)
2098 || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 3),
2099 loop->num))
2100 return false;
2102 else if (TREE_CODE (obj) == COMPONENT_REF)
2104 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
2105 loop->num))
2106 return false;
2108 obj = TREE_OPERAND (obj, 0);
2111 if (!INDIRECT_REF_P (obj)
2112 && TREE_CODE (obj) != MEM_REF)
2113 return true;
2115 return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
2116 loop->num);
2119 /* Returns false if we can prove that data references A and B do not alias,
2120 true otherwise. If LOOP_NEST is false no cross-iteration aliases are
2121 considered. */
bool
dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
		bool loop_nest)
{
  tree addr_a = DR_BASE_OBJECT (a);
  tree addr_b = DR_BASE_OBJECT (b);

  /* If we are not processing a loop nest but scalar code we
     do not need to care about possible cross-iteration dependences
     and thus can process the full original reference.  Do so,
     similar to how loop invariant motion applies extra offset-based
     disambiguation.  */
  if (!loop_nest)
    {
      aff_tree off1, off2;
      poly_widest_int size1, size2;
      get_inner_reference_aff (DR_REF (a), &off1, &size1);
      get_inner_reference_aff (DR_REF (b), &off2, &size2);
      /* Form OFF2 - OFF1 and check whether, given the two access sizes,
	 the byte ranges can possibly overlap.  */
      aff_combination_scale (&off1, -1);
      aff_combination_add (&off2, &off1);
      if (aff_comb_cannot_overlap_p (&off2, size1, size2))
	return false;
    }

  /* (TARGET_)MEM_REFs within the same dependence clique but with
     different bases are guaranteed not to alias (this is how restrict
     information is represented on memory references).  */
  if ((TREE_CODE (addr_a) == MEM_REF || TREE_CODE (addr_a) == TARGET_MEM_REF)
      && (TREE_CODE (addr_b) == MEM_REF || TREE_CODE (addr_b) == TARGET_MEM_REF)
      && MR_DEPENDENCE_CLIQUE (addr_a) == MR_DEPENDENCE_CLIQUE (addr_b)
      && MR_DEPENDENCE_BASE (addr_a) != MR_DEPENDENCE_BASE (addr_b))
    return false;

  /* If we had an evolution in a pointer-based MEM_REF BASE_OBJECT we
     do not know the size of the base-object.  So we cannot do any
     offset/overlap based analysis but have to rely on points-to
     information only.  */
  if (TREE_CODE (addr_a) == MEM_REF
      && (DR_UNCONSTRAINED_BASE (a)
	  || TREE_CODE (TREE_OPERAND (addr_a, 0)) == SSA_NAME))
    {
      /* For true dependences we can apply TBAA.  */
      if (flag_strict_aliasing
	  && DR_IS_WRITE (a) && DR_IS_READ (b)
	  && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
				     get_alias_set (DR_REF (b))))
	return false;
      if (TREE_CODE (addr_b) == MEM_REF)
	return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
				       TREE_OPERAND (addr_b, 0));
      else
	return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
				       build_fold_addr_expr (addr_b));
    }
  else if (TREE_CODE (addr_b) == MEM_REF
	   && (DR_UNCONSTRAINED_BASE (b)
	       || TREE_CODE (TREE_OPERAND (addr_b, 0)) == SSA_NAME))
    {
      /* For true dependences we can apply TBAA.  */
      if (flag_strict_aliasing
	  && DR_IS_WRITE (a) && DR_IS_READ (b)
	  && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
				     get_alias_set (DR_REF (b))))
	return false;
      if (TREE_CODE (addr_a) == MEM_REF)
	return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
				       TREE_OPERAND (addr_b, 0));
      else
	return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
				       TREE_OPERAND (addr_b, 0));
    }

  /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
     that is being subsetted in the loop nest.  Dispatch on the kind of
     dependence so the oracle can apply the appropriate refinements.  */
  if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
    return refs_output_dependent_p (addr_a, addr_b);
  else if (DR_IS_READ (a) && DR_IS_WRITE (b))
    return refs_anti_dependent_p (addr_a, addr_b);
  return refs_may_alias_p (addr_a, addr_b);
}
2201 /* REF_A and REF_B both satisfy access_fn_component_p. Return true
2202 if it is meaningful to compare their associated access functions
2203 when checking for dependencies. */
2205 static bool
2206 access_fn_components_comparable_p (tree ref_a, tree ref_b)
2208 /* Allow pairs of component refs from the following sets:
2210 { REALPART_EXPR, IMAGPART_EXPR }
2211 { COMPONENT_REF }
2212 { ARRAY_REF }. */
2213 tree_code code_a = TREE_CODE (ref_a);
2214 tree_code code_b = TREE_CODE (ref_b);
2215 if (code_a == IMAGPART_EXPR)
2216 code_a = REALPART_EXPR;
2217 if (code_b == IMAGPART_EXPR)
2218 code_b = REALPART_EXPR;
2219 if (code_a != code_b)
2220 return false;
2222 if (TREE_CODE (ref_a) == COMPONENT_REF)
2223 /* ??? We cannot simply use the type of operand #0 of the refs here as
2224 the Fortran compiler smuggles type punning into COMPONENT_REFs.
2225 Use the DECL_CONTEXT of the FIELD_DECLs instead. */
2226 return (DECL_CONTEXT (TREE_OPERAND (ref_a, 1))
2227 == DECL_CONTEXT (TREE_OPERAND (ref_b, 1)));
2229 return types_compatible_p (TREE_TYPE (TREE_OPERAND (ref_a, 0)),
2230 TREE_TYPE (TREE_OPERAND (ref_b, 0)));
/* Initialize a data dependence relation between data accesses A and
   B.  NB_LOOPS is the number of loops surrounding the references: the
   size of the classic distance/direction vectors.  */

struct data_dependence_relation *
initialize_data_dependence_relation (struct data_reference *a,
				     struct data_reference *b,
				     vec<loop_p> loop_nest)
{
  struct data_dependence_relation *res;
  unsigned int i;

  res = XCNEW (struct data_dependence_relation);
  DDR_A (res) = a;
  DDR_B (res) = b;
  DDR_LOOP_NEST (res).create (0);
  DDR_SUBSCRIPTS (res).create (0);
  DDR_DIR_VECTS (res).create (0);
  DDR_DIST_VECTS (res).create (0);

  /* A missing reference means we cannot say anything.  */
  if (a == NULL || b == NULL)
    {
      DDR_ARE_DEPENDENT (res) = chrec_dont_know;
      return res;
    }

  /* If the data references do not alias, then they are independent.  */
  if (!dr_may_alias_p (a, b, loop_nest.exists ()))
    {
      DDR_ARE_DEPENDENT (res) = chrec_known;
      return res;
    }

  unsigned int num_dimensions_a = DR_NUM_DIMENSIONS (a);
  unsigned int num_dimensions_b = DR_NUM_DIMENSIONS (b);
  if (num_dimensions_a == 0 || num_dimensions_b == 0)
    {
      DDR_ARE_DEPENDENT (res) = chrec_dont_know;
      return res;
    }

  /* For unconstrained bases, the root (highest-indexed) subscript
     describes a variation in the base of the original DR_REF rather
     than a component access.  We have no type that accurately describes
     the new DR_BASE_OBJECT (whose TREE_TYPE describes the type *after*
     applying this subscript) so limit the search to the last real
     component access.

     E.g. for:

       void
       f (int a[][8], int b[][8])
       {
	 for (int i = 0; i < 8; ++i)
	   a[i * 2][0] = b[i][0];
       }

     the a and b accesses have a single ARRAY_REF component reference [0]
     but have two subscripts.  */
  if (DR_UNCONSTRAINED_BASE (a))
    num_dimensions_a -= 1;
  if (DR_UNCONSTRAINED_BASE (b))
    num_dimensions_b -= 1;

  /* These structures describe sequences of component references in
     DR_REF (A) and DR_REF (B).  Each component reference is tied to a
     specific access function.  */
  struct {
    /* The sequence starts at DR_ACCESS_FN (A, START_A) of A and
       DR_ACCESS_FN (B, START_B) of B (inclusive) and extends to higher
       indices.  In C notation, these are the indices of the rightmost
       component references; e.g. for a sequence .b.c.d, the start
       index is for .d.  */
    unsigned int start_a;
    unsigned int start_b;

    /* The sequence contains LENGTH consecutive access functions from
       each DR.  */
    unsigned int length;

    /* The enclosing objects for the A and B sequences respectively,
       i.e. the objects to which DR_ACCESS_FN (A, START_A + LENGTH - 1)
       and DR_ACCESS_FN (B, START_B + LENGTH - 1) are applied.  */
    tree object_a;
    tree object_b;
  } full_seq = {}, struct_seq = {};

  /* Before each iteration of the loop:

     - REF_A is what you get after applying DR_ACCESS_FN (A, INDEX_A) and
     - REF_B is what you get after applying DR_ACCESS_FN (B, INDEX_B).  */
  unsigned int index_a = 0;
  unsigned int index_b = 0;
  tree ref_a = DR_REF (a);
  tree ref_b = DR_REF (b);

  /* Now walk the component references from the final DR_REFs back up to
     the enclosing base objects.  Each component reference corresponds
     to one access function in the DR, with access function 0 being for
     the final DR_REF and the highest-indexed access function being the
     one that is applied to the base of the DR.

     Look for a sequence of component references whose access functions
     are comparable (see access_fn_components_comparable_p).  If more
     than one such sequence exists, pick the one nearest the base
     (which is the leftmost sequence in C notation).  Store this sequence
     in FULL_SEQ.

     For example, if we have:

	struct foo { struct bar s; ... } (*a)[10], (*b)[10];

	A: a[0][i].s.c.d
	B: __real b[0][i].s.e[i].f

     (where d is the same type as the real component of f) then the access
     functions would be:

	 0   1   2   3
     A:  .d  .c  .s  [i]

	 0   1   2   3   4   5
     B:  __real  .f  [i]  .e  .s  [i]

     The A0/B2 column isn't comparable, since .d is a COMPONENT_REF
     and [i] is an ARRAY_REF.  However, the A1/B3 column contains two
     COMPONENT_REF accesses for struct bar, so is comparable.  Likewise
     the A2/B4 column contains two COMPONENT_REF accesses for struct foo,
     so is comparable.  The A3/B5 column contains two ARRAY_REFs that
     index foo[10] arrays, so is again comparable.  The sequence is
     therefore:

	A: [1, 3]  (i.e. [i].s.c)
	B: [3, 5]  (i.e. [i].s.e)

     Also look for sequences of component references whose access
     functions are comparable and whose enclosing objects have the same
     RECORD_TYPE.  Store this sequence in STRUCT_SEQ.  In the above
     example, STRUCT_SEQ would be:

	A: [1, 2]  (i.e. s.c)
	B: [3, 4]  (i.e. s.e)  */
  while (index_a < num_dimensions_a && index_b < num_dimensions_b)
    {
      /* REF_A and REF_B must be one of the component access types
	 allowed by dr_analyze_indices.  */
      gcc_checking_assert (access_fn_component_p (ref_a));
      gcc_checking_assert (access_fn_component_p (ref_b));

      /* Get the immediately-enclosing objects for REF_A and REF_B,
	 i.e. the references *before* applying DR_ACCESS_FN (A, INDEX_A)
	 and DR_ACCESS_FN (B, INDEX_B).  */
      tree object_a = TREE_OPERAND (ref_a, 0);
      tree object_b = TREE_OPERAND (ref_b, 0);

      tree type_a = TREE_TYPE (object_a);
      tree type_b = TREE_TYPE (object_b);
      if (access_fn_components_comparable_p (ref_a, ref_b))
	{
	  /* This pair of component accesses is comparable for dependence
	     analysis, so we can include DR_ACCESS_FN (A, INDEX_A) and
	     DR_ACCESS_FN (B, INDEX_B) in the sequence.  */
	  if (full_seq.start_a + full_seq.length != index_a
	      || full_seq.start_b + full_seq.length != index_b)
	    {
	      /* The accesses don't extend the current sequence,
		 so start a new one here.  */
	      full_seq.start_a = index_a;
	      full_seq.start_b = index_b;
	      full_seq.length = 0;
	    }

	  /* Add this pair of references to the sequence.  */
	  full_seq.length += 1;
	  full_seq.object_a = object_a;
	  full_seq.object_b = object_b;

	  /* If the enclosing objects are structures (and thus have the
	     same RECORD_TYPE), record the new sequence in STRUCT_SEQ.  */
	  if (TREE_CODE (type_a) == RECORD_TYPE)
	    struct_seq = full_seq;

	  /* Move to the next containing reference for both A and B.  */
	  ref_a = object_a;
	  ref_b = object_b;
	  index_a += 1;
	  index_b += 1;
	  continue;
	}

      /* Try to approach equal type sizes.  */
      if (!COMPLETE_TYPE_P (type_a)
	  || !COMPLETE_TYPE_P (type_b)
	  || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_a))
	  || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_b)))
	break;

      /* Advance whichever side accesses the smaller enclosing object
	 (both sides when the sizes are equal).  */
      unsigned HOST_WIDE_INT size_a = tree_to_uhwi (TYPE_SIZE_UNIT (type_a));
      unsigned HOST_WIDE_INT size_b = tree_to_uhwi (TYPE_SIZE_UNIT (type_b));
      if (size_a <= size_b)
	{
	  index_a += 1;
	  ref_a = object_a;
	}
      if (size_b <= size_a)
	{
	  index_b += 1;
	  ref_b = object_b;
	}
    }

  /* See whether FULL_SEQ ends at the base and whether the two bases
     are equal.  We do not care about TBAA or alignment info so we can
     use OEP_ADDRESS_OF to avoid false negatives.  */
  tree base_a = DR_BASE_OBJECT (a);
  tree base_b = DR_BASE_OBJECT (b);
  bool same_base_p = (full_seq.start_a + full_seq.length == num_dimensions_a
		      && full_seq.start_b + full_seq.length == num_dimensions_b
		      && DR_UNCONSTRAINED_BASE (a) == DR_UNCONSTRAINED_BASE (b)
		      && operand_equal_p (base_a, base_b, OEP_ADDRESS_OF)
		      && types_compatible_p (TREE_TYPE (base_a),
					     TREE_TYPE (base_b))
		      && (!loop_nest.exists ()
			  || (object_address_invariant_in_loop_p
			      (loop_nest[0], base_a))));

  /* If the bases are the same, we can include the base variation too.
     E.g. the b accesses in:

       for (int i = 0; i < n; ++i)
	 b[i + 4][0] = b[i][0];

     have a definite dependence distance of 4, while for:

       for (int i = 0; i < n; ++i)
	 a[i + 4][0] = b[i][0];

     the dependence distance depends on the gap between a and b.

     If the bases are different then we can only rely on the sequence
     rooted at a structure access, since arrays are allowed to overlap
     arbitrarily and change shape arbitrarily.  E.g. we treat this as
     valid code:

       int a[256];
       ...
       ((int (*)[4][3]) &a[1])[i][0] += ((int (*)[4][3]) &a[2])[i][0];

     where two lvalues with the same int[4][3] type overlap, and where
     both lvalues are distinct from the object's declared type.  */
  if (same_base_p)
    {
      if (DR_UNCONSTRAINED_BASE (a))
	full_seq.length += 1;
    }
  else
    full_seq = struct_seq;

  /* Punt if we didn't find a suitable sequence.  */
  if (full_seq.length == 0)
    {
      DDR_ARE_DEPENDENT (res) = chrec_dont_know;
      return res;
    }

  if (!same_base_p)
    {
      /* Partial overlap is possible for different bases when strict aliasing
	 is not in effect.  It's also possible if either base involves a union
	 access; e.g. for:

	   struct s1 { int a[2]; };
	   struct s2 { struct s1 b; int c; };
	   struct s3 { int d; struct s1 e; };
	   union u { struct s2 f; struct s3 g; } *p, *q;

	 the s1 at "p->f.b" (base "p->f") partially overlaps the s1 at
	 "p->g.e" (base "p->g") and might partially overlap the s1 at
	 "q->g.e" (base "q->g").  */
      if (!flag_strict_aliasing
	  || ref_contains_union_access_p (full_seq.object_a)
	  || ref_contains_union_access_p (full_seq.object_b))
	{
	  DDR_ARE_DEPENDENT (res) = chrec_dont_know;
	  return res;
	}

      DDR_COULD_BE_INDEPENDENT_P (res) = true;
      if (!loop_nest.exists ()
	  || (object_address_invariant_in_loop_p (loop_nest[0],
						  full_seq.object_a)
	      && object_address_invariant_in_loop_p (loop_nest[0],
						     full_seq.object_b)))
	{
	  DDR_OBJECT_A (res) = full_seq.object_a;
	  DDR_OBJECT_B (res) = full_seq.object_b;
	}
    }

  DDR_AFFINE_P (res) = true;
  DDR_ARE_DEPENDENT (res) = NULL_TREE;
  DDR_SUBSCRIPTS (res).create (full_seq.length);
  DDR_LOOP_NEST (res) = loop_nest;
  DDR_INNER_LOOP (res) = 0;
  DDR_SELF_REFERENCE (res) = false;

  /* Pair up the comparable access functions and initialize each
     subscript's overlap information to "unknown".  */
  for (i = 0; i < full_seq.length; ++i)
    {
      struct subscript *subscript;

      subscript = XNEW (struct subscript);
      SUB_ACCESS_FN (subscript, 0) = DR_ACCESS_FN (a, full_seq.start_a + i);
      SUB_ACCESS_FN (subscript, 1) = DR_ACCESS_FN (b, full_seq.start_b + i);
      SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
      SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
      SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
      SUB_DISTANCE (subscript) = chrec_dont_know;
      DDR_SUBSCRIPTS (res).safe_push (subscript);
    }

  return res;
}
2556 /* Frees memory used by the conflict function F. */
2558 static void
2559 free_conflict_function (conflict_function *f)
2561 unsigned i;
2563 if (CF_NONTRIVIAL_P (f))
2565 for (i = 0; i < f->n; i++)
2566 affine_fn_free (f->fns[i]);
2568 free (f);
2571 /* Frees memory used by SUBSCRIPTS. */
2573 static void
2574 free_subscripts (vec<subscript_p> subscripts)
2576 unsigned i;
2577 subscript_p s;
2579 FOR_EACH_VEC_ELT (subscripts, i, s)
2581 free_conflict_function (s->conflicting_iterations_in_a);
2582 free_conflict_function (s->conflicting_iterations_in_b);
2583 free (s);
2585 subscripts.release ();
/* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
   description.  */

static inline void
finalize_ddr_dependent (struct data_dependence_relation *ddr,
			tree chrec)
{
  DDR_ARE_DEPENDENT (ddr) = chrec;
  /* The per-subscript overlap data is no longer meaningful once the
     overall answer is recorded; free it and leave an empty vector.  */
  free_subscripts (DDR_SUBSCRIPTS (ddr));
  DDR_SUBSCRIPTS (ddr).create (0);
}
/* The dependence relation DDR cannot be represented by a distance
   vector.  */

static inline void
non_affine_dependence_relation (struct data_dependence_relation *ddr)
{
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");

  DDR_AFFINE_P (ddr) = false;
}
2614 /* This section contains the classic Banerjee tests. */
2616 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
2617 variables, i.e., if the ZIV (Zero Index Variable) test is true. */
2619 static inline bool
2620 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
2622 return (evolution_function_is_constant_p (chrec_a)
2623 && evolution_function_is_constant_p (chrec_b));
2626 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
2627 variable, i.e., if the SIV (Single Index Variable) test is true. */
2629 static bool
2630 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
2632 if ((evolution_function_is_constant_p (chrec_a)
2633 && evolution_function_is_univariate_p (chrec_b))
2634 || (evolution_function_is_constant_p (chrec_b)
2635 && evolution_function_is_univariate_p (chrec_a)))
2636 return true;
2638 if (evolution_function_is_univariate_p (chrec_a)
2639 && evolution_function_is_univariate_p (chrec_b))
2641 switch (TREE_CODE (chrec_a))
2643 case POLYNOMIAL_CHREC:
2644 switch (TREE_CODE (chrec_b))
2646 case POLYNOMIAL_CHREC:
2647 if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
2648 return false;
2649 /* FALLTHRU */
2651 default:
2652 return true;
2655 default:
2656 return true;
2660 return false;
2663 /* Creates a conflict function with N dimensions. The affine functions
2664 in each dimension follow. */
2666 static conflict_function *
2667 conflict_fn (unsigned n, ...)
2669 unsigned i;
2670 conflict_function *ret = XCNEW (conflict_function);
2671 va_list ap;
2673 gcc_assert (n > 0 && n <= MAX_DIM);
2674 va_start (ap, n);
2676 ret->n = n;
2677 for (i = 0; i < n; i++)
2678 ret->fns[i] = va_arg (ap, affine_fn);
2679 va_end (ap);
2681 return ret;
2684 /* Returns constant affine function with value CST. */
2686 static affine_fn
2687 affine_fn_cst (tree cst)
2689 affine_fn fn;
2690 fn.create (1);
2691 fn.quick_push (cst);
2692 return fn;
2695 /* Returns affine function with single variable, CST + COEF * x_DIM. */
2697 static affine_fn
2698 affine_fn_univar (tree cst, unsigned dim, tree coef)
2700 affine_fn fn;
2701 fn.create (dim + 1);
2702 unsigned i;
2704 gcc_assert (dim > 0);
2705 fn.quick_push (cst);
2706 for (i = 1; i < dim; i++)
2707 fn.quick_push (integer_zero_node);
2708 fn.quick_push (coef);
2709 return fn;
2712 /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
2713 *OVERLAPS_B are initialized to the functions that describe the
2714 relation between the elements accessed twice by CHREC_A and
2715 CHREC_B. For k >= 0, the following property is verified:
2717 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2719 static void
2720 analyze_ziv_subscript (tree chrec_a,
2721 tree chrec_b,
2722 conflict_function **overlaps_a,
2723 conflict_function **overlaps_b,
2724 tree *last_conflicts)
2726 tree type, difference;
2727 dependence_stats.num_ziv++;
2729 if (dump_file && (dump_flags & TDF_DETAILS))
2730 fprintf (dump_file, "(analyze_ziv_subscript \n");
2732 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
2733 chrec_a = chrec_convert (type, chrec_a, NULL);
2734 chrec_b = chrec_convert (type, chrec_b, NULL);
2735 difference = chrec_fold_minus (type, chrec_a, chrec_b);
2737 switch (TREE_CODE (difference))
2739 case INTEGER_CST:
2740 if (integer_zerop (difference))
2742 /* The difference is equal to zero: the accessed index
2743 overlaps for each iteration in the loop. */
2744 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2745 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2746 *last_conflicts = chrec_dont_know;
2747 dependence_stats.num_ziv_dependent++;
2749 else
2751 /* The accesses do not overlap. */
2752 *overlaps_a = conflict_fn_no_dependence ();
2753 *overlaps_b = conflict_fn_no_dependence ();
2754 *last_conflicts = integer_zero_node;
2755 dependence_stats.num_ziv_independent++;
2757 break;
2759 default:
2760 /* We're not sure whether the indexes overlap. For the moment,
2761 conservatively answer "don't know". */
2762 if (dump_file && (dump_flags & TDF_DETAILS))
2763 fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
2765 *overlaps_a = conflict_fn_not_known ();
2766 *overlaps_b = conflict_fn_not_known ();
2767 *last_conflicts = chrec_dont_know;
2768 dependence_stats.num_ziv_unimplemented++;
2769 break;
2772 if (dump_file && (dump_flags & TDF_DETAILS))
2773 fprintf (dump_file, ")\n");
2776 /* Similar to max_stmt_executions_int, but returns the bound as a tree,
2777 and only if it fits to the int type. If this is not the case, or the
2778 bound on the number of iterations of LOOP could not be derived, returns
2779 chrec_dont_know. */
2781 static tree
2782 max_stmt_executions_tree (struct loop *loop)
2784 widest_int nit;
2786 if (!max_stmt_executions (loop, &nit))
2787 return chrec_dont_know;
2789 if (!wi::fits_to_tree_p (nit, unsigned_type_node))
2790 return chrec_dont_know;
2792 return wide_int_to_tree (unsigned_type_node, nit);
2795 /* Determine whether the CHREC is always positive/negative. If the expression
2796 cannot be statically analyzed, return false, otherwise set the answer into
2797 VALUE. */
2799 static bool
2800 chrec_is_positive (tree chrec, bool *value)
2802 bool value0, value1, value2;
2803 tree end_value, nb_iter;
2805 switch (TREE_CODE (chrec))
2807 case POLYNOMIAL_CHREC:
2808 if (!chrec_is_positive (CHREC_LEFT (chrec), &value0)
2809 || !chrec_is_positive (CHREC_RIGHT (chrec), &value1))
2810 return false;
2812 /* FIXME -- overflows. */
2813 if (value0 == value1)
2815 *value = value0;
2816 return true;
2819 /* Otherwise the chrec is under the form: "{-197, +, 2}_1",
2820 and the proof consists in showing that the sign never
2821 changes during the execution of the loop, from 0 to
2822 loop->nb_iterations. */
2823 if (!evolution_function_is_affine_p (chrec))
2824 return false;
2826 nb_iter = number_of_latch_executions (get_chrec_loop (chrec));
2827 if (chrec_contains_undetermined (nb_iter))
2828 return false;
2830 #if 0
2831 /* TODO -- If the test is after the exit, we may decrease the number of
2832 iterations by one. */
2833 if (after_exit)
2834 nb_iter = chrec_fold_minus (type, nb_iter, build_int_cst (type, 1));
2835 #endif
2837 end_value = chrec_apply (CHREC_VARIABLE (chrec), chrec, nb_iter);
2839 if (!chrec_is_positive (end_value, &value2))
2840 return false;
2842 *value = value0;
2843 return value0 == value1;
2845 case INTEGER_CST:
2846 switch (tree_int_cst_sgn (chrec))
2848 case -1:
2849 *value = false;
2850 break;
2851 case 1:
2852 *value = true;
2853 break;
2854 default:
2855 return false;
2857 return true;
2859 default:
2860 return false;
/* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
   constant, and CHREC_B is an affine function.  *OVERLAPS_A and
   *OVERLAPS_B are initialized to the functions that describe the
   relation between the elements accessed twice by CHREC_A and
   CHREC_B.  For k >= 0, the following property is verified:

   CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */

static void
analyze_siv_subscript_cst_affine (tree chrec_a,
				  tree chrec_b,
				  conflict_function **overlaps_a,
				  conflict_function **overlaps_b,
				  tree *last_conflicts)
{
  bool value0, value1, value2;
  tree type, difference, tmp;

  /* Compare in a common signed type; DIFFERENCE is the gap between the
     initial value of the affine CHREC_B and the constant CHREC_A.  */
  type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
  chrec_a = chrec_convert (type, chrec_a, NULL);
  chrec_b = chrec_convert (type, chrec_b, NULL);
  difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);

  /* Special case overlap in the first iteration.  */
  if (integer_zerop (difference))
    {
      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = integer_one_node;
      return;
    }

  if (!chrec_is_positive (initial_condition (difference), &value0))
    {
      /* The sign of the gap is unknown: give up.  */
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "siv test failed: chrec is not positive.\n");

      dependence_stats.num_siv_unimplemented++;
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      return;
    }
  else
    {
      if (value0 == false)
	{
	  /* CHREC_B starts below CHREC_A; it can only reach CHREC_A if
	     its step is positive.  */
	  if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "siv test failed: chrec not positive.\n");

	      *overlaps_a = conflict_fn_not_known ();
	      *overlaps_b = conflict_fn_not_known ();
	      *last_conflicts = chrec_dont_know;
	      dependence_stats.num_siv_unimplemented++;
	      return;
	    }
	  else
	    {
	      if (value1 == true)
		{
		  /* Example:
		     chrec_a = 12
		     chrec_b = {10, +, 1}
		  */

		  if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
		    {
		      HOST_WIDE_INT numiter;
		      struct loop *loop = get_chrec_loop (chrec_b);

		      /* The overlap happens after |difference| / step
			 iterations of CHREC_B.  */
		      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
		      tmp = fold_build2 (EXACT_DIV_EXPR, type,
					 fold_build1 (ABS_EXPR, type, difference),
					 CHREC_RIGHT (chrec_b));
		      *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
		      *last_conflicts = integer_one_node;

		      /* Perform weak-zero siv test to see if overlap is
			 outside the loop bounds.  */
		      numiter = max_stmt_executions_int (loop);

		      if (numiter >= 0
			  && compare_tree_int (tmp, numiter) > 0)
			{
			  free_conflict_function (*overlaps_a);
			  free_conflict_function (*overlaps_b);
			  *overlaps_a = conflict_fn_no_dependence ();
			  *overlaps_b = conflict_fn_no_dependence ();
			  *last_conflicts = integer_zero_node;
			  dependence_stats.num_siv_independent++;
			  return;
			}
		      dependence_stats.num_siv_dependent++;
		      return;
		    }

		  /* When the step does not divide the difference, there are
		     no overlaps.  */
		  else
		    {
		      *overlaps_a = conflict_fn_no_dependence ();
		      *overlaps_b = conflict_fn_no_dependence ();
		      *last_conflicts = integer_zero_node;
		      dependence_stats.num_siv_independent++;
		      return;
		    }
		}

	      else
		{
		  /* Example:
		     chrec_a = 12
		     chrec_b = {10, +, -1}

		     In this case, chrec_a will not overlap with chrec_b.  */
		  *overlaps_a = conflict_fn_no_dependence ();
		  *overlaps_b = conflict_fn_no_dependence ();
		  *last_conflicts = integer_zero_node;
		  dependence_stats.num_siv_independent++;
		  return;
		}
	    }
	}
      else
	{
	  /* CHREC_B starts above CHREC_A; it can only come back down to
	     CHREC_A if its step is negative.  */
	  if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "siv test failed: chrec not positive.\n");

	      *overlaps_a = conflict_fn_not_known ();
	      *overlaps_b = conflict_fn_not_known ();
	      *last_conflicts = chrec_dont_know;
	      dependence_stats.num_siv_unimplemented++;
	      return;
	    }
	  else
	    {
	      if (value2 == false)
		{
		  /* Example:
		     chrec_a = 3
		     chrec_b = {10, +, -1}
		  */
		  if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
		    {
		      HOST_WIDE_INT numiter;
		      struct loop *loop = get_chrec_loop (chrec_b);

		      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
		      tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
					 CHREC_RIGHT (chrec_b));
		      *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
		      *last_conflicts = integer_one_node;

		      /* Perform weak-zero siv test to see if overlap is
			 outside the loop bounds.  */
		      numiter = max_stmt_executions_int (loop);

		      if (numiter >= 0
			  && compare_tree_int (tmp, numiter) > 0)
			{
			  free_conflict_function (*overlaps_a);
			  free_conflict_function (*overlaps_b);
			  *overlaps_a = conflict_fn_no_dependence ();
			  *overlaps_b = conflict_fn_no_dependence ();
			  *last_conflicts = integer_zero_node;
			  dependence_stats.num_siv_independent++;
			  return;
			}
		      dependence_stats.num_siv_dependent++;
		      return;
		    }

		  /* When the step does not divide the difference, there
		     are no overlaps.  */
		  else
		    {
		      *overlaps_a = conflict_fn_no_dependence ();
		      *overlaps_b = conflict_fn_no_dependence ();
		      *last_conflicts = integer_zero_node;
		      dependence_stats.num_siv_independent++;
		      return;
		    }
		}
	      else
		{
		  /* Example:
		     chrec_a = 3
		     chrec_b = {4, +, 1}

		     In this case, chrec_a will not overlap with chrec_b.  */
		  *overlaps_a = conflict_fn_no_dependence ();
		  *overlaps_b = conflict_fn_no_dependence ();
		  *last_conflicts = integer_zero_node;
		  dependence_stats.num_siv_independent++;
		  return;
		}
	    }
	}
    }
}
3071 /* Helper recursive function for initializing the matrix A. Returns
3072 the initial value of CHREC. */
3074 static tree
3075 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
3077 gcc_assert (chrec);
3079 switch (TREE_CODE (chrec))
3081 case POLYNOMIAL_CHREC:
3082 A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
3083 return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
3085 case PLUS_EXPR:
3086 case MULT_EXPR:
3087 case MINUS_EXPR:
3089 tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
3090 tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
3092 return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
3095 CASE_CONVERT:
3097 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
3098 return chrec_convert (chrec_type (chrec), op, NULL);
3101 case BIT_NOT_EXPR:
3103 /* Handle ~X as -1 - X. */
3104 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
3105 return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
3106 build_int_cst (TREE_TYPE (chrec), -1), op);
3109 case INTEGER_CST:
3110 return chrec;
3112 default:
3113 gcc_unreachable ();
3114 return NULL_TREE;
3118 #define FLOOR_DIV(x,y) ((x) / (y))
3120 /* Solves the special case of the Diophantine equation:
3121 | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
3123 Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
3124 number of iterations that loops X and Y run. The overlaps will be
3125 constructed as evolutions in dimension DIM. */
3127 static void
3128 compute_overlap_steps_for_affine_univar (HOST_WIDE_INT niter,
3129 HOST_WIDE_INT step_a,
3130 HOST_WIDE_INT step_b,
3131 affine_fn *overlaps_a,
3132 affine_fn *overlaps_b,
3133 tree *last_conflicts, int dim)
3135 if (((step_a > 0 && step_b > 0)
3136 || (step_a < 0 && step_b < 0)))
3138 HOST_WIDE_INT step_overlaps_a, step_overlaps_b;
3139 HOST_WIDE_INT gcd_steps_a_b, last_conflict, tau2;
3141 gcd_steps_a_b = gcd (step_a, step_b);
3142 step_overlaps_a = step_b / gcd_steps_a_b;
3143 step_overlaps_b = step_a / gcd_steps_a_b;
3145 if (niter > 0)
3147 tau2 = FLOOR_DIV (niter, step_overlaps_a);
3148 tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
3149 last_conflict = tau2;
3150 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
3152 else
3153 *last_conflicts = chrec_dont_know;
3155 *overlaps_a = affine_fn_univar (integer_zero_node, dim,
3156 build_int_cst (NULL_TREE,
3157 step_overlaps_a));
3158 *overlaps_b = affine_fn_univar (integer_zero_node, dim,
3159 build_int_cst (NULL_TREE,
3160 step_overlaps_b));
3163 else
3165 *overlaps_a = affine_fn_cst (integer_zero_node);
3166 *overlaps_b = affine_fn_cst (integer_zero_node);
3167 *last_conflicts = integer_zero_node;
/* Solves the special case of a Diophantine equation where CHREC_A is
   an affine bivariate function, and CHREC_B is an affine univariate
   function.  For example,

   | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z

   has the following overlapping functions:

   | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
   | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
   | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v

   FORNOW: This is a specialized implementation for a case occurring in
   a common benchmark.  Implement the general algorithm.  */

static void
compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
				      conflict_function **overlaps_a,
				      conflict_function **overlaps_b,
				      tree *last_conflicts)
{
  bool xz_p, yz_p, xyz_p;
  HOST_WIDE_INT step_x, step_y, step_z;
  HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
  affine_fn overlaps_a_xz, overlaps_b_xz;
  affine_fn overlaps_a_yz, overlaps_b_yz;
  affine_fn overlaps_a_xyz, overlaps_b_xyz;
  affine_fn ova1, ova2, ovb;
  tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;

  /* X is the outer evolution of CHREC_A, Y its inner evolution and
     Z the (single) evolution of CHREC_B.  */
  step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
  step_y = int_cst_value (CHREC_RIGHT (chrec_a));
  step_z = int_cst_value (CHREC_RIGHT (chrec_b));

  niter_x = max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)));
  niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a));
  niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b));

  /* Without iteration counts for all three loops we cannot bound the
     conflicts: answer "don't know".  */
  if (niter_x < 0 || niter_y < 0 || niter_z < 0)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");

      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      return;
    }

  /* Solve the univariate sub-problems for the pairs (X, Z), (Y, Z)
     and for the combined step (X + Y, Z).  */
  niter = MIN (niter_x, niter_z);
  compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
					   &overlaps_a_xz,
					   &overlaps_b_xz,
					   &last_conflicts_xz, 1);
  niter = MIN (niter_y, niter_z);
  compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
					   &overlaps_a_yz,
					   &overlaps_b_yz,
					   &last_conflicts_yz, 2);
  niter = MIN (niter_x, niter_z);
  niter = MIN (niter_y, niter);
  compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
					   &overlaps_a_xyz,
					   &overlaps_b_xyz,
					   &last_conflicts_xyz, 3);

  xz_p = !integer_zerop (last_conflicts_xz);
  yz_p = !integer_zerop (last_conflicts_yz);
  xyz_p = !integer_zerop (last_conflicts_xyz);

  if (xz_p || yz_p || xyz_p)
    {
      /* Combine the contributing sub-solutions by summing their affine
	 functions.  The temporaries hold the previous values so they
	 can be freed after affine_fn_plus allocates the sums.  */
      ova1 = affine_fn_cst (integer_zero_node);
      ova2 = affine_fn_cst (integer_zero_node);
      ovb = affine_fn_cst (integer_zero_node);
      if (xz_p)
	{
	  affine_fn t0 = ova1;
	  affine_fn t2 = ovb;

	  ova1 = affine_fn_plus (ova1, overlaps_a_xz);
	  ovb = affine_fn_plus (ovb, overlaps_b_xz);
	  affine_fn_free (t0);
	  affine_fn_free (t2);
	  *last_conflicts = last_conflicts_xz;
	}
      if (yz_p)
	{
	  affine_fn t0 = ova2;
	  affine_fn t2 = ovb;

	  ova2 = affine_fn_plus (ova2, overlaps_a_yz);
	  ovb = affine_fn_plus (ovb, overlaps_b_yz);
	  affine_fn_free (t0);
	  affine_fn_free (t2);
	  *last_conflicts = last_conflicts_yz;
	}
      if (xyz_p)
	{
	  affine_fn t0 = ova1;
	  affine_fn t2 = ova2;
	  affine_fn t4 = ovb;

	  ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
	  ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
	  ovb = affine_fn_plus (ovb, overlaps_b_xyz);
	  affine_fn_free (t0);
	  affine_fn_free (t2);
	  affine_fn_free (t4);
	  *last_conflicts = last_conflicts_xyz;
	}
      /* NOTE(review): *LAST_CONFLICTS keeps whichever of the xz/yz/xyz
	 bounds was stored last rather than an explicit maximum —
	 presumably conservative enough for this FORNOW special case;
	 verify if generalizing.  */
      *overlaps_a = conflict_fn (2, ova1, ova2);
      *overlaps_b = conflict_fn (1, ovb);
    }
  else
    {
      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = integer_zero_node;
    }

  affine_fn_free (overlaps_a_xz);
  affine_fn_free (overlaps_b_xz);
  affine_fn_free (overlaps_a_yz);
  affine_fn_free (overlaps_b_yz);
  affine_fn_free (overlaps_a_xyz);
  affine_fn_free (overlaps_b_xyz);
}
3300 /* Copy the elements of vector VEC1 with length SIZE to VEC2. */
3302 static void
3303 lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
3304 int size)
3306 memcpy (vec2, vec1, size * sizeof (*vec1));
3309 /* Copy the elements of M x N matrix MAT1 to MAT2. */
3311 static void
3312 lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
3313 int m, int n)
3315 int i;
3317 for (i = 0; i < m; i++)
3318 lambda_vector_copy (mat1[i], mat2[i], n);
3321 /* Store the N x N identity matrix in MAT. */
3323 static void
3324 lambda_matrix_id (lambda_matrix mat, int size)
3326 int i, j;
3328 for (i = 0; i < size; i++)
3329 for (j = 0; j < size; j++)
3330 mat[i][j] = (i == j) ? 1 : 0;
3333 /* Return the first nonzero element of vector VEC1 between START and N.
3334 We must have START <= N. Returns N if VEC1 is the zero vector. */
3336 static int
3337 lambda_vector_first_nz (lambda_vector vec1, int n, int start)
3339 int j = start;
3340 while (j < n && vec1[j] == 0)
3341 j++;
3342 return j;
3345 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
3346 R2 = R2 + CONST1 * R1. */
3348 static void
3349 lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2, int const1)
3351 int i;
3353 if (const1 == 0)
3354 return;
3356 for (i = 0; i < n; i++)
3357 mat[r2][i] += const1 * mat[r1][i];
3360 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
3361 and store the result in VEC2. */
3363 static void
3364 lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
3365 int size, int const1)
3367 int i;
3369 if (const1 == 0)
3370 lambda_vector_clear (vec2, size);
3371 else
3372 for (i = 0; i < size; i++)
3373 vec2[i] = const1 * vec1[i];
3376 /* Negate vector VEC1 with length SIZE and store it in VEC2. */
3378 static void
3379 lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
3380 int size)
3382 lambda_vector_mult_const (vec1, vec2, size, -1);
3385 /* Negate row R1 of matrix MAT which has N columns. */
3387 static void
3388 lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
3390 lambda_vector_negate (mat[r1], mat[r1], n);
3393 /* Return true if two vectors are equal. */
3395 static bool
3396 lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
3398 int i;
3399 for (i = 0; i < size; i++)
3400 if (vec1[i] != vec2[i])
3401 return false;
3402 return true;
3405 /* Given an M x N integer matrix A, this function determines an M x
3406 M unimodular matrix U, and an M x N echelon matrix S such that
3407 "U.A = S". This decomposition is also known as "right Hermite".
3409 Ref: Algorithm 2.1 page 33 in "Loop Transformations for
3410 Restructuring Compilers" Utpal Banerjee. */
3412 static void
3413 lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
3414 lambda_matrix S, lambda_matrix U)
3416 int i, j, i0 = 0;
/* Start from S = A and U = identity; every row operation below is
   applied to both so that the invariant U.A = S is preserved.  */
3418 lambda_matrix_copy (A, S, m, n);
3419 lambda_matrix_id (U, m);
3421 for (j = 0; j < n; j++)
3423 if (lambda_vector_first_nz (S[j], m, i0) < m)
3425 ++i0;
/* Euclidean-style elimination: repeatedly subtract the right
   multiple of the row above and swap, until S[i][j] is zero.  */
3426 for (i = m - 1; i >= i0; i--)
3428 while (S[i][j] != 0)
3430 int sigma, factor, a, b;
3432 a = S[i-1][j];
3433 b = S[i][j];
/* FACTOR is the quotient a/b rounded toward zero, with the sign
   of a*b.  */
3434 sigma = (a * b < 0) ? -1: 1;
3435 a = abs (a);
3436 b = abs (b);
3437 factor = sigma * (a / b);
3439 lambda_matrix_row_add (S, n, i, i-1, -factor);
3440 std::swap (S[i], S[i-1]);
/* Mirror the same row operation and swap on U to keep U.A = S.  */
3442 lambda_matrix_row_add (U, m, i, i-1, -factor);
3443 std::swap (U[i], U[i-1]);
3450 /* Determines the overlapping elements due to accesses CHREC_A and
3451 CHREC_B, that are affine functions. This function cannot handle
3452 symbolic evolution functions, ie. when initial conditions are
3453 parameters, because it uses lambda matrices of integers. */
3455 static void
3456 analyze_subscript_affine_affine (tree chrec_a,
3457 tree chrec_b,
3458 conflict_function **overlaps_a,
3459 conflict_function **overlaps_b,
3460 tree *last_conflicts)
3462 unsigned nb_vars_a, nb_vars_b, dim;
3463 HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta;
3464 lambda_matrix A, U, S;
3465 struct obstack scratch_obstack;
3467 if (eq_evolutions_p (chrec_a, chrec_b))
3469 /* The accessed index overlaps for each iteration in the
3470 loop. */
3471 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3472 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3473 *last_conflicts = chrec_dont_know;
3474 return;
3476 if (dump_file && (dump_flags & TDF_DETAILS))
3477 fprintf (dump_file, "(analyze_subscript_affine_affine \n");
3479 /* For determining the initial intersection, we have to solve a
3480 Diophantine equation. This is the most time consuming part.
3482 For answering to the question: "Is there a dependence?" we have
3483 to prove that there exists a solution to the Diophantine
3484 equation, and that the solution is in the iteration domain,
3485 i.e. the solution is positive or zero, and that the solution
3486 happens before the upper bound loop.nb_iterations. Otherwise
3487 there is no dependence. This function outputs a description of
3488 the iterations that hold the intersections. */
3490 nb_vars_a = nb_vars_in_chrec (chrec_a);
3491 nb_vars_b = nb_vars_in_chrec (chrec_b);
3493 gcc_obstack_init (&scratch_obstack);
/* A is the column vector of the steps of both chrecs (B's steps are
   entered negated); U and S receive the right Hermite decomposition
   of A, all allocated on the scratch obstack.  */
3495 dim = nb_vars_a + nb_vars_b;
3496 U = lambda_matrix_new (dim, dim, &scratch_obstack);
3497 A = lambda_matrix_new (dim, 1, &scratch_obstack);
3498 S = lambda_matrix_new (dim, 1, &scratch_obstack);
/* GAMMA is the constant term of the Diophantine equation built from
   the initial values of the two chrecs.  */
3500 init_a = int_cst_value (initialize_matrix_A (A, chrec_a, 0, 1));
3501 init_b = int_cst_value (initialize_matrix_A (A, chrec_b, nb_vars_a, -1));
3502 gamma = init_b - init_a;
3504 /* Don't do all the hard work of solving the Diophantine equation
3505 when we already know the solution: for example,
3506 | {3, +, 1}_1
3507 | {3, +, 4}_2
3508 | gamma = 3 - 3 = 0.
3509 Then the first overlap occurs during the first iterations:
3510 | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
3512 if (gamma == 0)
3514 if (nb_vars_a == 1 && nb_vars_b == 1)
3516 HOST_WIDE_INT step_a, step_b;
3517 HOST_WIDE_INT niter, niter_a, niter_b;
3518 affine_fn ova, ovb;
3520 niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a));
3521 niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b));
3522 niter = MIN (niter_a, niter_b);
3523 step_a = int_cst_value (CHREC_RIGHT (chrec_a));
3524 step_b = int_cst_value (CHREC_RIGHT (chrec_b));
3526 compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
3527 &ova, &ovb,
3528 last_conflicts, 1);
3529 *overlaps_a = conflict_fn (1, ova);
3530 *overlaps_b = conflict_fn (1, ovb);
3533 else if (nb_vars_a == 2 && nb_vars_b == 1)
3534 compute_overlap_steps_for_affine_1_2
3535 (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
3537 else if (nb_vars_a == 1 && nb_vars_b == 2)
3538 compute_overlap_steps_for_affine_1_2
3539 (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
3541 else
3543 if (dump_file && (dump_flags & TDF_DETAILS))
3544 fprintf (dump_file, "affine-affine test failed: too many variables.\n");
3545 *overlaps_a = conflict_fn_not_known ();
3546 *overlaps_b = conflict_fn_not_known ();
3547 *last_conflicts = chrec_dont_know;
3549 goto end_analyze_subs_aa;
3552 /* U.A = S */
3553 lambda_matrix_right_hermite (A, dim, 1, S, U);
/* Normalize so the leading entry of S is nonnegative; negating the
   corresponding row of U keeps U.A = S.  */
3555 if (S[0][0] < 0)
3557 S[0][0] *= -1;
3558 lambda_matrix_row_negate (U, dim, 0);
3560 gcd_alpha_beta = S[0][0];
3562 /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
3563 but that is a quite strange case. Instead of ICEing, answer
3564 don't know. */
3565 if (gcd_alpha_beta == 0)
3567 *overlaps_a = conflict_fn_not_known ();
3568 *overlaps_b = conflict_fn_not_known ();
3569 *last_conflicts = chrec_dont_know;
3570 goto end_analyze_subs_aa;
3573 /* The classic "gcd-test". */
3574 if (!int_divides_p (gcd_alpha_beta, gamma))
3576 /* The "gcd-test" has determined that there is no integer
3577 solution, i.e. there is no dependence. */
3578 *overlaps_a = conflict_fn_no_dependence ();
3579 *overlaps_b = conflict_fn_no_dependence ();
3580 *last_conflicts = integer_zero_node;
3583 /* Both access functions are univariate. This includes SIV and MIV cases. */
3584 else if (nb_vars_a == 1 && nb_vars_b == 1)
3586 /* Both functions should have the same evolution sign. */
3587 if (((A[0][0] > 0 && -A[1][0] > 0)
3588 || (A[0][0] < 0 && -A[1][0] < 0)))
3590 /* The solutions are given by:
3592 | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0]
3593 | [u21 u22] [y0]
3595 For a given integer t. Using the following variables,
3597 | i0 = u11 * gamma / gcd_alpha_beta
3598 | j0 = u12 * gamma / gcd_alpha_beta
3599 | i1 = u21
3600 | j1 = u22
3602 the solutions are:
3604 | x0 = i0 + i1 * t,
3605 | y0 = j0 + j1 * t. */
3606 HOST_WIDE_INT i0, j0, i1, j1;
3608 i0 = U[0][0] * gamma / gcd_alpha_beta;
3609 j0 = U[0][1] * gamma / gcd_alpha_beta;
3610 i1 = U[1][0];
3611 j1 = U[1][1];
/* A zero direction coefficient with a negative base means all the
   solutions lie at negative iterations.  */
3613 if ((i1 == 0 && i0 < 0)
3614 || (j1 == 0 && j0 < 0))
3616 /* There is no solution.
3617 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
3618 falls in here, but for the moment we don't look at the
3619 upper bound of the iteration domain. */
3620 *overlaps_a = conflict_fn_no_dependence ();
3621 *overlaps_b = conflict_fn_no_dependence ();
3622 *last_conflicts = integer_zero_node;
3623 goto end_analyze_subs_aa;
3626 if (i1 > 0 && j1 > 0)
3628 HOST_WIDE_INT niter_a
3629 = max_stmt_executions_int (get_chrec_loop (chrec_a));
3630 HOST_WIDE_INT niter_b
3631 = max_stmt_executions_int (get_chrec_loop (chrec_b));
3632 HOST_WIDE_INT niter = MIN (niter_a, niter_b);
3634 /* (X0, Y0) is a solution of the Diophantine equation:
3635 "chrec_a (X0) = chrec_b (Y0)". */
3636 HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
3637 CEIL (-j0, j1));
3638 HOST_WIDE_INT x0 = i1 * tau1 + i0;
3639 HOST_WIDE_INT y0 = j1 * tau1 + j0;
3641 /* (X1, Y1) is the smallest positive solution of the eq
3642 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
3643 first conflict occurs. */
3644 HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
3645 HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
3646 HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
3648 if (niter > 0)
3650 HOST_WIDE_INT tau2 = MIN (FLOOR_DIV (niter_a - i0, i1),
3651 FLOOR_DIV (niter_b - j0, j1));
3652 HOST_WIDE_INT last_conflict = tau2 - (x1 - i0)/i1;
3654 /* If the overlap occurs outside of the bounds of the
3655 loop, there is no dependence. */
3656 if (x1 >= niter_a || y1 >= niter_b)
3658 *overlaps_a = conflict_fn_no_dependence ();
3659 *overlaps_b = conflict_fn_no_dependence ();
3660 *last_conflicts = integer_zero_node;
3661 goto end_analyze_subs_aa;
3663 else
3664 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
3666 else
3667 *last_conflicts = chrec_dont_know;
/* Overlapping iterations of A are x1 + i1*k, of B are y1 + j1*k.  */
3669 *overlaps_a
3670 = conflict_fn (1,
3671 affine_fn_univar (build_int_cst (NULL_TREE, x1),
3673 build_int_cst (NULL_TREE, i1)));
3674 *overlaps_b
3675 = conflict_fn (1,
3676 affine_fn_univar (build_int_cst (NULL_TREE, y1),
3678 build_int_cst (NULL_TREE, j1)));
3680 else
3682 /* FIXME: For the moment, the upper bound of the
3683 iteration domain for i and j is not checked. */
3684 if (dump_file && (dump_flags & TDF_DETAILS))
3685 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
3686 *overlaps_a = conflict_fn_not_known ();
3687 *overlaps_b = conflict_fn_not_known ();
3688 *last_conflicts = chrec_dont_know;
3691 else
3693 if (dump_file && (dump_flags & TDF_DETAILS))
3694 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
3695 *overlaps_a = conflict_fn_not_known ();
3696 *overlaps_b = conflict_fn_not_known ();
3697 *last_conflicts = chrec_dont_know;
3700 else
3702 if (dump_file && (dump_flags & TDF_DETAILS))
3703 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
3704 *overlaps_a = conflict_fn_not_known ();
3705 *overlaps_b = conflict_fn_not_known ();
3706 *last_conflicts = chrec_dont_know;
/* Common exit: release the scratch matrices and dump the result.  */
3709 end_analyze_subs_aa:
3710 obstack_free (&scratch_obstack, NULL);
3711 if (dump_file && (dump_flags & TDF_DETAILS))
3713 fprintf (dump_file, " (overlaps_a = ");
3714 dump_conflict_function (dump_file, *overlaps_a);
3715 fprintf (dump_file, ")\n (overlaps_b = ");
3716 dump_conflict_function (dump_file, *overlaps_b);
3717 fprintf (dump_file, "))\n");
3721 /* Returns true when analyze_subscript_affine_affine can be used for
3722 determining the dependence relation between chrec_a and chrec_b,
3723 that contain symbols. This function modifies chrec_a and chrec_b
3724 such that the analysis result is the same, and such that they don't
3725 contain symbols, and then can safely be passed to the analyzer.
3727 Example: The analysis of the following tuples of evolutions produce
3728 the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
3729 vs. {0, +, 1}_1
3731 {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
3732 {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
3735 static bool
3736 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
3738 tree diff, type, left_a, left_b, right_b;
/* Symbolic steps are out of reach; only the initial conditions may
   contain symbols for this rewrite to work.  */
3740 if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
3741 || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
3742 /* FIXME: For the moment not handled. Might be refined later. */
3743 return false;
/* The difference of the initial conditions must fold to a constant
   for the rebasing below to preserve the analysis result.  */
3745 type = chrec_type (*chrec_a);
3746 left_a = CHREC_LEFT (*chrec_a);
3747 left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
3748 diff = chrec_fold_minus (type, left_a, left_b);
3750 if (!evolution_function_is_constant_p (diff))
3751 return false;
3753 if (dump_file && (dump_flags & TDF_DETAILS))
3754 fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
/* Rebase both chrecs by the initial condition of B: A now starts at
   DIFF and B at zero, which leaves their relative overlap unchanged
   and removes the symbols.  */
3756 *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
3757 diff, CHREC_RIGHT (*chrec_a));
3758 right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
3759 *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
3760 build_int_cst (type, 0),
3761 right_b);
3762 return true;
3765 /* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and
3766 *OVERLAPS_B are initialized to the functions that describe the
3767 relation between the elements accessed twice by CHREC_A and
3768 CHREC_B. For k >= 0, the following property is verified:
3770 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
3772 static void
3773 analyze_siv_subscript (tree chrec_a,
3774 tree chrec_b,
3775 conflict_function **overlaps_a,
3776 conflict_function **overlaps_b,
3777 tree *last_conflicts,
3778 int loop_nest_num)
3780 dependence_stats.num_siv++;
3782 if (dump_file && (dump_flags & TDF_DETAILS))
3783 fprintf (dump_file, "(analyze_siv_subscript \n");
/* Constant vs. affine: delegate to the cst-affine analyzer, swapping
   the output slots when the constant side is CHREC_B.  */
3785 if (evolution_function_is_constant_p (chrec_a)
3786 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
3787 analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
3788 overlaps_a, overlaps_b, last_conflicts);
3790 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
3791 && evolution_function_is_constant_p (chrec_b))
3792 analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
3793 overlaps_b, overlaps_a, last_conflicts);
3795 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
3796 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
/* Affine vs. affine: the integer lambda-matrix solver handles only
   symbol-free chrecs, possibly after the rewrite performed by
   can_use_analyze_subscript_affine_affine.  */
3798 if (!chrec_contains_symbols (chrec_a)
3799 && !chrec_contains_symbols (chrec_b))
3801 analyze_subscript_affine_affine (chrec_a, chrec_b,
3802 overlaps_a, overlaps_b,
3803 last_conflicts);
3805 if (CF_NOT_KNOWN_P (*overlaps_a)
3806 || CF_NOT_KNOWN_P (*overlaps_b))
3807 dependence_stats.num_siv_unimplemented++;
3808 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
3809 || CF_NO_DEPENDENCE_P (*overlaps_b))
3810 dependence_stats.num_siv_independent++;
3811 else
3812 dependence_stats.num_siv_dependent++;
3814 else if (can_use_analyze_subscript_affine_affine (&chrec_a,
3815 &chrec_b))
3817 analyze_subscript_affine_affine (chrec_a, chrec_b,
3818 overlaps_a, overlaps_b,
3819 last_conflicts);
3821 if (CF_NOT_KNOWN_P (*overlaps_a)
3822 || CF_NOT_KNOWN_P (*overlaps_b))
3823 dependence_stats.num_siv_unimplemented++;
3824 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
3825 || CF_NO_DEPENDENCE_P (*overlaps_b))
3826 dependence_stats.num_siv_independent++;
3827 else
3828 dependence_stats.num_siv_dependent++;
3830 else
3831 goto siv_subscript_dontknow;
3834 else
/* Any other combination is unimplemented: answer "don't know".  */
3836 siv_subscript_dontknow:;
3837 if (dump_file && (dump_flags & TDF_DETAILS))
3838 fprintf (dump_file, " siv test failed: unimplemented");
3839 *overlaps_a = conflict_fn_not_known ();
3840 *overlaps_b = conflict_fn_not_known ();
3841 *last_conflicts = chrec_dont_know;
3842 dependence_stats.num_siv_unimplemented++;
3845 if (dump_file && (dump_flags & TDF_DETAILS))
3846 fprintf (dump_file, ")\n");
3849 /* Returns false if we can prove that the greatest common divisor of the steps
3850 of CHREC does not divide CST, false otherwise. */
3852 static bool
3853 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
3855 HOST_WIDE_INT cd = 0, val;
3856 tree step;
3858 if (!tree_fits_shwi_p (cst))
3859 return true;
3860 val = tree_to_shwi (cst);
3862 while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
3864 step = CHREC_RIGHT (chrec);
3865 if (!tree_fits_shwi_p (step))
3866 return true;
3867 cd = gcd (cd, tree_to_shwi (step));
3868 chrec = CHREC_LEFT (chrec);
3871 return val % cd == 0;
3874 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
3875 LOOP_NEST. *OVERLAPS_A and *OVERLAPS_B are initialized to the
3876 functions that describe the relation between the elements accessed
3877 twice by CHREC_A and CHREC_B. For k >= 0, the following property
3878 is verified:
3880 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
3882 static void
3883 analyze_miv_subscript (tree chrec_a,
3884 tree chrec_b,
3885 conflict_function **overlaps_a,
3886 conflict_function **overlaps_b,
3887 tree *last_conflicts,
3888 struct loop *loop_nest)
3890 tree type, difference;
3892 dependence_stats.num_miv++;
3893 if (dump_file && (dump_flags & TDF_DETAILS))
3894 fprintf (dump_file, "(analyze_miv_subscript \n");
/* Convert both chrecs to a common signed type so that their
   difference can be folded.  */
3896 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
3897 chrec_a = chrec_convert (type, chrec_a, NULL);
3898 chrec_b = chrec_convert (type, chrec_b, NULL);
3899 difference = chrec_fold_minus (type, chrec_a, chrec_b);
3901 if (eq_evolutions_p (chrec_a, chrec_b))
3903 /* Access functions are the same: all the elements are accessed
3904 in the same order. */
3905 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3906 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3907 *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
3908 dependence_stats.num_miv_dependent++;
3911 else if (evolution_function_is_constant_p (difference)
3912 /* For the moment, the following is verified:
3913 evolution_function_is_affine_multivariate_p (chrec_a,
3914 loop_nest->num) */
3915 && !gcd_of_steps_may_divide_p (chrec_a, difference))
3917 /* testsuite/.../ssa-chrec-33.c
3918 {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2
3920 The difference is 1, and all the evolution steps are multiples
3921 of 2, consequently there are no overlapping elements. */
3922 *overlaps_a = conflict_fn_no_dependence ();
3923 *overlaps_b = conflict_fn_no_dependence ();
3924 *last_conflicts = integer_zero_node;
3925 dependence_stats.num_miv_independent++;
3928 else if (evolution_function_is_affine_multivariate_p (chrec_a, loop_nest->num)
3929 && !chrec_contains_symbols (chrec_a)
3930 && evolution_function_is_affine_multivariate_p (chrec_b, loop_nest->num)
3931 && !chrec_contains_symbols (chrec_b))
3933 /* testsuite/.../ssa-chrec-35.c
3934 {0, +, 1}_2 vs. {0, +, 1}_3
3935 the overlapping elements are respectively located at iterations:
3936 {0, +, 1}_x and {0, +, 1}_x,
3937 in other words, we have the equality:
3938 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
3940 Other examples:
3941 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
3942 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
3944 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
3945 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
3947 analyze_subscript_affine_affine (chrec_a, chrec_b,
3948 overlaps_a, overlaps_b, last_conflicts);
3950 if (CF_NOT_KNOWN_P (*overlaps_a)
3951 || CF_NOT_KNOWN_P (*overlaps_b))
3952 dependence_stats.num_miv_unimplemented++;
3953 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
3954 || CF_NO_DEPENDENCE_P (*overlaps_b))
3955 dependence_stats.num_miv_independent++;
3956 else
3957 dependence_stats.num_miv_dependent++;
3960 else
3962 /* When the analysis is too difficult, answer "don't know". */
3963 if (dump_file && (dump_flags & TDF_DETAILS))
3964 fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
3966 *overlaps_a = conflict_fn_not_known ();
3967 *overlaps_b = conflict_fn_not_known ();
3968 *last_conflicts = chrec_dont_know;
3969 dependence_stats.num_miv_unimplemented++;
3972 if (dump_file && (dump_flags & TDF_DETAILS))
3973 fprintf (dump_file, ")\n");
3976 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
3977 with respect to LOOP_NEST. OVERLAP_ITERATIONS_A and
3978 OVERLAP_ITERATIONS_B are initialized with two functions that
3979 describe the iterations that contain conflicting elements.
3981 Remark: For an integer k >= 0, the following equality is true:
3983 CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
3986 static void
3987 analyze_overlapping_iterations (tree chrec_a,
3988 tree chrec_b,
3989 conflict_function **overlap_iterations_a,
3990 conflict_function **overlap_iterations_b,
3991 tree *last_conflicts, struct loop *loop_nest)
3993 unsigned int lnn = loop_nest->num;
3995 dependence_stats.num_subscript_tests++;
3997 if (dump_file && (dump_flags & TDF_DETAILS))
3999 fprintf (dump_file, "(analyze_overlapping_iterations \n");
4000 fprintf (dump_file, " (chrec_a = ");
4001 print_generic_expr (dump_file, chrec_a);
4002 fprintf (dump_file, ")\n (chrec_b = ");
4003 print_generic_expr (dump_file, chrec_b);
4004 fprintf (dump_file, ")\n");
/* Missing or undetermined chrecs: give up immediately.  */
4007 if (chrec_a == NULL_TREE
4008 || chrec_b == NULL_TREE
4009 || chrec_contains_undetermined (chrec_a)
4010 || chrec_contains_undetermined (chrec_b))
4012 dependence_stats.num_subscript_undetermined++;
4014 *overlap_iterations_a = conflict_fn_not_known ();
4015 *overlap_iterations_b = conflict_fn_not_known ();
4018 /* If they are the same chrec, and are affine, they overlap
4019 on every iteration. */
4020 else if (eq_evolutions_p (chrec_a, chrec_b)
4021 && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
4022 || operand_equal_p (chrec_a, chrec_b, 0)))
4024 dependence_stats.num_same_subscript_function++;
4025 *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4026 *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4027 *last_conflicts = chrec_dont_know;
4030 /* If they aren't the same, and aren't affine, we can't do anything
4031 yet. */
4032 else if ((chrec_contains_symbols (chrec_a)
4033 || chrec_contains_symbols (chrec_b))
4034 && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
4035 || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
4037 dependence_stats.num_subscript_undetermined++;
4038 *overlap_iterations_a = conflict_fn_not_known ();
4039 *overlap_iterations_b = conflict_fn_not_known ();
/* Dispatch on the number of index variables involved in the
   subscript pair: ZIV, SIV or MIV.  */
4042 else if (ziv_subscript_p (chrec_a, chrec_b))
4043 analyze_ziv_subscript (chrec_a, chrec_b,
4044 overlap_iterations_a, overlap_iterations_b,
4045 last_conflicts);
4047 else if (siv_subscript_p (chrec_a, chrec_b))
4048 analyze_siv_subscript (chrec_a, chrec_b,
4049 overlap_iterations_a, overlap_iterations_b,
4050 last_conflicts, lnn);
4052 else
4053 analyze_miv_subscript (chrec_a, chrec_b,
4054 overlap_iterations_a, overlap_iterations_b,
4055 last_conflicts, loop_nest);
4057 if (dump_file && (dump_flags & TDF_DETAILS))
4059 fprintf (dump_file, " (overlap_iterations_a = ");
4060 dump_conflict_function (dump_file, *overlap_iterations_a);
4061 fprintf (dump_file, ")\n (overlap_iterations_b = ");
4062 dump_conflict_function (dump_file, *overlap_iterations_b);
4063 fprintf (dump_file, "))\n");
4067 /* Helper function for uniquely inserting distance vectors. */
4069 static void
4070 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
4072 unsigned i;
4073 lambda_vector v;
4075 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, v)
4076 if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
4077 return;
4079 DDR_DIST_VECTS (ddr).safe_push (dist_v);
4082 /* Helper function for uniquely inserting direction vectors. */
4084 static void
4085 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
4087 unsigned i;
4088 lambda_vector v;
4090 FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), i, v)
4091 if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
4092 return;
4094 DDR_DIR_VECTS (ddr).safe_push (dir_v);
4097 /* Add a distance of 1 on all the loops outer than INDEX. If we
4098 haven't yet determined a distance for this outer loop, push a new
4099 distance vector composed of the previous distance, and a distance
4100 of 1 for this outer loop. Example:
4102 | loop_1
4103 | loop_2
4104 | A[10]
4105 | endloop_2
4106 | endloop_1
4108 Saved vectors are of the form (dist_in_1, dist_in_2). First, we
4109 save (0, 1), then we have to save (1, 0). */
4111 static void
4112 add_outer_distances (struct data_dependence_relation *ddr,
4113 lambda_vector dist_v, int index)
4115 /* For each outer loop where init_v is not set, the accesses are
4116 in dependence of distance 1 in the loop. */
4117 while (--index >= 0)
4119 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4120 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
4121 save_v[index] = 1;
4122 save_dist_v (ddr, save_v);
4126 /* Return false when fail to represent the data dependence as a
4127 distance vector. A_INDEX is the index of the first reference
4128 (0 for DDR_A, 1 for DDR_B) and B_INDEX is the index of the
4129 second reference. INIT_B is set to true when a component has been
4130 added to the distance vector DIST_V. INDEX_CARRY is then set to
4131 the index in DIST_V that carries the dependence. */
4133 static bool
4134 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
4135 unsigned int a_index, unsigned int b_index,
4136 lambda_vector dist_v, bool *init_b,
4137 int *index_carry)
4139 unsigned i;
/* init_v[k] is set once dist_v[k] has been filled in by some
   subscript; used below to detect contradictory distances.  */
4140 lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4142 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
4144 tree access_fn_a, access_fn_b;
4145 struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
4147 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
4149 non_affine_dependence_relation (ddr);
4150 return false;
4153 access_fn_a = SUB_ACCESS_FN (subscript, a_index);
4154 access_fn_b = SUB_ACCESS_FN (subscript, b_index);
4156 if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
4157 && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
4159 HOST_WIDE_INT dist;
4160 int index;
4161 int var_a = CHREC_VARIABLE (access_fn_a);
4162 int var_b = CHREC_VARIABLE (access_fn_b);
/* Both access functions must evolve in the same loop for the
   subscript distance to be meaningful.  */
4164 if (var_a != var_b
4165 || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
4167 non_affine_dependence_relation (ddr);
4168 return false;
4171 dist = int_cst_value (SUB_DISTANCE (subscript));
4172 index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
4173 *index_carry = MIN (index, *index_carry);
4175 /* This is the subscript coupling test. If we have already
4176 recorded a distance for this loop (a distance coming from
4177 another subscript), it should be the same. For example,
4178 in the following code, there is no dependence:
4180 | loop i = 0, N, 1
4181 | T[i+1][i] = ...
4182 | ... = T[i][i]
4183 | endloop
4185 if (init_v[index] != 0 && dist_v[index] != dist)
4187 finalize_ddr_dependent (ddr, chrec_known);
4188 return false;
4191 dist_v[index] = dist;
4192 init_v[index] = 1;
4193 *init_b = true;
4195 else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
4197 /* This can be for example an affine vs. constant dependence
4198 (T[i] vs. T[3]) that is not an affine dependence and is
4199 not representable as a distance vector. */
4200 non_affine_dependence_relation (ddr);
4201 return false;
4205 return true;
4208 /* Return true when the DDR contains only constant access functions. */
4210 static bool
4211 constant_access_functions (const struct data_dependence_relation *ddr)
4213 unsigned i;
4214 subscript *sub;
4216 FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
4217 if (!evolution_function_is_constant_p (SUB_ACCESS_FN (sub, 0))
4218 || !evolution_function_is_constant_p (SUB_ACCESS_FN (sub, 1)))
4219 return false;
4221 return true;
4224 /* Helper function for the case where DDR_A and DDR_B are the same
4225 multivariate access function with a constant step. For an example
4226 see pr34635-1.c. */
4228 static void
4229 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
4231 int x_1, x_2;
/* C_2 is a two-level chrec {{c_0, +, step_1}, +, step_2}: peel off
   its two levels.  */
4232 tree c_1 = CHREC_LEFT (c_2);
4233 tree c_0 = CHREC_LEFT (c_1);
4234 lambda_vector dist_v;
4235 HOST_WIDE_INT v1, v2, cd;
4237 /* Polynomials with more than 2 variables are not handled yet. When
4238 the evolution steps are parameters, it is not possible to
4239 represent the dependence using classical distance vectors. */
4240 if (TREE_CODE (c_0) != INTEGER_CST
4241 || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
4242 || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
4244 DDR_AFFINE_P (ddr) = false;
4245 return;
4248 x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
4249 x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
4251 /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */
4252 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4253 v1 = int_cst_value (CHREC_RIGHT (c_1));
4254 v2 = int_cst_value (CHREC_RIGHT (c_2));
/* Reduce (v1, v2) by their gcd to get the smallest integer distance.  */
4255 cd = gcd (v1, v2);
4256 v1 /= cd;
4257 v2 /= cd;
/* Flip both components if needed so that dist_v[x_1] is nonnegative.  */
4259 if (v2 < 0)
4261 v2 = -v2;
4262 v1 = -v1;
4265 dist_v[x_1] = v2;
4266 dist_v[x_2] = -v1;
4267 save_dist_v (ddr, dist_v);
4269 add_outer_distances (ddr, dist_v, x_1);
4272 /* Helper function for the case where DDR_A and DDR_B are the same
4273 access functions. */
4275 static void
4276 add_other_self_distances (struct data_dependence_relation *ddr)
4278 lambda_vector dist_v;
4279 unsigned i;
4280 int index_carry = DDR_NB_LOOPS (ddr);
4281 subscript *sub;
4283 FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
4285 tree access_fun = SUB_ACCESS_FN (sub, 0);
4287 if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
4289 if (!evolution_function_is_univariate_p (access_fun))
/* A multivariate access function is only handled when it is
   the single subscript of the reference.  */
4291 if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
4293 DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
4294 return;
4297 access_fun = SUB_ACCESS_FN (DDR_SUBSCRIPT (ddr, 0), 0);
4299 if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
4300 add_multivariate_self_dist (ddr, access_fun);
4301 else
4302 /* The evolution step is not constant: it varies in
4303 the outer loop, so this cannot be represented by a
4304 distance vector. For example in pr34635.c the
4305 evolution is {0, +, {0, +, 4}_1}_2. */
4306 DDR_AFFINE_P (ddr) = false;
4308 return;
/* Univariate chrec: track the minimum loop-nest index among the
   subscripts' evolution loops.  */
4311 index_carry = MIN (index_carry,
4312 index_in_loop_nest (CHREC_VARIABLE (access_fun),
4313 DDR_LOOP_NEST (ddr)));
/* Record the zero vector plus distances of 1 on the loops outer
   than INDEX_CARRY.  */
4317 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4318 add_outer_distances (ddr, dist_v, index_carry);
4321 static void
4322 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
4324 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4326 dist_v[DDR_INNER_LOOP (ddr)] = 1;
4327 save_dist_v (ddr, dist_v);
4330 /* Adds a unit distance vector to DDR when there is a 0 overlap. This
4331 is the case for example when access functions are the same and
4332 equal to a constant, as in:
4334 | loop_1
4335 | A[3] = ...
4336 | ... = A[3]
4337 | endloop_1
4339 in which case the distance vectors are (0) and (1). */
4341 static void
4342 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
4344 unsigned i, j;
4346 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
4348 subscript_p sub = DDR_SUBSCRIPT (ddr, i);
4349 conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
4350 conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
4352 for (j = 0; j < ca->n; j++)
4353 if (affine_function_zero_p (ca->fns[j]))
4355 insert_innermost_unit_dist_vector (ddr);
4356 return;
4359 for (j = 0; j < cb->n; j++)
4360 if (affine_function_zero_p (cb->fns[j]))
4362 insert_innermost_unit_dist_vector (ddr);
4363 return;
4368 /* Return true when the DDR contains two data references that have the
4369 same access functions. */
4371 static inline bool
4372 same_access_functions (const struct data_dependence_relation *ddr)
4374 unsigned i;
4375 subscript *sub;
4377 FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
4378 if (!eq_evolutions_p (SUB_ACCESS_FN (sub, 0),
4379 SUB_ACCESS_FN (sub, 1)))
4380 return false;
4382 return true;
4385 /* Compute the classic per loop distance vector. DDR is the data
4386 dependence relation to build a vector from. Return false when fail
4387 to represent the data dependence as a distance vector. */
4389 static bool
4390 build_classic_dist_vector (struct data_dependence_relation *ddr,
4391 struct loop *loop_nest)
4393 bool init_b = false;
4394 int index_carry = DDR_NB_LOOPS (ddr);
4395 lambda_vector dist_v;
4397 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
4398 return false;
4400 if (same_access_functions (ddr))
4402 /* Save the 0 vector. */
4403 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4404 save_dist_v (ddr, dist_v);
4406 if (constant_access_functions (ddr))
4407 add_distance_for_zero_overlaps (ddr);
4409 if (DDR_NB_LOOPS (ddr) > 1)
4410 add_other_self_distances (ddr);
4412 return true;
4415 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4416 if (!build_classic_dist_vector_1 (ddr, 0, 1, dist_v, &init_b, &index_carry))
4417 return false;
4419 /* Save the distance vector if we initialized one. */
4420 if (init_b)
4422 /* Verify a basic constraint: classic distance vectors should
4423 always be lexicographically positive.
4425 Data references are collected in the order of execution of
4426 the program, thus for the following loop
4428 | for (i = 1; i < 100; i++)
4429 | for (j = 1; j < 100; j++)
4431 | t = T[j+1][i-1]; // A
4432 | T[j][i] = t + 2; // B
4435 references are collected following the direction of the wind:
4436 A then B. The data dependence tests are performed also
4437 following this order, such that we're looking at the distance
4438 separating the elements accessed by A from the elements later
4439 accessed by B. But in this example, the distance returned by
4440 test_dep (A, B) is lexicographically negative (-1, 1), that
4441 means that the access A occurs later than B with respect to
4442 the outer loop, ie. we're actually looking upwind. In this
4443 case we solve test_dep (B, A) looking downwind to the
4444 lexicographically positive solution, that returns the
4445 distance vector (1, -1). */
4446 if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
4448 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4449 if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
4450 return false;
4451 compute_subscript_distance (ddr);
4452 if (!build_classic_dist_vector_1 (ddr, 1, 0, save_v, &init_b,
4453 &index_carry))
4454 return false;
4455 save_dist_v (ddr, save_v);
4456 DDR_REVERSED_P (ddr) = true;
4458 /* In this case there is a dependence forward for all the
4459 outer loops:
4461 | for (k = 1; k < 100; k++)
4462 | for (i = 1; i < 100; i++)
4463 | for (j = 1; j < 100; j++)
4465 | t = T[j+1][i-1]; // A
4466 | T[j][i] = t + 2; // B
4469 the vectors are:
4470 (0, 1, -1)
4471 (1, 1, -1)
4472 (1, -1, 1)
4474 if (DDR_NB_LOOPS (ddr) > 1)
4476 add_outer_distances (ddr, save_v, index_carry);
4477 add_outer_distances (ddr, dist_v, index_carry);
4480 else
4482 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4483 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
4485 if (DDR_NB_LOOPS (ddr) > 1)
4487 lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4489 if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
4490 return false;
4491 compute_subscript_distance (ddr);
4492 if (!build_classic_dist_vector_1 (ddr, 1, 0, opposite_v, &init_b,
4493 &index_carry))
4494 return false;
4496 save_dist_v (ddr, save_v);
4497 add_outer_distances (ddr, dist_v, index_carry);
4498 add_outer_distances (ddr, opposite_v, index_carry);
4500 else
4501 save_dist_v (ddr, save_v);
4504 else
4506 /* There is a distance of 1 on all the outer loops: Example:
4507 there is a dependence of distance 1 on loop_1 for the array A.
4509 | loop_1
4510 | A[5] = ...
4511 | endloop
4513 add_outer_distances (ddr, dist_v,
4514 lambda_vector_first_nz (dist_v,
4515 DDR_NB_LOOPS (ddr), 0));
4518 if (dump_file && (dump_flags & TDF_DETAILS))
4520 unsigned i;
4522 fprintf (dump_file, "(build_classic_dist_vector\n");
4523 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
4525 fprintf (dump_file, " dist_vector = (");
4526 print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
4527 DDR_NB_LOOPS (ddr));
4528 fprintf (dump_file, " )\n");
4530 fprintf (dump_file, ")\n");
4533 return true;
4536 /* Return the direction for a given distance.
4537 FIXME: Computing dir this way is suboptimal, since dir can catch
4538 cases that dist is unable to represent. */
4540 static inline enum data_dependence_direction
4541 dir_from_dist (int dist)
4543 if (dist > 0)
4544 return dir_positive;
4545 else if (dist < 0)
4546 return dir_negative;
4547 else
4548 return dir_equal;
4551 /* Compute the classic per loop direction vector. DDR is the data
4552 dependence relation to build a vector from. */
4554 static void
4555 build_classic_dir_vector (struct data_dependence_relation *ddr)
4557 unsigned i, j;
4558 lambda_vector dist_v;
4560 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
4562 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4564 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
4565 dir_v[j] = dir_from_dist (dist_v[j]);
4567 save_dir_v (ddr, dir_v);
4571 /* Helper function. Returns true when there is a dependence between the
4572 data references. A_INDEX is the index of the first reference (0 for
4573 DDR_A, 1 for DDR_B) and B_INDEX is the index of the second reference. */
4575 static bool
4576 subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
4577 unsigned int a_index, unsigned int b_index,
4578 struct loop *loop_nest)
4580 unsigned int i;
4581 tree last_conflicts;
4582 struct subscript *subscript;
4583 tree res = NULL_TREE;
4585 for (i = 0; DDR_SUBSCRIPTS (ddr).iterate (i, &subscript); i++)
4587 conflict_function *overlaps_a, *overlaps_b;
4589 analyze_overlapping_iterations (SUB_ACCESS_FN (subscript, a_index),
4590 SUB_ACCESS_FN (subscript, b_index),
4591 &overlaps_a, &overlaps_b,
4592 &last_conflicts, loop_nest);
4594 if (SUB_CONFLICTS_IN_A (subscript))
4595 free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
4596 if (SUB_CONFLICTS_IN_B (subscript))
4597 free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
4599 SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
4600 SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
4601 SUB_LAST_CONFLICT (subscript) = last_conflicts;
4603 /* If there is any undetermined conflict function we have to
4604 give a conservative answer in case we cannot prove that
4605 no dependence exists when analyzing another subscript. */
4606 if (CF_NOT_KNOWN_P (overlaps_a)
4607 || CF_NOT_KNOWN_P (overlaps_b))
4609 res = chrec_dont_know;
4610 continue;
4613 /* When there is a subscript with no dependence we can stop. */
4614 else if (CF_NO_DEPENDENCE_P (overlaps_a)
4615 || CF_NO_DEPENDENCE_P (overlaps_b))
4617 res = chrec_known;
4618 break;
4622 if (res == NULL_TREE)
4623 return true;
4625 if (res == chrec_known)
4626 dependence_stats.num_dependence_independent++;
4627 else
4628 dependence_stats.num_dependence_undetermined++;
4629 finalize_ddr_dependent (ddr, res);
4630 return false;
4633 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR. */
4635 static void
4636 subscript_dependence_tester (struct data_dependence_relation *ddr,
4637 struct loop *loop_nest)
4639 if (subscript_dependence_tester_1 (ddr, 0, 1, loop_nest))
4640 dependence_stats.num_dependence_dependent++;
4642 compute_subscript_distance (ddr);
4643 if (build_classic_dist_vector (ddr, loop_nest))
4644 build_classic_dir_vector (ddr);
4647 /* Returns true when all the access functions of A are affine or
4648 constant with respect to LOOP_NEST. */
4650 static bool
4651 access_functions_are_affine_or_constant_p (const struct data_reference *a,
4652 const struct loop *loop_nest)
4654 unsigned int i;
4655 vec<tree> fns = DR_ACCESS_FNS (a);
4656 tree t;
4658 FOR_EACH_VEC_ELT (fns, i, t)
4659 if (!evolution_function_is_invariant_p (t, loop_nest->num)
4660 && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
4661 return false;
4663 return true;
4666 /* This computes the affine dependence relation between A and B with
4667 respect to LOOP_NEST. CHREC_KNOWN is used for representing the
4668 independence between two accesses, while CHREC_DONT_KNOW is used
4669 for representing the unknown relation.
4671 Note that it is possible to stop the computation of the dependence
4672 relation the first time we detect a CHREC_KNOWN element for a given
4673 subscript. */
4675 void
4676 compute_affine_dependence (struct data_dependence_relation *ddr,
4677 struct loop *loop_nest)
4679 struct data_reference *dra = DDR_A (ddr);
4680 struct data_reference *drb = DDR_B (ddr);
4682 if (dump_file && (dump_flags & TDF_DETAILS))
4684 fprintf (dump_file, "(compute_affine_dependence\n");
4685 fprintf (dump_file, " stmt_a: ");
4686 print_gimple_stmt (dump_file, DR_STMT (dra), 0, TDF_SLIM);
4687 fprintf (dump_file, " stmt_b: ");
4688 print_gimple_stmt (dump_file, DR_STMT (drb), 0, TDF_SLIM);
4691 /* Analyze only when the dependence relation is not yet known. */
4692 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4694 dependence_stats.num_dependence_tests++;
4696 if (access_functions_are_affine_or_constant_p (dra, loop_nest)
4697 && access_functions_are_affine_or_constant_p (drb, loop_nest))
4698 subscript_dependence_tester (ddr, loop_nest);
4700 /* As a last case, if the dependence cannot be determined, or if
4701 the dependence is considered too difficult to determine, answer
4702 "don't know". */
4703 else
4705 dependence_stats.num_dependence_undetermined++;
4707 if (dump_file && (dump_flags & TDF_DETAILS))
4709 fprintf (dump_file, "Data ref a:\n");
4710 dump_data_reference (dump_file, dra);
4711 fprintf (dump_file, "Data ref b:\n");
4712 dump_data_reference (dump_file, drb);
4713 fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
4715 finalize_ddr_dependent (ddr, chrec_dont_know);
4719 if (dump_file && (dump_flags & TDF_DETAILS))
4721 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4722 fprintf (dump_file, ") -> no dependence\n");
4723 else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
4724 fprintf (dump_file, ") -> dependence analysis failed\n");
4725 else
4726 fprintf (dump_file, ")\n");
4730 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
4731 the data references in DATAREFS, in the LOOP_NEST. When
4732 COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
4733 relations. Return true when successful, i.e. data references number
4734 is small enough to be handled. */
4736 bool
4737 compute_all_dependences (vec<data_reference_p> datarefs,
4738 vec<ddr_p> *dependence_relations,
4739 vec<loop_p> loop_nest,
4740 bool compute_self_and_rr)
4742 struct data_dependence_relation *ddr;
4743 struct data_reference *a, *b;
4744 unsigned int i, j;
4746 if ((int) datarefs.length ()
4747 > PARAM_VALUE (PARAM_LOOP_MAX_DATAREFS_FOR_DATADEPS))
4749 struct data_dependence_relation *ddr;
4751 /* Insert a single relation into dependence_relations:
4752 chrec_dont_know. */
4753 ddr = initialize_data_dependence_relation (NULL, NULL, loop_nest);
4754 dependence_relations->safe_push (ddr);
4755 return false;
4758 FOR_EACH_VEC_ELT (datarefs, i, a)
4759 for (j = i + 1; datarefs.iterate (j, &b); j++)
4760 if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
4762 ddr = initialize_data_dependence_relation (a, b, loop_nest);
4763 dependence_relations->safe_push (ddr);
4764 if (loop_nest.exists ())
4765 compute_affine_dependence (ddr, loop_nest[0]);
4768 if (compute_self_and_rr)
4769 FOR_EACH_VEC_ELT (datarefs, i, a)
4771 ddr = initialize_data_dependence_relation (a, a, loop_nest);
4772 dependence_relations->safe_push (ddr);
4773 if (loop_nest.exists ())
4774 compute_affine_dependence (ddr, loop_nest[0]);
4777 return true;
4780 /* Describes a location of a memory reference. */
4782 struct data_ref_loc
4784 /* The memory reference. */
4785 tree ref;
4787 /* True if the memory reference is read. */
4788 bool is_read;
4790 /* True if the data reference is conditional within the containing
4791 statement, i.e. if it might not occur even when the statement
4792 is executed and runs to completion. */
4793 bool is_conditional_in_stmt;
4797 /* Stores the locations of memory references in STMT to REFERENCES. Returns
4798 true if STMT clobbers memory, false otherwise. */
4800 static bool
4801 get_references_in_stmt (gimple *stmt, vec<data_ref_loc, va_heap> *references)
4803 bool clobbers_memory = false;
4804 data_ref_loc ref;
4805 tree op0, op1;
4806 enum gimple_code stmt_code = gimple_code (stmt);
4808 /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
4809 As we cannot model data-references to not spelled out
4810 accesses give up if they may occur. */
4811 if (stmt_code == GIMPLE_CALL
4812 && !(gimple_call_flags (stmt) & ECF_CONST))
4814 /* Allow IFN_GOMP_SIMD_LANE in their own loops. */
4815 if (gimple_call_internal_p (stmt))
4816 switch (gimple_call_internal_fn (stmt))
4818 case IFN_GOMP_SIMD_LANE:
4820 struct loop *loop = gimple_bb (stmt)->loop_father;
4821 tree uid = gimple_call_arg (stmt, 0);
4822 gcc_assert (TREE_CODE (uid) == SSA_NAME);
4823 if (loop == NULL
4824 || loop->simduid != SSA_NAME_VAR (uid))
4825 clobbers_memory = true;
4826 break;
4828 case IFN_MASK_LOAD:
4829 case IFN_MASK_STORE:
4830 break;
4831 default:
4832 clobbers_memory = true;
4833 break;
4835 else
4836 clobbers_memory = true;
4838 else if (stmt_code == GIMPLE_ASM
4839 && (gimple_asm_volatile_p (as_a <gasm *> (stmt))
4840 || gimple_vuse (stmt)))
4841 clobbers_memory = true;
4843 if (!gimple_vuse (stmt))
4844 return clobbers_memory;
4846 if (stmt_code == GIMPLE_ASSIGN)
4848 tree base;
4849 op0 = gimple_assign_lhs (stmt);
4850 op1 = gimple_assign_rhs1 (stmt);
4852 if (DECL_P (op1)
4853 || (REFERENCE_CLASS_P (op1)
4854 && (base = get_base_address (op1))
4855 && TREE_CODE (base) != SSA_NAME
4856 && !is_gimple_min_invariant (base)))
4858 ref.ref = op1;
4859 ref.is_read = true;
4860 ref.is_conditional_in_stmt = false;
4861 references->safe_push (ref);
4864 else if (stmt_code == GIMPLE_CALL)
4866 unsigned i, n;
4867 tree ptr, type;
4868 unsigned int align;
4870 ref.is_read = false;
4871 if (gimple_call_internal_p (stmt))
4872 switch (gimple_call_internal_fn (stmt))
4874 case IFN_MASK_LOAD:
4875 if (gimple_call_lhs (stmt) == NULL_TREE)
4876 break;
4877 ref.is_read = true;
4878 /* FALLTHRU */
4879 case IFN_MASK_STORE:
4880 ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)), 0);
4881 align = tree_to_shwi (gimple_call_arg (stmt, 1));
4882 if (ref.is_read)
4883 type = TREE_TYPE (gimple_call_lhs (stmt));
4884 else
4885 type = TREE_TYPE (gimple_call_arg (stmt, 3));
4886 if (TYPE_ALIGN (type) != align)
4887 type = build_aligned_type (type, align);
4888 ref.is_conditional_in_stmt = true;
4889 ref.ref = fold_build2 (MEM_REF, type, gimple_call_arg (stmt, 0),
4890 ptr);
4891 references->safe_push (ref);
4892 return false;
4893 default:
4894 break;
4897 op0 = gimple_call_lhs (stmt);
4898 n = gimple_call_num_args (stmt);
4899 for (i = 0; i < n; i++)
4901 op1 = gimple_call_arg (stmt, i);
4903 if (DECL_P (op1)
4904 || (REFERENCE_CLASS_P (op1) && get_base_address (op1)))
4906 ref.ref = op1;
4907 ref.is_read = true;
4908 ref.is_conditional_in_stmt = false;
4909 references->safe_push (ref);
4913 else
4914 return clobbers_memory;
4916 if (op0
4917 && (DECL_P (op0)
4918 || (REFERENCE_CLASS_P (op0) && get_base_address (op0))))
4920 ref.ref = op0;
4921 ref.is_read = false;
4922 ref.is_conditional_in_stmt = false;
4923 references->safe_push (ref);
4925 return clobbers_memory;
4929 /* Returns true if the loop-nest has any data reference. */
4931 bool
4932 loop_nest_has_data_refs (loop_p loop)
4934 basic_block *bbs = get_loop_body (loop);
4935 auto_vec<data_ref_loc, 3> references;
4937 for (unsigned i = 0; i < loop->num_nodes; i++)
4939 basic_block bb = bbs[i];
4940 gimple_stmt_iterator bsi;
4942 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4944 gimple *stmt = gsi_stmt (bsi);
4945 get_references_in_stmt (stmt, &references);
4946 if (references.length ())
4948 free (bbs);
4949 return true;
4953 free (bbs);
4954 return false;
4957 /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
4958 reference, returns false, otherwise returns true. NEST is the outermost
4959 loop of the loop nest in which the references should be analyzed. */
4961 bool
4962 find_data_references_in_stmt (struct loop *nest, gimple *stmt,
4963 vec<data_reference_p> *datarefs)
4965 unsigned i;
4966 auto_vec<data_ref_loc, 2> references;
4967 data_ref_loc *ref;
4968 bool ret = true;
4969 data_reference_p dr;
4971 if (get_references_in_stmt (stmt, &references))
4972 return false;
4974 FOR_EACH_VEC_ELT (references, i, ref)
4976 dr = create_data_ref (nest ? loop_preheader_edge (nest) : NULL,
4977 loop_containing_stmt (stmt), ref->ref,
4978 stmt, ref->is_read, ref->is_conditional_in_stmt);
4979 gcc_assert (dr != NULL);
4980 datarefs->safe_push (dr);
4983 return ret;
4986 /* Stores the data references in STMT to DATAREFS. If there is an
4987 unanalyzable reference, returns false, otherwise returns true.
4988 NEST is the outermost loop of the loop nest in which the references
4989 should be instantiated, LOOP is the loop in which the references
4990 should be analyzed. */
4992 bool
4993 graphite_find_data_references_in_stmt (edge nest, loop_p loop, gimple *stmt,
4994 vec<data_reference_p> *datarefs)
4996 unsigned i;
4997 auto_vec<data_ref_loc, 2> references;
4998 data_ref_loc *ref;
4999 bool ret = true;
5000 data_reference_p dr;
5002 if (get_references_in_stmt (stmt, &references))
5003 return false;
5005 FOR_EACH_VEC_ELT (references, i, ref)
5007 dr = create_data_ref (nest, loop, ref->ref, stmt, ref->is_read,
5008 ref->is_conditional_in_stmt);
5009 gcc_assert (dr != NULL);
5010 datarefs->safe_push (dr);
5013 return ret;
5016 /* Search the data references in LOOP, and record the information into
5017 DATAREFS. Returns chrec_dont_know when failing to analyze a
5018 difficult case, returns NULL_TREE otherwise. */
5020 tree
5021 find_data_references_in_bb (struct loop *loop, basic_block bb,
5022 vec<data_reference_p> *datarefs)
5024 gimple_stmt_iterator bsi;
5026 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5028 gimple *stmt = gsi_stmt (bsi);
5030 if (!find_data_references_in_stmt (loop, stmt, datarefs))
5032 struct data_reference *res;
5033 res = XCNEW (struct data_reference);
5034 datarefs->safe_push (res);
5036 return chrec_dont_know;
5040 return NULL_TREE;
5043 /* Search the data references in LOOP, and record the information into
5044 DATAREFS. Returns chrec_dont_know when failing to analyze a
5045 difficult case, returns NULL_TREE otherwise.
5047 TODO: This function should be made smarter so that it can handle address
5048 arithmetic as if they were array accesses, etc. */
5050 tree
5051 find_data_references_in_loop (struct loop *loop,
5052 vec<data_reference_p> *datarefs)
5054 basic_block bb, *bbs;
5055 unsigned int i;
5057 bbs = get_loop_body_in_dom_order (loop);
5059 for (i = 0; i < loop->num_nodes; i++)
5061 bb = bbs[i];
5063 if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
5065 free (bbs);
5066 return chrec_dont_know;
5069 free (bbs);
5071 return NULL_TREE;
5074 /* Return the alignment in bytes that DRB is guaranteed to have at all
5075 times. */
5077 unsigned int
5078 dr_alignment (innermost_loop_behavior *drb)
5080 /* Get the alignment of BASE_ADDRESS + INIT. */
5081 unsigned int alignment = drb->base_alignment;
5082 unsigned int misalignment = (drb->base_misalignment
5083 + TREE_INT_CST_LOW (drb->init));
5084 if (misalignment != 0)
5085 alignment = MIN (alignment, misalignment & -misalignment);
5087 /* Cap it to the alignment of OFFSET. */
5088 if (!integer_zerop (drb->offset))
5089 alignment = MIN (alignment, drb->offset_alignment);
5091 /* Cap it to the alignment of STEP. */
5092 if (!integer_zerop (drb->step))
5093 alignment = MIN (alignment, drb->step_alignment);
5095 return alignment;
5098 /* Recursive helper function. */
5100 static bool
5101 find_loop_nest_1 (struct loop *loop, vec<loop_p> *loop_nest)
5103 /* Inner loops of the nest should not contain siblings. Example:
5104 when there are two consecutive loops,
5106 | loop_0
5107 | loop_1
5108 | A[{0, +, 1}_1]
5109 | endloop_1
5110 | loop_2
5111 | A[{0, +, 1}_2]
5112 | endloop_2
5113 | endloop_0
5115 the dependence relation cannot be captured by the distance
5116 abstraction. */
5117 if (loop->next)
5118 return false;
5120 loop_nest->safe_push (loop);
5121 if (loop->inner)
5122 return find_loop_nest_1 (loop->inner, loop_nest);
5123 return true;
5126 /* Return false when the LOOP is not well nested. Otherwise return
5127 true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
5128 contain the loops from the outermost to the innermost, as they will
5129 appear in the classic distance vector. */
5131 bool
5132 find_loop_nest (struct loop *loop, vec<loop_p> *loop_nest)
5134 loop_nest->safe_push (loop);
5135 if (loop->inner)
5136 return find_loop_nest_1 (loop->inner, loop_nest);
5137 return true;
5140 /* Returns true when the data dependences have been computed, false otherwise.
5141 Given a loop nest LOOP, the following vectors are returned:
5142 DATAREFS is initialized to all the array elements contained in this loop,
5143 DEPENDENCE_RELATIONS contains the relations between the data references.
5144 Compute read-read and self relations if
5145 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
5147 bool
5148 compute_data_dependences_for_loop (struct loop *loop,
5149 bool compute_self_and_read_read_dependences,
5150 vec<loop_p> *loop_nest,
5151 vec<data_reference_p> *datarefs,
5152 vec<ddr_p> *dependence_relations)
5154 bool res = true;
5156 memset (&dependence_stats, 0, sizeof (dependence_stats));
5158 /* If the loop nest is not well formed, or one of the data references
5159 is not computable, give up without spending time to compute other
5160 dependences. */
5161 if (!loop
5162 || !find_loop_nest (loop, loop_nest)
5163 || find_data_references_in_loop (loop, datarefs) == chrec_dont_know
5164 || !compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
5165 compute_self_and_read_read_dependences))
5166 res = false;
5168 if (dump_file && (dump_flags & TDF_STATS))
5170 fprintf (dump_file, "Dependence tester statistics:\n");
5172 fprintf (dump_file, "Number of dependence tests: %d\n",
5173 dependence_stats.num_dependence_tests);
5174 fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
5175 dependence_stats.num_dependence_dependent);
5176 fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
5177 dependence_stats.num_dependence_independent);
5178 fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
5179 dependence_stats.num_dependence_undetermined);
5181 fprintf (dump_file, "Number of subscript tests: %d\n",
5182 dependence_stats.num_subscript_tests);
5183 fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
5184 dependence_stats.num_subscript_undetermined);
5185 fprintf (dump_file, "Number of same subscript function: %d\n",
5186 dependence_stats.num_same_subscript_function);
5188 fprintf (dump_file, "Number of ziv tests: %d\n",
5189 dependence_stats.num_ziv);
5190 fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
5191 dependence_stats.num_ziv_dependent);
5192 fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
5193 dependence_stats.num_ziv_independent);
5194 fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
5195 dependence_stats.num_ziv_unimplemented);
5197 fprintf (dump_file, "Number of siv tests: %d\n",
5198 dependence_stats.num_siv);
5199 fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
5200 dependence_stats.num_siv_dependent);
5201 fprintf (dump_file, "Number of siv tests returning independent: %d\n",
5202 dependence_stats.num_siv_independent);
5203 fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
5204 dependence_stats.num_siv_unimplemented);
5206 fprintf (dump_file, "Number of miv tests: %d\n",
5207 dependence_stats.num_miv);
5208 fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
5209 dependence_stats.num_miv_dependent);
5210 fprintf (dump_file, "Number of miv tests returning independent: %d\n",
5211 dependence_stats.num_miv_independent);
5212 fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
5213 dependence_stats.num_miv_unimplemented);
5216 return res;
5219 /* Free the memory used by a data dependence relation DDR. */
5221 void
5222 free_dependence_relation (struct data_dependence_relation *ddr)
5224 if (ddr == NULL)
5225 return;
5227 if (DDR_SUBSCRIPTS (ddr).exists ())
5228 free_subscripts (DDR_SUBSCRIPTS (ddr));
5229 DDR_DIST_VECTS (ddr).release ();
5230 DDR_DIR_VECTS (ddr).release ();
5232 free (ddr);
5235 /* Free the memory used by the data dependence relations from
5236 DEPENDENCE_RELATIONS. */
5238 void
5239 free_dependence_relations (vec<ddr_p> dependence_relations)
5241 unsigned int i;
5242 struct data_dependence_relation *ddr;
5244 FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
5245 if (ddr)
5246 free_dependence_relation (ddr);
5248 dependence_relations.release ();
5251 /* Free the memory used by the data references from DATAREFS. */
5253 void
5254 free_data_refs (vec<data_reference_p> datarefs)
5256 unsigned int i;
5257 struct data_reference *dr;
5259 FOR_EACH_VEC_ELT (datarefs, i, dr)
5260 free_data_ref (dr);
5261 datarefs.release ();