gcc/tree-data-ref.c

   1 /* Data references and dependences detectors.
   2    Copyright (C) 2003-2017 Free Software Foundation, Inc.
   3    Contributed by Sebastian Pop <pop@cri.ensmp.fr>
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 /* This pass walks a given loop structure searching for array
  22    references.  The information about the array accesses is recorded
  23    in DATA_REFERENCE structures.
  24
  25    The basic test for determining the dependences is:
  26    given two access functions chrec1 and chrec2 to a same array, and
  27    x and y two vectors from the iteration domain, the same element of
  28    the array is accessed twice at iterations x and y if and only if:
  29    |             chrec1 (x) == chrec2 (y).
  30
  31    The goals of this analysis are:
  32
  33    - to determine the independence: the relation between two
  34      independent accesses is qualified with the chrec_known (this
  35      information allows a loop parallelization),
  36
  37    - when two data references access the same data, to qualify the
  38      dependence relation with classic dependence representations:
  39
  40        - distance vectors
  41        - direction vectors
  42        - loop carried level dependence
  43        - polyhedron dependence
  44      or with the chains of recurrences based representation,
  45
  46    - to define a knowledge base for storing the data dependence
  47      information,
  48
  49    - to define an interface to access this data.
  50
  51
  52    Definitions:
  53
  54    - subscript: given two array accesses a subscript is the tuple
  55    composed of the access functions for a given dimension.  Example:
  56    Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
  57    (f1, g1), (f2, g2), (f3, g3).
  58
  59    - Diophantine equation: an equation whose coefficients and
  60    solutions are integer constants, for example the equation
  61    |   3*x + 2*y = 1
  62    has an integer solution x = 1 and y = -1.
  63
  64    References:
  65
  66    - "Advanced Compilation for High Performance Computing" by Randy
  67    Allen and Ken Kennedy.
  68    http://citeseer.ist.psu.edu/goff91practical.html
  69
  70    - "Loop Transformations for Restructuring Compilers - The Foundations"
  71    by Utpal Banerjee.
  72
  73
  74 */
  75
  76 #include "config.h"
  77 #include "system.h"
  78 #include "coretypes.h"
  79 #include "backend.h"
  80 #include "rtl.h"
  81 #include "tree.h"
  82 #include "gimple.h"
  83 #include "gimple-pretty-print.h"
  84 #include "alias.h"
  85 #include "fold-const.h"
  86 #include "expr.h"
  87 #include "gimple-iterator.h"
  88 #include "tree-ssa-loop-niter.h"
  89 #include "tree-ssa-loop.h"
  90 #include "tree-ssa.h"
  91 #include "cfgloop.h"
  92 #include "tree-data-ref.h"
  93 #include "tree-scalar-evolution.h"
  94 #include "dumpfile.h"
  95 #include "tree-affine.h"
  96 #include "params.h"
  97
  98 static struct datadep_stats
  99 {
 100   int num_dependence_tests;
 101   int num_dependence_dependent;
 102   int num_dependence_independent;
 103   int num_dependence_undetermined;
 104
 105   int num_subscript_tests;
 106   int num_subscript_undetermined;
 107   int num_same_subscript_function;
 108
 109   int num_ziv;
 110   int num_ziv_independent;
 111   int num_ziv_dependent;
 112   int num_ziv_unimplemented;
 113
 114   int num_siv;
 115   int num_siv_independent;
 116   int num_siv_dependent;
 117   int num_siv_unimplemented;
 118
 119   int num_miv;
 120   int num_miv_independent;
 121   int num_miv_dependent;
 122   int num_miv_unimplemented;
 123 } dependence_stats;
 124
 125 static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
 126                                            struct data_reference *,
 127                                            struct data_reference *,
 128                                            struct loop *);
 129 /* Returns true iff A divides B.  */
 130
 131 static inline bool
 132 tree_fold_divides_p (const_tree a, const_tree b)
 133 {
 134   gcc_assert (TREE_CODE (a) == INTEGER_CST);
 135   gcc_assert (TREE_CODE (b) == INTEGER_CST);
 136   return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
 137 }
 138
 139 /* Returns true iff A divides B.  */
 140
 141 static inline bool
 142 int_divides_p (int a, int b)
 143 {
 144   return ((b % a) == 0);
 145 }
 146
 147 \f
 148
 149 /* Dump into FILE all the data references from DATAREFS.  */
 150
 151 static void
 152 dump_data_references (FILE *file, vec<data_reference_p> datarefs)
 153 {
 154   unsigned int i;
 155   struct data_reference *dr;
 156
 157   FOR_EACH_VEC_ELT (datarefs, i, dr)
 158     dump_data_reference (file, dr);
 159 }
 160
 161 /* Unified dump into FILE all the data references from DATAREFS.  */
 162
 163 DEBUG_FUNCTION void
 164 debug (vec<data_reference_p> &ref)
 165 {
 166   dump_data_references (stderr, ref);
 167 }
 168
 169 DEBUG_FUNCTION void
 170 debug (vec<data_reference_p> *ptr)
 171 {
 172   if (ptr)
 173     debug (*ptr);
 174   else
 175     fprintf (stderr, "<nil>\n");
 176 }
 177
 178
 179 /* Dump into STDERR all the data references from DATAREFS.  */
 180
 181 DEBUG_FUNCTION void
 182 debug_data_references (vec<data_reference_p> datarefs)
 183 {
 184   dump_data_references (stderr, datarefs);
 185 }
 186
 187 /* Print to STDERR the data_reference DR.  */
 188
 189 DEBUG_FUNCTION void
 190 debug_data_reference (struct data_reference *dr)
 191 {
 192   dump_data_reference (stderr, dr);
 193 }
 194
 195 /* Dump function for a DATA_REFERENCE structure.  */
 196
 197 void
 198 dump_data_reference (FILE *outf,
 199                      struct data_reference *dr)
 200 {
 201   unsigned int i;
 202
 203   fprintf (outf, "#(Data Ref: \n");
 204   fprintf (outf, "#  bb: %d \n", gimple_bb (DR_STMT (dr))->index);
 205   fprintf (outf, "#  stmt: ");
 206   print_gimple_stmt (outf, DR_STMT (dr), 0);
 207   fprintf (outf, "#  ref: ");
 208   print_generic_stmt (outf, DR_REF (dr));
 209   fprintf (outf, "#  base_object: ");
 210   print_generic_stmt (outf, DR_BASE_OBJECT (dr));
 211
 212   for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
 213     {
 214       fprintf (outf, "#  Access function %d: ", i);
 215       print_generic_stmt (outf, DR_ACCESS_FN (dr, i));
 216     }
 217   fprintf (outf, "#)\n");
 218 }
 219
 220 /* Unified dump function for a DATA_REFERENCE structure.  */
 221
 222 DEBUG_FUNCTION void
 223 debug (data_reference &ref)
 224 {
 225   dump_data_reference (stderr, &ref);
 226 }
 227
 228 DEBUG_FUNCTION void
 229 debug (data_reference *ptr)
 230 {
 231   if (ptr)
 232     debug (*ptr);
 233   else
 234     fprintf (stderr, "<nil>\n");
 235 }
 236
 237
 238 /* Dumps the affine function described by FN to the file OUTF.  */
 239
 240 DEBUG_FUNCTION void
 241 dump_affine_function (FILE *outf, affine_fn fn)
 242 {
 243   unsigned i;
 244   tree coef;
 245
 246   print_generic_expr (outf, fn[0], TDF_SLIM);
 247   for (i = 1; fn.iterate (i, &coef); i++)
 248     {
 249       fprintf (outf, " + ");
 250       print_generic_expr (outf, coef, TDF_SLIM);
 251       fprintf (outf, " * x_%u", i);
 252     }
 253 }
 254
 255 /* Dumps the conflict function CF to the file OUTF.  */
 256
 257 DEBUG_FUNCTION void
 258 dump_conflict_function (FILE *outf, conflict_function *cf)
 259 {
 260   unsigned i;
 261
 262   if (cf->n == NO_DEPENDENCE)
 263     fprintf (outf, "no dependence");
 264   else if (cf->n == NOT_KNOWN)
 265     fprintf (outf, "not known");
 266   else
 267     {
 268       for (i = 0; i < cf->n; i++)
 269         {
 270           if (i != 0)
 271             fprintf (outf, " ");
 272           fprintf (outf, "[");
 273           dump_affine_function (outf, cf->fns[i]);
 274           fprintf (outf, "]");
 275         }
 276     }
 277 }
 278
 279 /* Dump function for a SUBSCRIPT structure.  */
 280
 281 DEBUG_FUNCTION void
 282 dump_subscript (FILE *outf, struct subscript *subscript)
 283 {
 284   conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
 285
 286   fprintf (outf, "\n (subscript \n");
 287   fprintf (outf, "  iterations_that_access_an_element_twice_in_A: ");
 288   dump_conflict_function (outf, cf);
 289   if (CF_NONTRIVIAL_P (cf))
 290     {
 291       tree last_iteration = SUB_LAST_CONFLICT (subscript);
 292       fprintf (outf, "\n  last_conflict: ");
 293       print_generic_expr (outf, last_iteration);
 294     }
 295
 296   cf = SUB_CONFLICTS_IN_B (subscript);
 297   fprintf (outf, "\n  iterations_that_access_an_element_twice_in_B: ");
 298   dump_conflict_function (outf, cf);
 299   if (CF_NONTRIVIAL_P (cf))
 300     {
 301       tree last_iteration = SUB_LAST_CONFLICT (subscript);
 302       fprintf (outf, "\n  last_conflict: ");
 303       print_generic_expr (outf, last_iteration);
 304     }
 305
 306   fprintf (outf, "\n  (Subscript distance: ");
 307   print_generic_expr (outf, SUB_DISTANCE (subscript));
 308   fprintf (outf, " ))\n");
 309 }
 310
 311 /* Print the classic direction vector DIRV to OUTF.  */
 312
 313 DEBUG_FUNCTION void
 314 print_direction_vector (FILE *outf,
 315                         lambda_vector dirv,
 316                         int length)
 317 {
 318   int eq;
 319
 320   for (eq = 0; eq < length; eq++)
 321     {
 322       enum data_dependence_direction dir = ((enum data_dependence_direction)
 323                                             dirv[eq]);
 324
 325       switch (dir)
 326         {
 327         case dir_positive:
 328           fprintf (outf, "    +");
 329           break;
 330         case dir_negative:
 331           fprintf (outf, "    -");
 332           break;
 333         case dir_equal:
 334           fprintf (outf, "    =");
 335           break;
 336         case dir_positive_or_equal:
 337           fprintf (outf, "   +=");
 338           break;
 339         case dir_positive_or_negative:
 340           fprintf (outf, "   +-");
 341           break;
 342         case dir_negative_or_equal:
 343           fprintf (outf, "   -=");
 344           break;
 345         case dir_star:
 346           fprintf (outf, "    *");
 347           break;
 348         default:
 349           fprintf (outf, "indep");
 350           break;
 351         }
 352     }
 353   fprintf (outf, "\n");
 354 }
 355
 356 /* Print a vector of direction vectors.  */
 357
 358 DEBUG_FUNCTION void
 359 print_dir_vectors (FILE *outf, vec<lambda_vector> dir_vects,
 360                    int length)
 361 {
 362   unsigned j;
 363   lambda_vector v;
 364
 365   FOR_EACH_VEC_ELT (dir_vects, j, v)
 366     print_direction_vector (outf, v, length);
 367 }
 368
 369 /* Print out a vector VEC of length N to OUTFILE.  */
 370
 371 DEBUG_FUNCTION void
 372 print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
 373 {
 374   int i;
 375
 376   for (i = 0; i < n; i++)
 377     fprintf (outfile, "%3d ", vector[i]);
 378   fprintf (outfile, "\n");
 379 }
 380
 381 /* Print a vector of distance vectors.  */
 382
 383 DEBUG_FUNCTION void
 384 print_dist_vectors (FILE *outf, vec<lambda_vector> dist_vects,
 385                     int length)
 386 {
 387   unsigned j;
 388   lambda_vector v;
 389
 390   FOR_EACH_VEC_ELT (dist_vects, j, v)
 391     print_lambda_vector (outf, v, length);
 392 }
 393
 394 /* Dump function for a DATA_DEPENDENCE_RELATION structure.  */
 395
 396 DEBUG_FUNCTION void
 397 dump_data_dependence_relation (FILE *outf,
 398                                struct data_dependence_relation *ddr)
 399 {
 400   struct data_reference *dra, *drb;
 401
 402   fprintf (outf, "(Data Dep: \n");
 403
 404   if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
 405     {
 406       if (ddr)
 407         {
 408           dra = DDR_A (ddr);
 409           drb = DDR_B (ddr);
 410           if (dra)
 411             dump_data_reference (outf, dra);
 412           else
 413             fprintf (outf, "    (nil)\n");
 414           if (drb)
 415             dump_data_reference (outf, drb);
 416           else
 417             fprintf (outf, "    (nil)\n");
 418         }
 419       fprintf (outf, "    (don't know)\n)\n");
 420       return;
 421     }
 422
 423   dra = DDR_A (ddr);
 424   drb = DDR_B (ddr);
 425   dump_data_reference (outf, dra);
 426   dump_data_reference (outf, drb);
 427
 428   if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
 429     fprintf (outf, "    (no dependence)\n");
 430
 431   else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
 432     {
 433       unsigned int i;
 434       struct loop *loopi;
 435
 436       for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
 437         {
 438           fprintf (outf, "  access_fn_A: ");
 439           print_generic_stmt (outf, DR_ACCESS_FN (dra, i));
 440           fprintf (outf, "  access_fn_B: ");
 441           print_generic_stmt (outf, DR_ACCESS_FN (drb, i));
 442           dump_subscript (outf, DDR_SUBSCRIPT (ddr, i));
 443         }
 444
 445       fprintf (outf, "  inner loop index: %d\n", DDR_INNER_LOOP (ddr));
 446       fprintf (outf, "  loop nest: (");
 447       FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr), i, loopi)
 448         fprintf (outf, "%d ", loopi->num);
 449       fprintf (outf, ")\n");
 450
 451       for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
 452         {
 453           fprintf (outf, "  distance_vector: ");
 454           print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
 455                                DDR_NB_LOOPS (ddr));
 456         }
 457
 458       for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
 459         {
 460           fprintf (outf, "  direction_vector: ");
 461           print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
 462                                   DDR_NB_LOOPS (ddr));
 463         }
 464     }
 465
 466   fprintf (outf, ")\n");
 467 }
 468
 469 /* Debug version.  */
 470
 471 DEBUG_FUNCTION void
 472 debug_data_dependence_relation (struct data_dependence_relation *ddr)
 473 {
 474   dump_data_dependence_relation (stderr, ddr);
 475 }
 476
 477 /* Dump into FILE all the dependence relations from DDRS.  */
 478
 479 DEBUG_FUNCTION void
 480 dump_data_dependence_relations (FILE *file,
 481                                 vec<ddr_p> ddrs)
 482 {
 483   unsigned int i;
 484   struct data_dependence_relation *ddr;
 485
 486   FOR_EACH_VEC_ELT (ddrs, i, ddr)
 487     dump_data_dependence_relation (file, ddr);
 488 }
 489
 490 DEBUG_FUNCTION void
 491 debug (vec<ddr_p> &ref)
 492 {
 493   dump_data_dependence_relations (stderr, ref);
 494 }
 495
 496 DEBUG_FUNCTION void
 497 debug (vec<ddr_p> *ptr)
 498 {
 499   if (ptr)
 500     debug (*ptr);
 501   else
 502     fprintf (stderr, "<nil>\n");
 503 }
 504
 505
 506 /* Dump to STDERR all the dependence relations from DDRS.  */
 507
 508 DEBUG_FUNCTION void
 509 debug_data_dependence_relations (vec<ddr_p> ddrs)
 510 {
 511   dump_data_dependence_relations (stderr, ddrs);
 512 }
 513
 514 /* Dumps the distance and direction vectors in FILE.  DDRS contains
 515    the dependence relations, and VECT_SIZE is the size of the
 516    dependence vectors, or in other words the number of loops in the
 517    considered nest.  */
 518
 519 DEBUG_FUNCTION void
 520 dump_dist_dir_vectors (FILE *file, vec<ddr_p> ddrs)
 521 {
 522   unsigned int i, j;
 523   struct data_dependence_relation *ddr;
 524   lambda_vector v;
 525
 526   FOR_EACH_VEC_ELT (ddrs, i, ddr)
 527     if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
 528       {
 529         FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), j, v)
 530           {
 531             fprintf (file, "DISTANCE_V (");
 532             print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
 533             fprintf (file, ")\n");
 534           }
 535
 536         FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), j, v)
 537           {
 538             fprintf (file, "DIRECTION_V (");
 539             print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
 540             fprintf (file, ")\n");
 541           }
 542       }
 543
 544   fprintf (file, "\n\n");
 545 }
 546
 547 /* Dumps the data dependence relations DDRS in FILE.  */
 548
 549 DEBUG_FUNCTION void
 550 dump_ddrs (FILE *file, vec<ddr_p> ddrs)
 551 {
 552   unsigned int i;
 553   struct data_dependence_relation *ddr;
 554
 555   FOR_EACH_VEC_ELT (ddrs, i, ddr)
 556     dump_data_dependence_relation (file, ddr);
 557
 558   fprintf (file, "\n\n");
 559 }
 560
 561 DEBUG_FUNCTION void
 562 debug_ddrs (vec<ddr_p> ddrs)
 563 {
 564   dump_ddrs (stderr, ddrs);
 565 }
 566
 567 /* Helper function for split_constant_offset.  Expresses OP0 CODE OP1
 568    (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
 569    constant of type ssizetype, and returns true.  If we cannot do this
 570    with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
 571    is returned.  */
 572
 573 static bool
 574 split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
 575                          tree *var, tree *off)
 576 {
 577   tree var0, var1;
 578   tree off0, off1;
 579   enum tree_code ocode = code;
 580
 581   *var = NULL_TREE;
 582   *off = NULL_TREE;
 583
 584   switch (code)
 585     {
 586     case INTEGER_CST:
 587       *var = build_int_cst (type, 0);
 588       *off = fold_convert (ssizetype, op0);
 589       return true;
 590
 591     case POINTER_PLUS_EXPR:
 592       ocode = PLUS_EXPR;
 593       /* FALLTHROUGH */
 594     case PLUS_EXPR:
 595     case MINUS_EXPR:
 596       split_constant_offset (op0, &var0, &off0);
 597       split_constant_offset (op1, &var1, &off1);
 598       *var = fold_build2 (code, type, var0, var1);
 599       *off = size_binop (ocode, off0, off1);
 600       return true;
 601
 602     case MULT_EXPR:
 603       if (TREE_CODE (op1) != INTEGER_CST)
 604         return false;
 605
 606       split_constant_offset (op0, &var0, &off0);
 607       *var = fold_build2 (MULT_EXPR, type, var0, op1);
 608       *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
 609       return true;
 610
 611     case ADDR_EXPR:
 612       {
 613         tree base, poffset;
 614         HOST_WIDE_INT pbitsize, pbitpos;
 615         machine_mode pmode;
 616         int punsignedp, preversep, pvolatilep;
 617
 618         op0 = TREE_OPERAND (op0, 0);
 619         base
 620           = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset, &pmode,
 621                                  &punsignedp, &preversep, &pvolatilep);
 622
 623         if (pbitpos % BITS_PER_UNIT != 0)
 624           return false;
 625         base = build_fold_addr_expr (base);
 626         off0 = ssize_int (pbitpos / BITS_PER_UNIT);
 627
 628         if (poffset)
 629           {
 630             split_constant_offset (poffset, &poffset, &off1);
 631             off0 = size_binop (PLUS_EXPR, off0, off1);
 632             if (POINTER_TYPE_P (TREE_TYPE (base)))
 633               base = fold_build_pointer_plus (base, poffset);
 634             else
 635               base = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base,
 636                                   fold_convert (TREE_TYPE (base), poffset));
 637           }
 638
 639         var0 = fold_convert (type, base);
 640
 641         /* If variable length types are involved, punt, otherwise casts
 642            might be converted into ARRAY_REFs in gimplify_conversion.
 643            To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
 644            possibly no longer appears in current GIMPLE, might resurface.
 645            This perhaps could run
 646            if (CONVERT_EXPR_P (var0))
 647              {
 648                gimplify_conversion (&var0);
 649                // Attempt to fill in any within var0 found ARRAY_REF's
 650                // element size from corresponding op embedded ARRAY_REF,
 651                // if unsuccessful, just punt.
 652              }  */
 653         while (POINTER_TYPE_P (type))
 654           type = TREE_TYPE (type);
 655         if (int_size_in_bytes (type) < 0)
 656           return false;
 657
 658         *var = var0;
 659         *off = off0;
 660         return true;
 661       }
 662
 663     case SSA_NAME:
 664       {
 665         if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op0))
 666           return false;
 667
 668         gimple *def_stmt = SSA_NAME_DEF_STMT (op0);
 669         enum tree_code subcode;
 670
 671         if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
 672           return false;
 673
 674         var0 = gimple_assign_rhs1 (def_stmt);
 675         subcode = gimple_assign_rhs_code (def_stmt);
 676         var1 = gimple_assign_rhs2 (def_stmt);
 677
 678         return split_constant_offset_1 (type, var0, subcode, var1, var, off);
 679       }
 680     CASE_CONVERT:
 681       {
 682         /* We must not introduce undefined overflow, and we must not change the value.
 683            Hence we're okay if the inner type doesn't overflow to start with
 684            (pointer or signed), the outer type also is an integer or pointer
 685            and the outer precision is at least as large as the inner.  */
 686         tree itype = TREE_TYPE (op0);
 687         if ((POINTER_TYPE_P (itype)
 688              || (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_UNDEFINED (itype)))
 689             && TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
 690             && (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
 691           {
 692             split_constant_offset (op0, &var0, off);
 693             *var = fold_convert (type, var0);
 694             return true;
 695           }
 696         return false;
 697       }
 698
 699     default:
 700       return false;
 701     }
 702 }
 703
 704 /* Expresses EXP as VAR + OFF, where off is a constant.  The type of OFF
 705    will be ssizetype.  */
 706
 707 void
 708 split_constant_offset (tree exp, tree *var, tree *off)
 709 {
 710   tree type = TREE_TYPE (exp), otype, op0, op1, e, o;
 711   enum tree_code code;
 712
 713   *var = exp;
 714   *off = ssize_int (0);
 715   STRIP_NOPS (exp);
 716
 717   if (tree_is_chrec (exp)
 718       || get_gimple_rhs_class (TREE_CODE (exp)) == GIMPLE_TERNARY_RHS)
 719     return;
 720
 721   otype = TREE_TYPE (exp);
 722   code = TREE_CODE (exp);
 723   extract_ops_from_tree (exp, &code, &op0, &op1);
 724   if (split_constant_offset_1 (otype, op0, code, op1, &e, &o))
 725     {
 726       *var = fold_convert (type, e);
 727       *off = o;
 728     }
 729 }
 730
 731 /* Returns the address ADDR of an object in a canonical shape (without nop
 732    casts, and with type of pointer to the object).  */
 733
 734 static tree
 735 canonicalize_base_object_address (tree addr)
 736 {
 737   tree orig = addr;
 738
 739   STRIP_NOPS (addr);
 740
 741   /* The base address may be obtained by casting from integer, in that case
 742      keep the cast.  */
 743   if (!POINTER_TYPE_P (TREE_TYPE (addr)))
 744     return orig;
 745
 746   if (TREE_CODE (addr) != ADDR_EXPR)
 747     return addr;
 748
 749   return build_fold_addr_expr (TREE_OPERAND (addr, 0));
 750 }
 751
 752 /* Analyzes the behavior of the memory reference DR in the innermost loop or
 753    basic block that contains it.  Returns true if analysis succeed or false
 754    otherwise.  */
 755
 756 bool
 757 dr_analyze_innermost (struct data_reference *dr, struct loop *nest)
 758 {
 759   gimple *stmt = DR_STMT (dr);
 760   struct loop *loop = loop_containing_stmt (stmt);
 761   tree ref = DR_REF (dr);
 762   HOST_WIDE_INT pbitsize, pbitpos;
 763   tree base, poffset;
 764   machine_mode pmode;
 765   int punsignedp, preversep, pvolatilep;
 766   affine_iv base_iv, offset_iv;
 767   tree init, dinit, step;
 768   bool in_loop = (loop && loop->num);
 769
 770   if (dump_file && (dump_flags & TDF_DETAILS))
 771     fprintf (dump_file, "analyze_innermost: ");
 772
 773   base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset, &pmode,
 774                               &punsignedp, &preversep, &pvolatilep);
 775   gcc_assert (base != NULL_TREE);
 776
 777   if (pbitpos % BITS_PER_UNIT != 0)
 778     {
 779       if (dump_file && (dump_flags & TDF_DETAILS))
 780         fprintf (dump_file, "failed: bit offset alignment.\n");
 781       return false;
 782     }
 783
 784   if (preversep)
 785     {
 786       if (dump_file && (dump_flags & TDF_DETAILS))
 787         fprintf (dump_file, "failed: reverse storage order.\n");
 788       return false;
 789     }
 790
 791   if (TREE_CODE (base) == MEM_REF)
 792     {
 793       if (!integer_zerop (TREE_OPERAND (base, 1)))
 794         {
 795           offset_int moff = mem_ref_offset (base);
 796           tree mofft = wide_int_to_tree (sizetype, moff);
 797           if (!poffset)
 798             poffset = mofft;
 799           else
 800             poffset = size_binop (PLUS_EXPR, poffset, mofft);
 801         }
 802       base = TREE_OPERAND (base, 0);
 803     }
 804   else
 805     base = build_fold_addr_expr (base);
 806
 807   if (in_loop)
 808     {
 809       if (!simple_iv (loop, loop_containing_stmt (stmt), base, &base_iv,
 810                       nest ? true : false))
 811         {
 812           if (nest)
 813             {
 814               if (dump_file && (dump_flags & TDF_DETAILS))
 815                 fprintf (dump_file, "failed: evolution of base is not"
 816                                     " affine.\n");
 817               return false;
 818             }
 819           else
 820             {
 821               base_iv.base = base;
 822               base_iv.step = ssize_int (0);
 823               base_iv.no_overflow = true;
 824             }
 825         }
 826     }
 827   else
 828     {
 829       base_iv.base = base;
 830       base_iv.step = ssize_int (0);
 831       base_iv.no_overflow = true;
 832     }
 833
 834   if (!poffset)
 835     {
 836       offset_iv.base = ssize_int (0);
 837       offset_iv.step = ssize_int (0);
 838     }
 839   else
 840     {
 841       if (!in_loop)
 842         {
 843           offset_iv.base = poffset;
 844           offset_iv.step = ssize_int (0);
 845         }
 846       else if (!simple_iv (loop, loop_containing_stmt (stmt),
 847                            poffset, &offset_iv,
 848                            nest ? true : false))
 849         {
 850           if (nest)
 851             {
 852               if (dump_file && (dump_flags & TDF_DETAILS))
 853                 fprintf (dump_file, "failed: evolution of offset is not"
 854                                     " affine.\n");
 855               return false;
 856             }
 857           else
 858             {
 859               offset_iv.base = poffset;
 860               offset_iv.step = ssize_int (0);
 861             }
 862         }
 863     }
 864
 865   init = ssize_int (pbitpos / BITS_PER_UNIT);
 866   split_constant_offset (base_iv.base, &base_iv.base, &dinit);
 867   init =  size_binop (PLUS_EXPR, init, dinit);
 868   split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
 869   init =  size_binop (PLUS_EXPR, init, dinit);
 870
 871   step = size_binop (PLUS_EXPR,
 872                      fold_convert (ssizetype, base_iv.step),
 873                      fold_convert (ssizetype, offset_iv.step));
 874
 875   DR_BASE_ADDRESS (dr) = canonicalize_base_object_address (base_iv.base);
 876
 877   DR_OFFSET (dr) = fold_convert (ssizetype, offset_iv.base);
 878   DR_INIT (dr) = init;
 879   DR_STEP (dr) = step;
 880
 881   DR_ALIGNED_TO (dr) = size_int (highest_pow2_factor (offset_iv.base));
 882
 883   if (dump_file && (dump_flags & TDF_DETAILS))
 884     fprintf (dump_file, "success.\n");
 885
 886   return true;
 887 }
 888
 889 /* Determines the base object and the list of indices of memory reference
 890    DR, analyzed in LOOP and instantiated in loop nest NEST.  */
 891
 892 static void
 893 dr_analyze_indices (struct data_reference *dr, loop_p nest, loop_p loop)
 894 {
 895   vec<tree> access_fns = vNULL;
 896   tree ref, op;
 897   tree base, off, access_fn;
 898   basic_block before_loop;
 899
 900   /* If analyzing a basic-block there are no indices to analyze
 901      and thus no access functions.  */
 902   if (!nest)
 903     {
 904       DR_BASE_OBJECT (dr) = DR_REF (dr);
 905       DR_ACCESS_FNS (dr).create (0);
 906       return;
 907     }
 908
 909   ref = DR_REF (dr);
 910   before_loop = block_before_loop (nest);
 911
 912   /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
 913      into a two element array with a constant index.  The base is
 914      then just the immediate underlying object.  */
 915   if (TREE_CODE (ref) == REALPART_EXPR)
 916     {
 917       ref = TREE_OPERAND (ref, 0);
 918       access_fns.safe_push (integer_zero_node);
 919     }
 920   else if (TREE_CODE (ref) == IMAGPART_EXPR)
 921     {
 922       ref = TREE_OPERAND (ref, 0);
 923       access_fns.safe_push (integer_one_node);
 924     }
 925
 926   /* Analyze access functions of dimensions we know to be independent.  */
 927   while (handled_component_p (ref))
 928     {
 929       if (TREE_CODE (ref) == ARRAY_REF)
 930         {
 931           op = TREE_OPERAND (ref, 1);
 932           access_fn = analyze_scalar_evolution (loop, op);
 933           access_fn = instantiate_scev (before_loop, loop, access_fn);
 934           access_fns.safe_push (access_fn);
 935         }
 936       else if (TREE_CODE (ref) == COMPONENT_REF
 937                && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) == RECORD_TYPE)
 938         {
 939           /* For COMPONENT_REFs of records (but not unions!) use the
 940              FIELD_DECL offset as constant access function so we can
 941              disambiguate a[i].f1 and a[i].f2.  */
 942           tree off = component_ref_field_offset (ref);
 943           off = size_binop (PLUS_EXPR,
 944                             size_binop (MULT_EXPR,
 945                                         fold_convert (bitsizetype, off),
 946                                         bitsize_int (BITS_PER_UNIT)),
 947                             DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref, 1)));
 948           access_fns.safe_push (off);
 949         }
 950       else
 951         /* If we have an unhandled component we could not translate
 952            to an access function stop analyzing.  We have determined
 953            our base object in this case.  */
 954         break;
 955
 956       ref = TREE_OPERAND (ref, 0);
 957     }
 958
 959   /* If the address operand of a MEM_REF base has an evolution in the
 960      analyzed nest, add it as an additional independent access-function.  */
 961   if (TREE_CODE (ref) == MEM_REF)
 962     {
 963       op = TREE_OPERAND (ref, 0);
 964       access_fn = analyze_scalar_evolution (loop, op);
 965       access_fn = instantiate_scev (before_loop, loop, access_fn);
 966       if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
 967         {
 968           tree orig_type;
 969           tree memoff = TREE_OPERAND (ref, 1);
 970           base = initial_condition (access_fn);
 971           orig_type = TREE_TYPE (base);
 972           STRIP_USELESS_TYPE_CONVERSION (base);
 973           split_constant_offset (base, &base, &off);
 974           STRIP_USELESS_TYPE_CONVERSION (base);
 975           /* Fold the MEM_REF offset into the evolutions initial
 976              value to make more bases comparable.  */
 977           if (!integer_zerop (memoff))
 978             {
 979               off = size_binop (PLUS_EXPR, off,
 980                                 fold_convert (ssizetype, memoff));
 981               memoff = build_int_cst (TREE_TYPE (memoff), 0);
 982             }
 983           /* Adjust the offset so it is a multiple of the access type
 984              size and thus we separate bases that can possibly be used
 985              to produce partial overlaps (which the access_fn machinery
 986              cannot handle).  */
 987           wide_int rem;
 988           if (TYPE_SIZE_UNIT (TREE_TYPE (ref))
 989               && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (ref))) == INTEGER_CST
 990               && !integer_zerop (TYPE_SIZE_UNIT (TREE_TYPE (ref))))
 991             rem = wi::mod_trunc (off, TYPE_SIZE_UNIT (TREE_TYPE (ref)), SIGNED);
 992           else
 993             /* If we can't compute the remainder simply force the initial
 994                condition to zero.  */
 995             rem = off;
 996           off = wide_int_to_tree (ssizetype, wi::sub (off, rem));
 997           memoff = wide_int_to_tree (TREE_TYPE (memoff), rem);
 998           /* And finally replace the initial condition.  */
 999           access_fn = chrec_replace_initial_condition
1000               (access_fn, fold_convert (orig_type, off));
1001           /* ???  This is still not a suitable base object for
1002              dr_may_alias_p - the base object needs to be an
1003              access that covers the object as whole.  With
1004              an evolution in the pointer this cannot be
1005              guaranteed.
1006              As a band-aid, mark the access so we can special-case
1007              it in dr_may_alias_p.  */
1008           tree old = ref;
1009           ref = fold_build2_loc (EXPR_LOCATION (ref),
1010                                  MEM_REF, TREE_TYPE (ref),
1011                                  base, memoff);
1012           MR_DEPENDENCE_CLIQUE (ref) = MR_DEPENDENCE_CLIQUE (old);
1013           MR_DEPENDENCE_BASE (ref) = MR_DEPENDENCE_BASE (old);
1014           DR_UNCONSTRAINED_BASE (dr) = true;
1015           access_fns.safe_push (access_fn);
1016         }
1017     }
1018   else if (DECL_P (ref))
1019     {
1020       /* Canonicalize DR_BASE_OBJECT to MEM_REF form.  */
1021       ref = build2 (MEM_REF, TREE_TYPE (ref),
1022                     build_fold_addr_expr (ref),
1023                     build_int_cst (reference_alias_ptr_type (ref), 0));
1024     }
1025
1026   DR_BASE_OBJECT (dr) = ref;
1027   DR_ACCESS_FNS (dr) = access_fns;
1028 }
1029
1030 /* Extracts the alias analysis information from the memory reference DR.  */
1031
1032 static void
1033 dr_analyze_alias (struct data_reference *dr)
1034 {
1035   tree ref = DR_REF (dr);
1036   tree base = get_base_address (ref), addr;
1037
1038   if (INDIRECT_REF_P (base)
1039       || TREE_CODE (base) == MEM_REF)
1040     {
1041       addr = TREE_OPERAND (base, 0);
1042       if (TREE_CODE (addr) == SSA_NAME)
1043         DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
1044     }
1045 }
1046
1047 /* Frees data reference DR.  */
1048
1049 void
1050 free_data_ref (data_reference_p dr)
1051 {
1052   DR_ACCESS_FNS (dr).release ();
1053   free (dr);
1054 }
1055
1056 /* Analyzes memory reference MEMREF accessed in STMT.  The reference
1057    is read if IS_READ is true, write otherwise.  Returns the
1058    data_reference description of MEMREF.  NEST is the outermost loop
1059    in which the reference should be instantiated, LOOP is the loop in
1060    which the data reference should be analyzed.  */
1061
1062 struct data_reference *
1063 create_data_ref (loop_p nest, loop_p loop, tree memref, gimple *stmt,
1064                  bool is_read)
1065 {
1066   struct data_reference *dr;
1067
1068   if (dump_file && (dump_flags & TDF_DETAILS))
1069     {
1070       fprintf (dump_file, "Creating dr for ");
1071       print_generic_expr (dump_file, memref, TDF_SLIM);
1072       fprintf (dump_file, "\n");
1073     }
1074
1075   dr = XCNEW (struct data_reference);
1076   DR_STMT (dr) = stmt;
1077   DR_REF (dr) = memref;
1078   DR_IS_READ (dr) = is_read;
1079
1080   dr_analyze_innermost (dr, nest);
1081   dr_analyze_indices (dr, nest, loop);
1082   dr_analyze_alias (dr);
1083
1084   if (dump_file && (dump_flags & TDF_DETAILS))
1085     {
1086       unsigned i;
1087       fprintf (dump_file, "\tbase_address: ");
1088       print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
1089       fprintf (dump_file, "\n\toffset from base address: ");
1090       print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
1091       fprintf (dump_file, "\n\tconstant offset from base address: ");
1092       print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
1093       fprintf (dump_file, "\n\tstep: ");
1094       print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
1095       fprintf (dump_file, "\n\taligned to: ");
1096       print_generic_expr (dump_file, DR_ALIGNED_TO (dr), TDF_SLIM);
1097       fprintf (dump_file, "\n\tbase_object: ");
1098       print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
1099       fprintf (dump_file, "\n");
1100       for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
1101         {
1102           fprintf (dump_file, "\tAccess function %d: ", i);
1103           print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
1104         }
1105     }
1106
1107   return dr;
1108 }
1109
1110 /*  A helper function computes order between two tree epxressions T1 and T2.
1111     This is used in comparator functions sorting objects based on the order
1112     of tree expressions.  The function returns -1, 0, or 1.  */
1113
1114 int
1115 data_ref_compare_tree (tree t1, tree t2)
1116 {
1117   int i, cmp;
1118   enum tree_code code;
1119   char tclass;
1120
1121   if (t1 == t2)
1122     return 0;
1123   if (t1 == NULL)
1124     return -1;
1125   if (t2 == NULL)
1126     return 1;
1127
1128   STRIP_NOPS (t1);
1129   STRIP_NOPS (t2);
1130
1131   if (TREE_CODE (t1) != TREE_CODE (t2))
1132     return TREE_CODE (t1) < TREE_CODE (t2) ? -1 : 1;
1133
1134   code = TREE_CODE (t1);
1135   switch (code)
1136     {
1137     /* For const values, we can just use hash values for comparisons.  */
1138     case INTEGER_CST:
1139     case REAL_CST:
1140     case FIXED_CST:
1141     case STRING_CST:
1142     case COMPLEX_CST:
1143     case VECTOR_CST:
1144       {
1145         hashval_t h1 = iterative_hash_expr (t1, 0);
1146         hashval_t h2 = iterative_hash_expr (t2, 0);
1147         if (h1 != h2)
1148           return h1 < h2 ? -1 : 1;
1149         break;
1150       }
1151
1152     case SSA_NAME:
1153       cmp = data_ref_compare_tree (SSA_NAME_VAR (t1), SSA_NAME_VAR (t2));
1154       if (cmp != 0)
1155         return cmp;
1156
1157       if (SSA_NAME_VERSION (t1) != SSA_NAME_VERSION (t2))
1158         return SSA_NAME_VERSION (t1) < SSA_NAME_VERSION (t2) ? -1 : 1;
1159       break;
1160
1161     default:
1162       tclass = TREE_CODE_CLASS (code);
1163
1164       /* For var-decl, we could compare their UIDs.  */
1165       if (tclass == tcc_declaration)
1166         {
1167           if (DECL_UID (t1) != DECL_UID (t2))
1168             return DECL_UID (t1) < DECL_UID (t2) ? -1 : 1;
1169           break;
1170         }
1171
1172       /* For expressions with operands, compare their operands recursively.  */
1173       for (i = TREE_OPERAND_LENGTH (t1) - 1; i >= 0; --i)
1174         {
1175           cmp = data_ref_compare_tree (TREE_OPERAND (t1, i),
1176                                        TREE_OPERAND (t2, i));
1177           if (cmp != 0)
1178             return cmp;
1179         }
1180     }
1181
1182   return 0;
1183 }
1184
1185 /* Return TRUE it's possible to resolve data dependence DDR by runtime alias
1186    check.  */
1187
1188 bool
1189 runtime_alias_check_p (ddr_p ddr, struct loop *loop, bool speed_p)
1190 {
1191   if (dump_enabled_p ())
1192     {
1193       dump_printf (MSG_NOTE, "consider run-time aliasing test between ");
1194       dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (DDR_A (ddr)));
1195       dump_printf (MSG_NOTE,  " and ");
1196       dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (DDR_B (ddr)));
1197       dump_printf (MSG_NOTE, "\n");
1198     }
1199
1200   if (!speed_p)
1201     {
1202       if (dump_enabled_p ())
1203         dump_printf (MSG_MISSED_OPTIMIZATION,
1204                      "runtime alias check not supported when optimizing "
1205                      "for size.\n");
1206       return false;
1207     }
1208
1209   /* FORNOW: We don't support versioning with outer-loop in either
1210      vectorization or loop distribution.  */
1211   if (loop != NULL && loop->inner != NULL)
1212     {
1213       if (dump_enabled_p ())
1214         dump_printf (MSG_MISSED_OPTIMIZATION,
1215                      "runtime alias check not supported for outer loop.\n");
1216       return false;
1217     }
1218
1219   /* FORNOW: We don't support creating runtime alias tests for non-constant
1220      step.  */
1221   if (TREE_CODE (DR_STEP (DDR_A (ddr))) != INTEGER_CST
1222       || TREE_CODE (DR_STEP (DDR_B (ddr))) != INTEGER_CST)
1223     {
1224       if (dump_enabled_p ())
1225         dump_printf (MSG_MISSED_OPTIMIZATION,
1226                      "runtime alias check not supported for non-constant "
1227                      "step\n");
1228       return false;
1229     }
1230
1231   return true;
1232 }
1233
1234 /* Operator == between two dr_with_seg_len objects.
1235
1236    This equality operator is used to make sure two data refs
1237    are the same one so that we will consider to combine the
1238    aliasing checks of those two pairs of data dependent data
1239    refs.  */
1240
1241 static bool
1242 operator == (const dr_with_seg_len& d1,
1243              const dr_with_seg_len& d2)
1244 {
1245   return operand_equal_p (DR_BASE_ADDRESS (d1.dr),
1246                           DR_BASE_ADDRESS (d2.dr), 0)
1247            && data_ref_compare_tree (DR_OFFSET (d1.dr), DR_OFFSET (d2.dr)) == 0
1248            && data_ref_compare_tree (DR_INIT (d1.dr), DR_INIT (d2.dr)) == 0
1249            && data_ref_compare_tree (d1.seg_len, d2.seg_len) == 0;
1250 }
1251
1252 /* Comparison function for sorting objects of dr_with_seg_len_pair_t
1253    so that we can combine aliasing checks in one scan.  */
1254
1255 static int
1256 comp_dr_with_seg_len_pair (const void *pa_, const void *pb_)
1257 {
1258   const dr_with_seg_len_pair_t* pa = (const dr_with_seg_len_pair_t *) pa_;
1259   const dr_with_seg_len_pair_t* pb = (const dr_with_seg_len_pair_t *) pb_;
1260   const dr_with_seg_len &a1 = pa->first, &a2 = pa->second;
1261   const dr_with_seg_len &b1 = pb->first, &b2 = pb->second;
1262
1263   /* For DR pairs (a, b) and (c, d), we only consider to merge the alias checks
1264      if a and c have the same basic address snd step, and b and d have the same
1265      address and step.  Therefore, if any a&c or b&d don't have the same address
1266      and step, we don't care the order of those two pairs after sorting.  */
1267   int comp_res;
1268
1269   if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a1.dr),
1270                                          DR_BASE_ADDRESS (b1.dr))) != 0)
1271     return comp_res;
1272   if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a2.dr),
1273                                          DR_BASE_ADDRESS (b2.dr))) != 0)
1274     return comp_res;
1275   if ((comp_res = data_ref_compare_tree (DR_STEP (a1.dr),
1276                                          DR_STEP (b1.dr))) != 0)
1277     return comp_res;
1278   if ((comp_res = data_ref_compare_tree (DR_STEP (a2.dr),
1279                                          DR_STEP (b2.dr))) != 0)
1280     return comp_res;
1281   if ((comp_res = data_ref_compare_tree (DR_OFFSET (a1.dr),
1282                                          DR_OFFSET (b1.dr))) != 0)
1283     return comp_res;
1284   if ((comp_res = data_ref_compare_tree (DR_INIT (a1.dr),
1285                                          DR_INIT (b1.dr))) != 0)
1286     return comp_res;
1287   if ((comp_res = data_ref_compare_tree (DR_OFFSET (a2.dr),
1288                                          DR_OFFSET (b2.dr))) != 0)
1289     return comp_res;
1290   if ((comp_res = data_ref_compare_tree (DR_INIT (a2.dr),
1291                                          DR_INIT (b2.dr))) != 0)
1292     return comp_res;
1293
1294   return 0;
1295 }
1296
1297 /* Merge alias checks recorded in ALIAS_PAIRS and remove redundant ones.
1298    FACTOR is number of iterations that each data reference is accessed.
1299
1300    Basically, for each pair of dependent data refs store_ptr_0 & load_ptr_0,
1301    we create an expression:
1302
1303    ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
1304    || (load_ptr_0 + load_segment_length_0) <= store_ptr_0))
1305
1306    for aliasing checks.  However, in some cases we can decrease the number
1307    of checks by combining two checks into one.  For example, suppose we have
1308    another pair of data refs store_ptr_0 & load_ptr_1, and if the following
1309    condition is satisfied:
1310
1311    load_ptr_0 < load_ptr_1  &&
1312    load_ptr_1 - load_ptr_0 - load_segment_length_0 < store_segment_length_0
1313
1314    (this condition means, in each iteration of vectorized loop, the accessed
1315    memory of store_ptr_0 cannot be between the memory of load_ptr_0 and
1316    load_ptr_1.)
1317
1318    we then can use only the following expression to finish the alising checks
1319    between store_ptr_0 & load_ptr_0 and store_ptr_0 & load_ptr_1:
1320
1321    ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
1322    || (load_ptr_1 + load_segment_length_1 <= store_ptr_0))
1323
1324    Note that we only consider that load_ptr_0 and load_ptr_1 have the same
1325    basic address.  */
1326
1327 void
1328 prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs,
1329                                unsigned HOST_WIDE_INT factor)
1330 {
1331   /* Sort the collected data ref pairs so that we can scan them once to
1332      combine all possible aliasing checks.  */
1333   alias_pairs->qsort (comp_dr_with_seg_len_pair);
1334
1335   /* Scan the sorted dr pairs and check if we can combine alias checks
1336      of two neighboring dr pairs.  */
1337   for (size_t i = 1; i < alias_pairs->length (); ++i)
1338     {
1339       /* Deal with two ddrs (dr_a1, dr_b1) and (dr_a2, dr_b2).  */
1340       dr_with_seg_len *dr_a1 = &(*alias_pairs)[i-1].first,
1341                       *dr_b1 = &(*alias_pairs)[i-1].second,
1342                       *dr_a2 = &(*alias_pairs)[i].first,
1343                       *dr_b2 = &(*alias_pairs)[i].second;
1344
1345       /* Remove duplicate data ref pairs.  */
1346       if (*dr_a1 == *dr_a2 && *dr_b1 == *dr_b2)
1347         {
1348           if (dump_enabled_p ())
1349             {
1350               dump_printf (MSG_NOTE, "found equal ranges ");
1351               dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a1->dr));
1352               dump_printf (MSG_NOTE,  ", ");
1353               dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b1->dr));
1354               dump_printf (MSG_NOTE,  " and ");
1355               dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a2->dr));
1356               dump_printf (MSG_NOTE,  ", ");
1357               dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b2->dr));
1358               dump_printf (MSG_NOTE, "\n");
1359             }
1360           alias_pairs->ordered_remove (i--);
1361           continue;
1362         }
1363
1364       if (*dr_a1 == *dr_a2 || *dr_b1 == *dr_b2)
1365         {
1366           /* We consider the case that DR_B1 and DR_B2 are same memrefs,
1367              and DR_A1 and DR_A2 are two consecutive memrefs.  */
1368           if (*dr_a1 == *dr_a2)
1369             {
1370               std::swap (dr_a1, dr_b1);
1371               std::swap (dr_a2, dr_b2);
1372             }
1373
1374           if (!operand_equal_p (DR_BASE_ADDRESS (dr_a1->dr),
1375                                 DR_BASE_ADDRESS (dr_a2->dr), 0)
1376               || !operand_equal_p (DR_OFFSET (dr_a1->dr),
1377                                    DR_OFFSET (dr_a2->dr), 0)
1378               || !tree_fits_shwi_p (DR_INIT (dr_a1->dr))
1379               || !tree_fits_shwi_p (DR_INIT (dr_a2->dr)))
1380             continue;
1381
1382           /* Only merge const step data references.  */
1383           if (TREE_CODE (DR_STEP (dr_a1->dr)) != INTEGER_CST
1384               || TREE_CODE (DR_STEP (dr_a2->dr)) != INTEGER_CST)
1385             continue;
1386
1387           /* DR_A1 and DR_A2 must goes in the same direction.  */
1388           if (tree_int_cst_compare (DR_STEP (dr_a1->dr), size_zero_node)
1389               != tree_int_cst_compare (DR_STEP (dr_a2->dr), size_zero_node))
1390             continue;
1391
1392           bool neg_step
1393             = (tree_int_cst_compare (DR_STEP (dr_a1->dr), size_zero_node) < 0);
1394
1395           /* We need to compute merged segment length at compilation time for
1396              dr_a1 and dr_a2, which is impossible if either one has non-const
1397              segment length.  */
1398           if ((!tree_fits_uhwi_p (dr_a1->seg_len)
1399                || !tree_fits_uhwi_p (dr_a2->seg_len))
1400               && tree_int_cst_compare (DR_STEP (dr_a1->dr),
1401                                        DR_STEP (dr_a2->dr)) != 0)
1402             continue;
1403
1404           /* Make sure dr_a1 starts left of dr_a2.  */
1405           if (tree_int_cst_lt (DR_INIT (dr_a2->dr), DR_INIT (dr_a1->dr)))
1406             std::swap (*dr_a1, *dr_a2);
1407
1408           bool do_remove = false;
1409           wide_int diff = wi::sub (DR_INIT (dr_a2->dr), DR_INIT (dr_a1->dr));
1410           wide_int min_seg_len_b;
1411           tree new_seg_len;
1412
1413           if (TREE_CODE (dr_b1->seg_len) == INTEGER_CST)
1414             min_seg_len_b = wi::abs (dr_b1->seg_len);
1415           else
1416             min_seg_len_b = wi::mul (factor, wi::abs (DR_STEP (dr_b1->dr)));
1417
1418           /* Now we try to merge alias check dr_a1 & dr_b and dr_a2 & dr_b.
1419
1420              Case A:
1421                check if the following condition is satisfied:
1422
1423                DIFF - SEGMENT_LENGTH_A < SEGMENT_LENGTH_B
1424
1425                where DIFF = DR_A2_INIT - DR_A1_INIT.  However,
1426                SEGMENT_LENGTH_A or SEGMENT_LENGTH_B may not be constant so we
1427                have to make a best estimation.  We can get the minimum value
1428                of SEGMENT_LENGTH_B as a constant, represented by MIN_SEG_LEN_B,
1429                then either of the following two conditions can guarantee the
1430                one above:
1431
1432                1: DIFF <= MIN_SEG_LEN_B
1433                2: DIFF - SEGMENT_LENGTH_A < MIN_SEG_LEN_B
1434                   Because DIFF - SEGMENT_LENGTH_A is done in sizetype, we need
1435                   to take care of wrapping behavior in it.
1436
1437              Case B:
1438                If the left segment does not extend beyond the start of the
1439                right segment the new segment length is that of the right
1440                plus the segment distance.  The condition is like:
1441
1442                DIFF >= SEGMENT_LENGTH_A   ;SEGMENT_LENGTH_A is a constant.
1443
1444              Note 1: Case A.2 and B combined together effectively merges every
1445              dr_a1 & dr_b and dr_a2 & dr_b when SEGMENT_LENGTH_A is const.
1446
1447              Note 2: Above description is based on positive DR_STEP, we need to
1448              take care of negative DR_STEP for wrapping behavior.  See PR80815
1449              for more information.  */
1450           if (neg_step)
1451             {
1452               /* Adjust diff according to access size of both references.  */
1453               tree size_a1 = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_a1->dr)));
1454               tree size_a2 = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_a2->dr)));
1455               diff = wi::add (diff, wi::sub (size_a2, size_a1));
1456               /* Case A.1.  */
1457               if (wi::leu_p (diff, min_seg_len_b)
1458                   /* Case A.2 and B combined.  */
1459                   || (tree_fits_uhwi_p (dr_a2->seg_len)))
1460                 {
1461                   if (tree_fits_uhwi_p (dr_a1->seg_len)
1462                       && tree_fits_uhwi_p (dr_a2->seg_len))
1463                     new_seg_len
1464                       = wide_int_to_tree (sizetype,
1465                                           wi::umin (wi::sub (dr_a1->seg_len,
1466                                                              diff),
1467                                                     dr_a2->seg_len));
1468                   else
1469                     new_seg_len
1470                       = size_binop (MINUS_EXPR, dr_a2->seg_len,
1471                                     wide_int_to_tree (sizetype, diff));
1472
1473                   dr_a2->seg_len = new_seg_len;
1474                   do_remove = true;
1475                 }
1476             }
1477           else
1478             {
1479               /* Case A.1.  */
1480               if (wi::leu_p (diff, min_seg_len_b)
1481                   /* Case A.2 and B combined.  */
1482                   || (tree_fits_uhwi_p (dr_a1->seg_len)))
1483                 {
1484                   if (tree_fits_uhwi_p (dr_a1->seg_len)
1485                       && tree_fits_uhwi_p (dr_a2->seg_len))
1486                     new_seg_len
1487                       = wide_int_to_tree (sizetype,
1488                                           wi::umax (wi::add (dr_a2->seg_len,
1489                                                              diff),
1490                                                     dr_a1->seg_len));
1491                   else
1492                     new_seg_len
1493                       = size_binop (PLUS_EXPR, dr_a2->seg_len,
1494                                     wide_int_to_tree (sizetype, diff));
1495
1496                   dr_a1->seg_len = new_seg_len;
1497                   do_remove = true;
1498                 }
1499             }
1500
1501           if (do_remove)
1502             {
1503               if (dump_enabled_p ())
1504                 {
1505                   dump_printf (MSG_NOTE, "merging ranges for ");
1506                   dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a1->dr));
1507                   dump_printf (MSG_NOTE,  ", ");
1508                   dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b1->dr));
1509                   dump_printf (MSG_NOTE,  " and ");
1510                   dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a2->dr));
1511                   dump_printf (MSG_NOTE,  ", ");
1512                   dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b2->dr));
1513                   dump_printf (MSG_NOTE, "\n");
1514                 }
1515               alias_pairs->ordered_remove (neg_step ? i - 1 : i);
1516               i--;
1517             }
1518         }
1519     }
1520 }
1521
1522 /* Given LOOP's two data references and segment lengths described by DR_A
1523    and DR_B, create expression checking if the two addresses ranges intersect
1524    with each other based on index of the two addresses.  This can only be
1525    done if DR_A and DR_B referring to the same (array) object and the index
1526    is the only difference.  For example:
1527
1528                        DR_A                           DR_B
1529       data-ref         arr[i]                         arr[j]
1530       base_object      arr                            arr
1531       index            {i_0, +, 1}_loop               {j_0, +, 1}_loop
1532
1533    The addresses and their index are like:
1534
1535         |<- ADDR_A    ->|          |<- ADDR_B    ->|
1536      ------------------------------------------------------->
1537         |   |   |   |   |          |   |   |   |   |
1538      ------------------------------------------------------->
1539         i_0 ...         i_0+4      j_0 ...         j_0+4
1540
1541    We can create expression based on index rather than address:
1542
1543      (i_0 + 4 < j_0 || j_0 + 4 < i_0)
1544
1545    Note evolution step of index needs to be considered in comparison.  */
1546
1547 static bool
1548 create_intersect_range_checks_index (struct loop *loop, tree *cond_expr,
1549                                      const dr_with_seg_len& dr_a,
1550                                      const dr_with_seg_len& dr_b)
1551 {
1552   if (integer_zerop (DR_STEP (dr_a.dr))
1553       || integer_zerop (DR_STEP (dr_b.dr))
1554       || DR_NUM_DIMENSIONS (dr_a.dr) != DR_NUM_DIMENSIONS (dr_b.dr))
1555     return false;
1556
1557   if (!tree_fits_uhwi_p (dr_a.seg_len) || !tree_fits_uhwi_p (dr_b.seg_len))
1558     return false;
1559
1560   if (!tree_fits_shwi_p (DR_STEP (dr_a.dr)))
1561     return false;
1562
1563   if (!operand_equal_p (DR_BASE_OBJECT (dr_a.dr), DR_BASE_OBJECT (dr_b.dr), 0))
1564     return false;
1565
1566   if (!operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0))
1567     return false;
1568
1569   gcc_assert (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST);
1570
1571   bool neg_step = tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0;
1572   unsigned HOST_WIDE_INT abs_step
1573     = absu_hwi (tree_to_shwi (DR_STEP (dr_a.dr)));
1574
1575   unsigned HOST_WIDE_INT seg_len1 = tree_to_uhwi (dr_a.seg_len);
1576   unsigned HOST_WIDE_INT seg_len2 = tree_to_uhwi (dr_b.seg_len);
1577   /* Infer the number of iterations with which the memory segment is accessed
1578      by DR.  In other words, alias is checked if memory segment accessed by
1579      DR_A in some iterations intersect with memory segment accessed by DR_B
1580      in the same amount iterations.
1581      Note segnment length is a linear function of number of iterations with
1582      DR_STEP as the coefficient.  */
1583   unsigned HOST_WIDE_INT niter_len1 = (seg_len1 + abs_step - 1) / abs_step;
1584   unsigned HOST_WIDE_INT niter_len2 = (seg_len2 + abs_step - 1) / abs_step;
1585
1586   unsigned int i;
1587   for (i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
1588     {
1589       tree access1 = DR_ACCESS_FN (dr_a.dr, i);
1590       tree access2 = DR_ACCESS_FN (dr_b.dr, i);
1591       /* Two indices must be the same if they are not scev, or not scev wrto
1592          current loop being vecorized.  */
1593       if (TREE_CODE (access1) != POLYNOMIAL_CHREC
1594           || TREE_CODE (access2) != POLYNOMIAL_CHREC
1595           || CHREC_VARIABLE (access1) != (unsigned)loop->num
1596           || CHREC_VARIABLE (access2) != (unsigned)loop->num)
1597         {
1598           if (operand_equal_p (access1, access2, 0))
1599             continue;
1600
1601           return false;
1602         }
1603       /* The two indices must have the same step.  */
1604       if (!operand_equal_p (CHREC_RIGHT (access1), CHREC_RIGHT (access2), 0))
1605         return false;
1606
1607       tree idx_step = CHREC_RIGHT (access1);
1608       /* Index must have const step, otherwise DR_STEP won't be constant.  */
1609       gcc_assert (TREE_CODE (idx_step) == INTEGER_CST);
1610       /* Index must evaluate in the same direction as DR.  */
1611       gcc_assert (!neg_step || tree_int_cst_sign_bit (idx_step) == 1);
1612
1613       tree min1 = CHREC_LEFT (access1);
1614       tree min2 = CHREC_LEFT (access2);
1615       if (!types_compatible_p (TREE_TYPE (min1), TREE_TYPE (min2)))
1616         return false;
1617
1618       /* Ideally, alias can be checked against loop's control IV, but we
1619          need to prove linear mapping between control IV and reference
1620          index.  Although that should be true, we check against (array)
1621          index of data reference.  Like segment length, index length is
1622          linear function of the number of iterations with index_step as
1623          the coefficient, i.e, niter_len * idx_step.  */
1624       tree idx_len1 = fold_build2 (MULT_EXPR, TREE_TYPE (min1), idx_step,
1625                                    build_int_cst (TREE_TYPE (min1),
1626                                                   niter_len1));
1627       tree idx_len2 = fold_build2 (MULT_EXPR, TREE_TYPE (min2), idx_step,
1628                                    build_int_cst (TREE_TYPE (min2),
1629                                                   niter_len2));
1630       tree max1 = fold_build2 (PLUS_EXPR, TREE_TYPE (min1), min1, idx_len1);
1631       tree max2 = fold_build2 (PLUS_EXPR, TREE_TYPE (min2), min2, idx_len2);
1632       /* Adjust ranges for negative step.  */
1633       if (neg_step)
1634         {
1635           min1 = fold_build2 (MINUS_EXPR, TREE_TYPE (min1), max1, idx_step);
1636           max1 = fold_build2 (MINUS_EXPR, TREE_TYPE (min1),
1637                               CHREC_LEFT (access1), idx_step);
1638           min2 = fold_build2 (MINUS_EXPR, TREE_TYPE (min2), max2, idx_step);
1639           max2 = fold_build2 (MINUS_EXPR, TREE_TYPE (min2),
1640                               CHREC_LEFT (access2), idx_step);
1641         }
1642       tree part_cond_expr
1643         = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1644             fold_build2 (LE_EXPR, boolean_type_node, max1, min2),
1645             fold_build2 (LE_EXPR, boolean_type_node, max2, min1));
1646       if (*cond_expr)
1647         *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1648                                   *cond_expr, part_cond_expr);
1649       else
1650         *cond_expr = part_cond_expr;
1651     }
1652   return true;
1653 }
1654
1655 /* Given two data references and segment lengths described by DR_A and DR_B,
1656    create expression checking if the two addresses ranges intersect with
1657    each other:
1658
1659      ((DR_A_addr_0 + DR_A_segment_length_0) <= DR_B_addr_0)
1660      || (DR_B_addr_0 + DER_B_segment_length_0) <= DR_A_addr_0))  */
1661
1662 static void
1663 create_intersect_range_checks (struct loop *loop, tree *cond_expr,
1664                                const dr_with_seg_len& dr_a,
1665                                const dr_with_seg_len& dr_b)
1666 {
1667   *cond_expr = NULL_TREE;
1668   if (create_intersect_range_checks_index (loop, cond_expr, dr_a, dr_b))
1669     return;
1670
1671   tree segment_length_a = dr_a.seg_len;
1672   tree segment_length_b = dr_b.seg_len;
1673   tree addr_base_a = DR_BASE_ADDRESS (dr_a.dr);
1674   tree addr_base_b = DR_BASE_ADDRESS (dr_b.dr);
1675   tree offset_a = DR_OFFSET (dr_a.dr), offset_b = DR_OFFSET (dr_b.dr);
1676
1677   offset_a = fold_build2 (PLUS_EXPR, TREE_TYPE (offset_a),
1678                           offset_a, DR_INIT (dr_a.dr));
1679   offset_b = fold_build2 (PLUS_EXPR, TREE_TYPE (offset_b),
1680                           offset_b, DR_INIT (dr_b.dr));
1681   addr_base_a = fold_build_pointer_plus (addr_base_a, offset_a);
1682   addr_base_b = fold_build_pointer_plus (addr_base_b, offset_b);
1683
1684   tree seg_a_min = addr_base_a;
1685   tree seg_a_max = fold_build_pointer_plus (addr_base_a, segment_length_a);
1686   /* For negative step, we need to adjust address range by TYPE_SIZE_UNIT
1687      bytes, e.g., int a[3] -> a[1] range is [a+4, a+16) instead of
1688      [a, a+12) */
1689   if (tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0)
1690     {
1691       tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_a.dr)));
1692       seg_a_min = fold_build_pointer_plus (seg_a_max, unit_size);
1693       seg_a_max = fold_build_pointer_plus (addr_base_a, unit_size);
1694     }
1695
1696   tree seg_b_min = addr_base_b;
1697   tree seg_b_max = fold_build_pointer_plus (addr_base_b, segment_length_b);
1698   if (tree_int_cst_compare (DR_STEP (dr_b.dr), size_zero_node) < 0)
1699     {
1700       tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_b.dr)));
1701       seg_b_min = fold_build_pointer_plus (seg_b_max, unit_size);
1702       seg_b_max = fold_build_pointer_plus (addr_base_b, unit_size);
1703     }
1704   *cond_expr
1705     = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1706         fold_build2 (LE_EXPR, boolean_type_node, seg_a_max, seg_b_min),
1707         fold_build2 (LE_EXPR, boolean_type_node, seg_b_max, seg_a_min));
1708 }
1709
1710 /* Create a conditional expression that represents the run-time checks for
1711    overlapping of address ranges represented by a list of data references
1712    pairs passed in ALIAS_PAIRS.  Data references are in LOOP.  The returned
1713    COND_EXPR is the conditional expression to be used in the if statement
1714    that controls which version of the loop gets executed at runtime.  */
1715
1716 void
1717 create_runtime_alias_checks (struct loop *loop,
1718                              vec<dr_with_seg_len_pair_t> *alias_pairs,
1719                              tree * cond_expr)
1720 {
1721   tree part_cond_expr;
1722
1723   for (size_t i = 0, s = alias_pairs->length (); i < s; ++i)
1724     {
1725       const dr_with_seg_len& dr_a = (*alias_pairs)[i].first;
1726       const dr_with_seg_len& dr_b = (*alias_pairs)[i].second;
1727
1728       if (dump_enabled_p ())
1729         {
1730           dump_printf (MSG_NOTE, "create runtime check for data references ");
1731           dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a.dr));
1732           dump_printf (MSG_NOTE, " and ");
1733           dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b.dr));
1734           dump_printf (MSG_NOTE, "\n");
1735         }
1736
1737       /* Create condition expression for each pair data references.  */
1738       create_intersect_range_checks (loop, &part_cond_expr, dr_a, dr_b);
1739       if (*cond_expr)
1740         *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1741                                   *cond_expr, part_cond_expr);
1742       else
1743         *cond_expr = part_cond_expr;
1744     }
1745 }
1746
1747 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
1748    expressions.  */
1749 static bool
1750 dr_equal_offsets_p1 (tree offset1, tree offset2)
1751 {
1752   bool res;
1753
1754   STRIP_NOPS (offset1);
1755   STRIP_NOPS (offset2);
1756
1757   if (offset1 == offset2)
1758     return true;
1759
1760   if (TREE_CODE (offset1) != TREE_CODE (offset2)
1761       || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
1762     return false;
1763
1764   res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
1765                              TREE_OPERAND (offset2, 0));
1766
1767   if (!res || !BINARY_CLASS_P (offset1))
1768     return res;
1769
1770   res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
1771                              TREE_OPERAND (offset2, 1));
1772
1773   return res;
1774 }
1775
1776 /* Check if DRA and DRB have equal offsets.  */
1777 bool
1778 dr_equal_offsets_p (struct data_reference *dra,
1779                     struct data_reference *drb)
1780 {
1781   tree offset1, offset2;
1782
1783   offset1 = DR_OFFSET (dra);
1784   offset2 = DR_OFFSET (drb);
1785
1786   return dr_equal_offsets_p1 (offset1, offset2);
1787 }
1788
1789 /* Returns true if FNA == FNB.  */
1790
1791 static bool
1792 affine_function_equal_p (affine_fn fna, affine_fn fnb)
1793 {
1794   unsigned i, n = fna.length ();
1795
1796   if (n != fnb.length ())
1797     return false;
1798
1799   for (i = 0; i < n; i++)
1800     if (!operand_equal_p (fna[i], fnb[i], 0))
1801       return false;
1802
1803   return true;
1804 }
1805
1806 /* If all the functions in CF are the same, returns one of them,
1807    otherwise returns NULL.  */
1808
1809 static affine_fn
1810 common_affine_function (conflict_function *cf)
1811 {
1812   unsigned i;
1813   affine_fn comm;
1814
1815   if (!CF_NONTRIVIAL_P (cf))
1816     return affine_fn ();
1817
1818   comm = cf->fns[0];
1819
1820   for (i = 1; i < cf->n; i++)
1821     if (!affine_function_equal_p (comm, cf->fns[i]))
1822       return affine_fn ();
1823
1824   return comm;
1825 }
1826
1827 /* Returns the base of the affine function FN.  */
1828
1829 static tree
1830 affine_function_base (affine_fn fn)
1831 {
1832   return fn[0];
1833 }
1834
1835 /* Returns true if FN is a constant.  */
1836
1837 static bool
1838 affine_function_constant_p (affine_fn fn)
1839 {
1840   unsigned i;
1841   tree coef;
1842
1843   for (i = 1; fn.iterate (i, &coef); i++)
1844     if (!integer_zerop (coef))
1845       return false;
1846
1847   return true;
1848 }
1849
1850 /* Returns true if FN is the zero constant function.  */
1851
1852 static bool
1853 affine_function_zero_p (affine_fn fn)
1854 {
1855   return (integer_zerop (affine_function_base (fn))
1856           && affine_function_constant_p (fn));
1857 }
1858
1859 /* Returns a signed integer type with the largest precision from TA
1860    and TB.  */
1861
1862 static tree
1863 signed_type_for_types (tree ta, tree tb)
1864 {
1865   if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
1866     return signed_type_for (ta);
1867   else
1868     return signed_type_for (tb);
1869 }
1870
1871 /* Applies operation OP on affine functions FNA and FNB, and returns the
1872    result.  */
1873
1874 static affine_fn
1875 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
1876 {
1877   unsigned i, n, m;
1878   affine_fn ret;
1879   tree coef;
1880
1881   if (fnb.length () > fna.length ())
1882     {
1883       n = fna.length ();
1884       m = fnb.length ();
1885     }
1886   else
1887     {
1888       n = fnb.length ();
1889       m = fna.length ();
1890     }
1891
1892   ret.create (m);
1893   for (i = 0; i < n; i++)
1894     {
1895       tree type = signed_type_for_types (TREE_TYPE (fna[i]),
1896                                          TREE_TYPE (fnb[i]));
1897       ret.quick_push (fold_build2 (op, type, fna[i], fnb[i]));
1898     }
1899
1900   for (; fna.iterate (i, &coef); i++)
1901     ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1902                                  coef, integer_zero_node));
1903   for (; fnb.iterate (i, &coef); i++)
1904     ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1905                                  integer_zero_node, coef));
1906
1907   return ret;
1908 }
1909
1910 /* Returns the sum of affine functions FNA and FNB.  */
1911
1912 static affine_fn
1913 affine_fn_plus (affine_fn fna, affine_fn fnb)
1914 {
1915   return affine_fn_op (PLUS_EXPR, fna, fnb);
1916 }
1917
1918 /* Returns the difference of affine functions FNA and FNB.  */
1919
1920 static affine_fn
1921 affine_fn_minus (affine_fn fna, affine_fn fnb)
1922 {
1923   return affine_fn_op (MINUS_EXPR, fna, fnb);
1924 }
1925
1926 /* Frees affine function FN.  */
1927
1928 static void
1929 affine_fn_free (affine_fn fn)
1930 {
1931   fn.release ();
1932 }
1933
1934 /* Determine for each subscript in the data dependence relation DDR
1935    the distance.  */
1936
1937 static void
1938 compute_subscript_distance (struct data_dependence_relation *ddr)
1939 {
1940   conflict_function *cf_a, *cf_b;
1941   affine_fn fn_a, fn_b, diff;
1942
1943   if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
1944     {
1945       unsigned int i;
1946
1947       for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
1948         {
1949           struct subscript *subscript;
1950
1951           subscript = DDR_SUBSCRIPT (ddr, i);
1952           cf_a = SUB_CONFLICTS_IN_A (subscript);
1953           cf_b = SUB_CONFLICTS_IN_B (subscript);
1954
1955           fn_a = common_affine_function (cf_a);
1956           fn_b = common_affine_function (cf_b);
1957           if (!fn_a.exists () || !fn_b.exists ())
1958             {
1959               SUB_DISTANCE (subscript) = chrec_dont_know;
1960               return;
1961             }
1962           diff = affine_fn_minus (fn_a, fn_b);
1963
1964           if (affine_function_constant_p (diff))
1965             SUB_DISTANCE (subscript) = affine_function_base (diff);
1966           else
1967             SUB_DISTANCE (subscript) = chrec_dont_know;
1968
1969           affine_fn_free (diff);
1970         }
1971     }
1972 }
1973
1974 /* Returns the conflict function for "unknown".  */
1975
1976 static conflict_function *
1977 conflict_fn_not_known (void)
1978 {
1979   conflict_function *fn = XCNEW (conflict_function);
1980   fn->n = NOT_KNOWN;
1981
1982   return fn;
1983 }
1984
1985 /* Returns the conflict function for "independent".  */
1986
1987 static conflict_function *
1988 conflict_fn_no_dependence (void)
1989 {
1990   conflict_function *fn = XCNEW (conflict_function);
1991   fn->n = NO_DEPENDENCE;
1992
1993   return fn;
1994 }
1995
1996 /* Returns true if the address of OBJ is invariant in LOOP.  */
1997
1998 static bool
1999 object_address_invariant_in_loop_p (const struct loop *loop, const_tree obj)
2000 {
2001   while (handled_component_p (obj))
2002     {
2003       if (TREE_CODE (obj) == ARRAY_REF)
2004         {
2005           /* Index of the ARRAY_REF was zeroed in analyze_indices, thus we only
2006              need to check the stride and the lower bound of the reference.  */
2007           if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
2008                                                       loop->num)
2009               || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 3),
2010                                                          loop->num))
2011             return false;
2012         }
2013       else if (TREE_CODE (obj) == COMPONENT_REF)
2014         {
2015           if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
2016                                                       loop->num))
2017             return false;
2018         }
2019       obj = TREE_OPERAND (obj, 0);
2020     }
2021
2022   if (!INDIRECT_REF_P (obj)
2023       && TREE_CODE (obj) != MEM_REF)
2024     return true;
2025
2026   return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
2027                                                   loop->num);
2028 }
2029
2030 /* Returns false if we can prove that data references A and B do not alias,
2031    true otherwise.  If LOOP_NEST is false no cross-iteration aliases are
2032    considered.  */
2033
2034 bool
2035 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
2036                 bool loop_nest)
2037 {
2038   tree addr_a = DR_BASE_OBJECT (a);
2039   tree addr_b = DR_BASE_OBJECT (b);
2040
2041   /* If we are not processing a loop nest but scalar code we
2042      do not need to care about possible cross-iteration dependences
2043      and thus can process the full original reference.  Do so,
2044      similar to how loop invariant motion applies extra offset-based
2045      disambiguation.  */
2046   if (!loop_nest)
2047     {
2048       aff_tree off1, off2;
2049       widest_int size1, size2;
2050       get_inner_reference_aff (DR_REF (a), &off1, &size1);
2051       get_inner_reference_aff (DR_REF (b), &off2, &size2);
2052       aff_combination_scale (&off1, -1);
2053       aff_combination_add (&off2, &off1);
2054       if (aff_comb_cannot_overlap_p (&off2, size1, size2))
2055         return false;
2056     }
2057
2058   if ((TREE_CODE (addr_a) == MEM_REF || TREE_CODE (addr_a) == TARGET_MEM_REF)
2059       && (TREE_CODE (addr_b) == MEM_REF || TREE_CODE (addr_b) == TARGET_MEM_REF)
2060       && MR_DEPENDENCE_CLIQUE (addr_a) == MR_DEPENDENCE_CLIQUE (addr_b)
2061       && MR_DEPENDENCE_BASE (addr_a) != MR_DEPENDENCE_BASE (addr_b))
2062     return false;
2063
2064   /* If we had an evolution in a pointer-based MEM_REF BASE_OBJECT we
2065      do not know the size of the base-object.  So we cannot do any
2066      offset/overlap based analysis but have to rely on points-to
2067      information only.  */
2068   if (TREE_CODE (addr_a) == MEM_REF
2069       && (DR_UNCONSTRAINED_BASE (a)
2070           || TREE_CODE (TREE_OPERAND (addr_a, 0)) == SSA_NAME))
2071     {
2072       /* For true dependences we can apply TBAA.  */
2073       if (flag_strict_aliasing
2074           && DR_IS_WRITE (a) && DR_IS_READ (b)
2075           && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
2076                                      get_alias_set (DR_REF (b))))
2077         return false;
2078       if (TREE_CODE (addr_b) == MEM_REF)
2079         return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
2080                                        TREE_OPERAND (addr_b, 0));
2081       else
2082         return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
2083                                        build_fold_addr_expr (addr_b));
2084     }
2085   else if (TREE_CODE (addr_b) == MEM_REF
2086            && (DR_UNCONSTRAINED_BASE (b)
2087                || TREE_CODE (TREE_OPERAND (addr_b, 0)) == SSA_NAME))
2088     {
2089       /* For true dependences we can apply TBAA.  */
2090       if (flag_strict_aliasing
2091           && DR_IS_WRITE (a) && DR_IS_READ (b)
2092           && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
2093                                      get_alias_set (DR_REF (b))))
2094         return false;
2095       if (TREE_CODE (addr_a) == MEM_REF)
2096         return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
2097                                        TREE_OPERAND (addr_b, 0));
2098       else
2099         return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
2100                                        TREE_OPERAND (addr_b, 0));
2101     }
2102
2103   /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
2104      that is being subsetted in the loop nest.  */
2105   if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
2106     return refs_output_dependent_p (addr_a, addr_b);
2107   else if (DR_IS_READ (a) && DR_IS_WRITE (b))
2108     return refs_anti_dependent_p (addr_a, addr_b);
2109   return refs_may_alias_p (addr_a, addr_b);
2110 }
2111
2112 /* Initialize a data dependence relation between data accesses A and
2113    B.  NB_LOOPS is the number of loops surrounding the references: the
2114    size of the classic distance/direction vectors.  */
2115
2116 struct data_dependence_relation *
2117 initialize_data_dependence_relation (struct data_reference *a,
2118                                      struct data_reference *b,
2119                                      vec<loop_p> loop_nest)
2120 {
2121   struct data_dependence_relation *res;
2122   unsigned int i;
2123
2124   res = XNEW (struct data_dependence_relation);
2125   DDR_A (res) = a;
2126   DDR_B (res) = b;
2127   DDR_LOOP_NEST (res).create (0);
2128   DDR_REVERSED_P (res) = false;
2129   DDR_SUBSCRIPTS (res).create (0);
2130   DDR_DIR_VECTS (res).create (0);
2131   DDR_DIST_VECTS (res).create (0);
2132
2133   if (a == NULL || b == NULL)
2134     {
2135       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
2136       return res;
2137     }
2138
2139   /* If the data references do not alias, then they are independent.  */
2140   if (!dr_may_alias_p (a, b, loop_nest.exists ()))
2141     {
2142       DDR_ARE_DEPENDENT (res) = chrec_known;
2143       return res;
2144     }
2145
2146   /* The case where the references are exactly the same.  */
2147   if (operand_equal_p (DR_REF (a), DR_REF (b), 0))
2148     {
2149       if ((loop_nest.exists ()
2150            && !object_address_invariant_in_loop_p (loop_nest[0],
2151                                                    DR_BASE_OBJECT (a)))
2152           || DR_NUM_DIMENSIONS (a) == 0)
2153         {
2154           DDR_ARE_DEPENDENT (res) = chrec_dont_know;
2155           return res;
2156         }
2157       DDR_AFFINE_P (res) = true;
2158       DDR_ARE_DEPENDENT (res) = NULL_TREE;
2159       DDR_SUBSCRIPTS (res).create (DR_NUM_DIMENSIONS (a));
2160       DDR_LOOP_NEST (res) = loop_nest;
2161       DDR_INNER_LOOP (res) = 0;
2162       DDR_SELF_REFERENCE (res) = true;
2163       for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
2164        {
2165          struct subscript *subscript;
2166
2167          subscript = XNEW (struct subscript);
2168          SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
2169          SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
2170          SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
2171          SUB_DISTANCE (subscript) = chrec_dont_know;
2172          DDR_SUBSCRIPTS (res).safe_push (subscript);
2173        }
2174       return res;
2175     }
2176
2177   /* If the references do not access the same object, we do not know
2178      whether they alias or not.  We do not care about TBAA or alignment
2179      info so we can use OEP_ADDRESS_OF to avoid false negatives.
2180      But the accesses have to use compatible types as otherwise the
2181      built indices would not match.  */
2182   if (!operand_equal_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b), OEP_ADDRESS_OF)
2183       || !types_compatible_p (TREE_TYPE (DR_BASE_OBJECT (a)),
2184                               TREE_TYPE (DR_BASE_OBJECT (b))))
2185     {
2186       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
2187       return res;
2188     }
2189
2190   /* If the base of the object is not invariant in the loop nest, we cannot
2191      analyze it.  TODO -- in fact, it would suffice to record that there may
2192      be arbitrary dependences in the loops where the base object varies.  */
2193   if ((loop_nest.exists ()
2194        && !object_address_invariant_in_loop_p (loop_nest[0], DR_BASE_OBJECT (a)))
2195       || DR_NUM_DIMENSIONS (a) == 0)
2196     {
2197       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
2198       return res;
2199     }
2200
2201   /* If the number of dimensions of the access to not agree we can have
2202      a pointer access to a component of the array element type and an
2203      array access while the base-objects are still the same.  Punt.  */
2204   if (DR_NUM_DIMENSIONS (a) != DR_NUM_DIMENSIONS (b))
2205     {
2206       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
2207       return res;
2208     }
2209
2210   DDR_AFFINE_P (res) = true;
2211   DDR_ARE_DEPENDENT (res) = NULL_TREE;
2212   DDR_SUBSCRIPTS (res).create (DR_NUM_DIMENSIONS (a));
2213   DDR_LOOP_NEST (res) = loop_nest;
2214   DDR_INNER_LOOP (res) = 0;
2215   DDR_SELF_REFERENCE (res) = false;
2216
2217   for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
2218     {
2219       struct subscript *subscript;
2220
2221       subscript = XNEW (struct subscript);
2222       SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
2223       SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
2224       SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
2225       SUB_DISTANCE (subscript) = chrec_dont_know;
2226       DDR_SUBSCRIPTS (res).safe_push (subscript);
2227     }
2228
2229   return res;
2230 }
2231
2232 /* Frees memory used by the conflict function F.  */
2233
2234 static void
2235 free_conflict_function (conflict_function *f)
2236 {
2237   unsigned i;
2238
2239   if (CF_NONTRIVIAL_P (f))
2240     {
2241       for (i = 0; i < f->n; i++)
2242         affine_fn_free (f->fns[i]);
2243     }
2244   free (f);
2245 }
2246
2247 /* Frees memory used by SUBSCRIPTS.  */
2248
2249 static void
2250 free_subscripts (vec<subscript_p> subscripts)
2251 {
2252   unsigned i;
2253   subscript_p s;
2254
2255   FOR_EACH_VEC_ELT (subscripts, i, s)
2256     {
2257       free_conflict_function (s->conflicting_iterations_in_a);
2258       free_conflict_function (s->conflicting_iterations_in_b);
2259       free (s);
2260     }
2261   subscripts.release ();
2262 }
2263
2264 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
2265    description.  */
2266
2267 static inline void
2268 finalize_ddr_dependent (struct data_dependence_relation *ddr,
2269                         tree chrec)
2270 {
2271   DDR_ARE_DEPENDENT (ddr) = chrec;
2272   free_subscripts (DDR_SUBSCRIPTS (ddr));
2273   DDR_SUBSCRIPTS (ddr).create (0);
2274 }
2275
2276 /* The dependence relation DDR cannot be represented by a distance
2277    vector.  */
2278
2279 static inline void
2280 non_affine_dependence_relation (struct data_dependence_relation *ddr)
2281 {
2282   if (dump_file && (dump_flags & TDF_DETAILS))
2283     fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
2284
2285   DDR_AFFINE_P (ddr) = false;
2286 }
2287
2288 \f
2289
2290 /* This section contains the classic Banerjee tests.  */
2291
2292 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
2293    variables, i.e., if the ZIV (Zero Index Variable) test is true.  */
2294
2295 static inline bool
2296 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
2297 {
2298   return (evolution_function_is_constant_p (chrec_a)
2299           && evolution_function_is_constant_p (chrec_b));
2300 }
2301
2302 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
2303    variable, i.e., if the SIV (Single Index Variable) test is true.  */
2304
2305 static bool
2306 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
2307 {
2308   if ((evolution_function_is_constant_p (chrec_a)
2309        && evolution_function_is_univariate_p (chrec_b))
2310       || (evolution_function_is_constant_p (chrec_b)
2311           && evolution_function_is_univariate_p (chrec_a)))
2312     return true;
2313
2314   if (evolution_function_is_univariate_p (chrec_a)
2315       && evolution_function_is_univariate_p (chrec_b))
2316     {
2317       switch (TREE_CODE (chrec_a))
2318         {
2319         case POLYNOMIAL_CHREC:
2320           switch (TREE_CODE (chrec_b))
2321             {
2322             case POLYNOMIAL_CHREC:
2323               if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
2324                 return false;
2325               /* FALLTHRU */
2326
2327             default:
2328               return true;
2329             }
2330
2331         default:
2332           return true;
2333         }
2334     }
2335
2336   return false;
2337 }
2338
2339 /* Creates a conflict function with N dimensions.  The affine functions
2340    in each dimension follow.  */
2341
2342 static conflict_function *
2343 conflict_fn (unsigned n, ...)
2344 {
2345   unsigned i;
2346   conflict_function *ret = XCNEW (conflict_function);
2347   va_list ap;
2348
2349   gcc_assert (0 < n && n <= MAX_DIM);
2350   va_start (ap, n);
2351
2352   ret->n = n;
2353   for (i = 0; i < n; i++)
2354     ret->fns[i] = va_arg (ap, affine_fn);
2355   va_end (ap);
2356
2357   return ret;
2358 }
2359
2360 /* Returns constant affine function with value CST.  */
2361
2362 static affine_fn
2363 affine_fn_cst (tree cst)
2364 {
2365   affine_fn fn;
2366   fn.create (1);
2367   fn.quick_push (cst);
2368   return fn;
2369 }
2370
2371 /* Returns affine function with single variable, CST + COEF * x_DIM.  */
2372
2373 static affine_fn
2374 affine_fn_univar (tree cst, unsigned dim, tree coef)
2375 {
2376   affine_fn fn;
2377   fn.create (dim + 1);
2378   unsigned i;
2379
2380   gcc_assert (dim > 0);
2381   fn.quick_push (cst);
2382   for (i = 1; i < dim; i++)
2383     fn.quick_push (integer_zero_node);
2384   fn.quick_push (coef);
2385   return fn;
2386 }
2387
2388 /* Analyze a ZIV (Zero Index Variable) subscript.  *OVERLAPS_A and
2389    *OVERLAPS_B are initialized to the functions that describe the
2390    relation between the elements accessed twice by CHREC_A and
2391    CHREC_B.  For k >= 0, the following property is verified:
2392
2393    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
2394
2395 static void
2396 analyze_ziv_subscript (tree chrec_a,
2397                        tree chrec_b,
2398                        conflict_function **overlaps_a,
2399                        conflict_function **overlaps_b,
2400                        tree *last_conflicts)
2401 {
2402   tree type, difference;
2403   dependence_stats.num_ziv++;
2404
2405   if (dump_file && (dump_flags & TDF_DETAILS))
2406     fprintf (dump_file, "(analyze_ziv_subscript \n");
2407
2408   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
2409   chrec_a = chrec_convert (type, chrec_a, NULL);
2410   chrec_b = chrec_convert (type, chrec_b, NULL);
2411   difference = chrec_fold_minus (type, chrec_a, chrec_b);
2412
2413   switch (TREE_CODE (difference))
2414     {
2415     case INTEGER_CST:
2416       if (integer_zerop (difference))
2417         {
2418           /* The difference is equal to zero: the accessed index
2419              overlaps for each iteration in the loop.  */
2420           *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2421           *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2422           *last_conflicts = chrec_dont_know;
2423           dependence_stats.num_ziv_dependent++;
2424         }
2425       else
2426         {
2427           /* The accesses do not overlap.  */
2428           *overlaps_a = conflict_fn_no_dependence ();
2429           *overlaps_b = conflict_fn_no_dependence ();
2430           *last_conflicts = integer_zero_node;
2431           dependence_stats.num_ziv_independent++;
2432         }
2433       break;
2434
2435     default:
2436       /* We're not sure whether the indexes overlap.  For the moment,
2437          conservatively answer "don't know".  */
2438       if (dump_file && (dump_flags & TDF_DETAILS))
2439         fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
2440
2441       *overlaps_a = conflict_fn_not_known ();
2442       *overlaps_b = conflict_fn_not_known ();
2443       *last_conflicts = chrec_dont_know;
2444       dependence_stats.num_ziv_unimplemented++;
2445       break;
2446     }
2447
2448   if (dump_file && (dump_flags & TDF_DETAILS))
2449     fprintf (dump_file, ")\n");
2450 }
2451
2452 /* Similar to max_stmt_executions_int, but returns the bound as a tree,
2453    and only if it fits to the int type.  If this is not the case, or the
2454    bound  on the number of iterations of LOOP could not be derived, returns
2455    chrec_dont_know.  */
2456
2457 static tree
2458 max_stmt_executions_tree (struct loop *loop)
2459 {
2460   widest_int nit;
2461
2462   if (!max_stmt_executions (loop, &nit))
2463     return chrec_dont_know;
2464
2465   if (!wi::fits_to_tree_p (nit, unsigned_type_node))
2466     return chrec_dont_know;
2467
2468   return wide_int_to_tree (unsigned_type_node, nit);
2469 }
2470
2471 /* Determine whether the CHREC is always positive/negative.  If the expression
2472    cannot be statically analyzed, return false, otherwise set the answer into
2473    VALUE.  */
2474
2475 static bool
2476 chrec_is_positive (tree chrec, bool *value)
2477 {
2478   bool value0, value1, value2;
2479   tree end_value, nb_iter;
2480
2481   switch (TREE_CODE (chrec))
2482     {
2483     case POLYNOMIAL_CHREC:
2484       if (!chrec_is_positive (CHREC_LEFT (chrec), &value0)
2485           || !chrec_is_positive (CHREC_RIGHT (chrec), &value1))
2486         return false;
2487
2488       /* FIXME -- overflows.  */
2489       if (value0 == value1)
2490         {
2491           *value = value0;
2492           return true;
2493         }
2494
2495       /* Otherwise the chrec is under the form: "{-197, +, 2}_1",
2496          and the proof consists in showing that the sign never
2497          changes during the execution of the loop, from 0 to
2498          loop->nb_iterations.  */
2499       if (!evolution_function_is_affine_p (chrec))
2500         return false;
2501
2502       nb_iter = number_of_latch_executions (get_chrec_loop (chrec));
2503       if (chrec_contains_undetermined (nb_iter))
2504         return false;
2505
2506 #if 0
2507       /* TODO -- If the test is after the exit, we may decrease the number of
2508          iterations by one.  */
2509       if (after_exit)
2510         nb_iter = chrec_fold_minus (type, nb_iter, build_int_cst (type, 1));
2511 #endif
2512
2513       end_value = chrec_apply (CHREC_VARIABLE (chrec), chrec, nb_iter);
2514
2515       if (!chrec_is_positive (end_value, &value2))
2516         return false;
2517
2518       *value = value0;
2519       return value0 == value1;
2520
2521     case INTEGER_CST:
2522       switch (tree_int_cst_sgn (chrec))
2523         {
2524         case -1:
2525           *value = false;
2526           break;
2527         case 1:
2528           *value = true;
2529           break;
2530         default:
2531           return false;
2532         }
2533       return true;
2534
2535     default:
2536       return false;
2537     }
2538 }
2539
2540
2541 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
2542    constant, and CHREC_B is an affine function.  *OVERLAPS_A and
2543    *OVERLAPS_B are initialized to the functions that describe the
2544    relation between the elements accessed twice by CHREC_A and
2545    CHREC_B.  For k >= 0, the following property is verified:
2546
2547    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
2548
2549 static void
2550 analyze_siv_subscript_cst_affine (tree chrec_a,
2551                                   tree chrec_b,
2552                                   conflict_function **overlaps_a,
2553                                   conflict_function **overlaps_b,
2554                                   tree *last_conflicts)
2555 {
2556   bool value0, value1, value2;
2557   tree type, difference, tmp;
2558
2559   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
2560   chrec_a = chrec_convert (type, chrec_a, NULL);
2561   chrec_b = chrec_convert (type, chrec_b, NULL);
2562   difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
2563
2564   /* Special case overlap in the first iteration.  */
2565   if (integer_zerop (difference))
2566     {
2567       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2568       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2569       *last_conflicts = integer_one_node;
2570       return;
2571     }
2572
2573   if (!chrec_is_positive (initial_condition (difference), &value0))
2574     {
2575       if (dump_file && (dump_flags & TDF_DETAILS))
2576         fprintf (dump_file, "siv test failed: chrec is not positive.\n");
2577
2578       dependence_stats.num_siv_unimplemented++;
2579       *overlaps_a = conflict_fn_not_known ();
2580       *overlaps_b = conflict_fn_not_known ();
2581       *last_conflicts = chrec_dont_know;
2582       return;
2583     }
2584   else
2585     {
2586       if (value0 == false)
2587         {
2588           if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
2589             {
2590               if (dump_file && (dump_flags & TDF_DETAILS))
2591                 fprintf (dump_file, "siv test failed: chrec not positive.\n");
2592
2593               *overlaps_a = conflict_fn_not_known ();
2594               *overlaps_b = conflict_fn_not_known ();
2595               *last_conflicts = chrec_dont_know;
2596               dependence_stats.num_siv_unimplemented++;
2597               return;
2598             }
2599           else
2600             {
2601               if (value1 == true)
2602                 {
2603                   /* Example:
2604                      chrec_a = 12
2605                      chrec_b = {10, +, 1}
2606                   */
2607
2608                   if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
2609                     {
2610                       HOST_WIDE_INT numiter;
2611                       struct loop *loop = get_chrec_loop (chrec_b);
2612
2613                       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2614                       tmp = fold_build2 (EXACT_DIV_EXPR, type,
2615                                          fold_build1 (ABS_EXPR, type, difference),
2616                                          CHREC_RIGHT (chrec_b));
2617                       *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
2618                       *last_conflicts = integer_one_node;
2619
2620
2621                       /* Perform weak-zero siv test to see if overlap is
2622                          outside the loop bounds.  */
2623                       numiter = max_stmt_executions_int (loop);
2624
2625                       if (numiter >= 0
2626                           && compare_tree_int (tmp, numiter) > 0)
2627                         {
2628                           free_conflict_function (*overlaps_a);
2629                           free_conflict_function (*overlaps_b);
2630                           *overlaps_a = conflict_fn_no_dependence ();
2631                           *overlaps_b = conflict_fn_no_dependence ();
2632                           *last_conflicts = integer_zero_node;
2633                           dependence_stats.num_siv_independent++;
2634                           return;
2635                         }
2636                       dependence_stats.num_siv_dependent++;
2637                       return;
2638                     }
2639
2640                   /* When the step does not divide the difference, there are
2641                      no overlaps.  */
2642                   else
2643                     {
2644                       *overlaps_a = conflict_fn_no_dependence ();
2645                       *overlaps_b = conflict_fn_no_dependence ();
2646                       *last_conflicts = integer_zero_node;
2647                       dependence_stats.num_siv_independent++;
2648                       return;
2649                     }
2650                 }
2651
2652               else
2653                 {
2654                   /* Example:
2655                      chrec_a = 12
2656                      chrec_b = {10, +, -1}
2657
2658                      In this case, chrec_a will not overlap with chrec_b.  */
2659                   *overlaps_a = conflict_fn_no_dependence ();
2660                   *overlaps_b = conflict_fn_no_dependence ();
2661                   *last_conflicts = integer_zero_node;
2662                   dependence_stats.num_siv_independent++;
2663                   return;
2664                 }
2665             }
2666         }
2667       else
2668         {
2669           if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
2670             {
2671               if (dump_file && (dump_flags & TDF_DETAILS))
2672                 fprintf (dump_file, "siv test failed: chrec not positive.\n");
2673
2674               *overlaps_a = conflict_fn_not_known ();
2675               *overlaps_b = conflict_fn_not_known ();
2676               *last_conflicts = chrec_dont_know;
2677               dependence_stats.num_siv_unimplemented++;
2678               return;
2679             }
2680           else
2681             {
2682               if (value2 == false)
2683                 {
2684                   /* Example:
2685                      chrec_a = 3
2686                      chrec_b = {10, +, -1}
2687                   */
2688                   if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
2689                     {
2690                       HOST_WIDE_INT numiter;
2691                       struct loop *loop = get_chrec_loop (chrec_b);
2692
2693                       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2694                       tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
2695                                          CHREC_RIGHT (chrec_b));
2696                       *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
2697                       *last_conflicts = integer_one_node;
2698
2699                       /* Perform weak-zero siv test to see if overlap is
2700                          outside the loop bounds.  */
2701                       numiter = max_stmt_executions_int (loop);
2702
2703                       if (numiter >= 0
2704                           && compare_tree_int (tmp, numiter) > 0)
2705                         {
2706                           free_conflict_function (*overlaps_a);
2707                           free_conflict_function (*overlaps_b);
2708                           *overlaps_a = conflict_fn_no_dependence ();
2709                           *overlaps_b = conflict_fn_no_dependence ();
2710                           *last_conflicts = integer_zero_node;
2711                           dependence_stats.num_siv_independent++;
2712                           return;
2713                         }
2714                       dependence_stats.num_siv_dependent++;
2715                       return;
2716                     }
2717
2718                   /* When the step does not divide the difference, there
2719                      are no overlaps.  */
2720                   else
2721                     {
2722                       *overlaps_a = conflict_fn_no_dependence ();
2723                       *overlaps_b = conflict_fn_no_dependence ();
2724                       *last_conflicts = integer_zero_node;
2725                       dependence_stats.num_siv_independent++;
2726                       return;
2727                     }
2728                 }
2729               else
2730                 {
2731                   /* Example:
2732                      chrec_a = 3
2733                      chrec_b = {4, +, 1}
2734
2735                      In this case, chrec_a will not overlap with chrec_b.  */
2736                   *overlaps_a = conflict_fn_no_dependence ();
2737                   *overlaps_b = conflict_fn_no_dependence ();
2738                   *last_conflicts = integer_zero_node;
2739                   dependence_stats.num_siv_independent++;
2740                   return;
2741                 }
2742             }
2743         }
2744     }
2745 }
2746
2747 /* Helper recursive function for initializing the matrix A.  Returns
2748    the initial value of CHREC.  */
2749
2750 static tree
2751 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
2752 {
2753   gcc_assert (chrec);
2754
2755   switch (TREE_CODE (chrec))
2756     {
2757     case POLYNOMIAL_CHREC:
2758       A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
2759       return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
2760
2761     case PLUS_EXPR:
2762     case MULT_EXPR:
2763     case MINUS_EXPR:
2764       {
2765         tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
2766         tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
2767
2768         return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
2769       }
2770
2771     CASE_CONVERT:
2772       {
2773         tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
2774         return chrec_convert (chrec_type (chrec), op, NULL);
2775       }
2776
2777     case BIT_NOT_EXPR:
2778       {
2779         /* Handle ~X as -1 - X.  */
2780         tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
2781         return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
2782                               build_int_cst (TREE_TYPE (chrec), -1), op);
2783       }
2784
2785     case INTEGER_CST:
2786       return chrec;
2787
2788     default:
2789       gcc_unreachable ();
2790       return NULL_TREE;
2791     }
2792 }
2793
2794 #define FLOOR_DIV(x,y) ((x) / (y))
2795
2796 /* Solves the special case of the Diophantine equation:
2797    | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
2798
2799    Computes the descriptions OVERLAPS_A and OVERLAPS_B.  NITER is the
2800    number of iterations that loops X and Y run.  The overlaps will be
2801    constructed as evolutions in dimension DIM.  */
2802
2803 static void
2804 compute_overlap_steps_for_affine_univar (HOST_WIDE_INT niter,
2805                                          HOST_WIDE_INT step_a,
2806                                          HOST_WIDE_INT step_b,
2807                                          affine_fn *overlaps_a,
2808                                          affine_fn *overlaps_b,
2809                                          tree *last_conflicts, int dim)
2810 {
2811   if (((step_a > 0 && step_b > 0)
2812        || (step_a < 0 && step_b < 0)))
2813     {
2814       HOST_WIDE_INT step_overlaps_a, step_overlaps_b;
2815       HOST_WIDE_INT gcd_steps_a_b, last_conflict, tau2;
2816
2817       gcd_steps_a_b = gcd (step_a, step_b);
2818       step_overlaps_a = step_b / gcd_steps_a_b;
2819       step_overlaps_b = step_a / gcd_steps_a_b;
2820
2821       if (niter > 0)
2822         {
2823           tau2 = FLOOR_DIV (niter, step_overlaps_a);
2824           tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
2825           last_conflict = tau2;
2826           *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
2827         }
2828       else
2829         *last_conflicts = chrec_dont_know;
2830
2831       *overlaps_a = affine_fn_univar (integer_zero_node, dim,
2832                                       build_int_cst (NULL_TREE,
2833                                                      step_overlaps_a));
2834       *overlaps_b = affine_fn_univar (integer_zero_node, dim,
2835                                       build_int_cst (NULL_TREE,
2836                                                      step_overlaps_b));
2837     }
2838
2839   else
2840     {
2841       *overlaps_a = affine_fn_cst (integer_zero_node);
2842       *overlaps_b = affine_fn_cst (integer_zero_node);
2843       *last_conflicts = integer_zero_node;
2844     }
2845 }
2846
2847 /* Solves the special case of a Diophantine equation where CHREC_A is
2848    an affine bivariate function, and CHREC_B is an affine univariate
2849    function.  For example,
2850
2851    | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
2852
2853    has the following overlapping functions:
2854
2855    | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
2856    | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
2857    | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
2858
2859    FORNOW: This is a specialized implementation for a case occurring in
2860    a common benchmark.  Implement the general algorithm.  */
2861
2862 static void
2863 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
2864                                       conflict_function **overlaps_a,
2865                                       conflict_function **overlaps_b,
2866                                       tree *last_conflicts)
2867 {
2868   bool xz_p, yz_p, xyz_p;
2869   HOST_WIDE_INT step_x, step_y, step_z;
2870   HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
2871   affine_fn overlaps_a_xz, overlaps_b_xz;
2872   affine_fn overlaps_a_yz, overlaps_b_yz;
2873   affine_fn overlaps_a_xyz, overlaps_b_xyz;
2874   affine_fn ova1, ova2, ovb;
2875   tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
2876
2877   step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
2878   step_y = int_cst_value (CHREC_RIGHT (chrec_a));
2879   step_z = int_cst_value (CHREC_RIGHT (chrec_b));
2880
2881   niter_x = max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)));
2882   niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a));
2883   niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b));
2884
2885   if (niter_x < 0 || niter_y < 0 || niter_z < 0)
2886     {
2887       if (dump_file && (dump_flags & TDF_DETAILS))
2888         fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
2889
2890       *overlaps_a = conflict_fn_not_known ();
2891       *overlaps_b = conflict_fn_not_known ();
2892       *last_conflicts = chrec_dont_know;
2893       return;
2894     }
2895
2896   niter = MIN (niter_x, niter_z);
2897   compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
2898                                            &overlaps_a_xz,
2899                                            &overlaps_b_xz,
2900                                            &last_conflicts_xz, 1);
2901   niter = MIN (niter_y, niter_z);
2902   compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
2903                                            &overlaps_a_yz,
2904                                            &overlaps_b_yz,
2905                                            &last_conflicts_yz, 2);
2906   niter = MIN (niter_x, niter_z);
2907   niter = MIN (niter_y, niter);
2908   compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
2909                                            &overlaps_a_xyz,
2910                                            &overlaps_b_xyz,
2911                                            &last_conflicts_xyz, 3);
2912
2913   xz_p = !integer_zerop (last_conflicts_xz);
2914   yz_p = !integer_zerop (last_conflicts_yz);
2915   xyz_p = !integer_zerop (last_conflicts_xyz);
2916
2917   if (xz_p || yz_p || xyz_p)
2918     {
2919       ova1 = affine_fn_cst (integer_zero_node);
2920       ova2 = affine_fn_cst (integer_zero_node);
2921       ovb = affine_fn_cst (integer_zero_node);
2922       if (xz_p)
2923         {
2924           affine_fn t0 = ova1;
2925           affine_fn t2 = ovb;
2926
2927           ova1 = affine_fn_plus (ova1, overlaps_a_xz);
2928           ovb = affine_fn_plus (ovb, overlaps_b_xz);
2929           affine_fn_free (t0);
2930           affine_fn_free (t2);
2931           *last_conflicts = last_conflicts_xz;
2932         }
2933       if (yz_p)
2934         {
2935           affine_fn t0 = ova2;
2936           affine_fn t2 = ovb;
2937
2938           ova2 = affine_fn_plus (ova2, overlaps_a_yz);
2939           ovb = affine_fn_plus (ovb, overlaps_b_yz);
2940           affine_fn_free (t0);
2941           affine_fn_free (t2);
2942           *last_conflicts = last_conflicts_yz;
2943         }
2944       if (xyz_p)
2945         {
2946           affine_fn t0 = ova1;
2947           affine_fn t2 = ova2;
2948           affine_fn t4 = ovb;
2949
2950           ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
2951           ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
2952           ovb = affine_fn_plus (ovb, overlaps_b_xyz);
2953           affine_fn_free (t0);
2954           affine_fn_free (t2);
2955           affine_fn_free (t4);
2956           *last_conflicts = last_conflicts_xyz;
2957         }
2958       *overlaps_a = conflict_fn (2, ova1, ova2);
2959       *overlaps_b = conflict_fn (1, ovb);
2960     }
2961   else
2962     {
2963       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2964       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2965       *last_conflicts = integer_zero_node;
2966     }
2967
2968   affine_fn_free (overlaps_a_xz);
2969   affine_fn_free (overlaps_b_xz);
2970   affine_fn_free (overlaps_a_yz);
2971   affine_fn_free (overlaps_b_yz);
2972   affine_fn_free (overlaps_a_xyz);
2973   affine_fn_free (overlaps_b_xyz);
2974 }
2975
2976 /* Copy the elements of vector VEC1 with length SIZE to VEC2.  */
2977
2978 static void
2979 lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
2980                     int size)
2981 {
2982   memcpy (vec2, vec1, size * sizeof (*vec1));
2983 }
2984
2985 /* Copy the elements of M x N matrix MAT1 to MAT2.  */
2986
2987 static void
2988 lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
2989                     int m, int n)
2990 {
2991   int i;
2992
2993   for (i = 0; i < m; i++)
2994     lambda_vector_copy (mat1[i], mat2[i], n);
2995 }
2996
2997 /* Store the N x N identity matrix in MAT.  */
2998
2999 static void
3000 lambda_matrix_id (lambda_matrix mat, int size)
3001 {
3002   int i, j;
3003
3004   for (i = 0; i < size; i++)
3005     for (j = 0; j < size; j++)
3006       mat[i][j] = (i == j) ? 1 : 0;
3007 }
3008
3009 /* Return the first nonzero element of vector VEC1 between START and N.
3010    We must have START <= N.   Returns N if VEC1 is the zero vector.  */
3011
3012 static int
3013 lambda_vector_first_nz (lambda_vector vec1, int n, int start)
3014 {
3015   int j = start;
3016   while (j < n && vec1[j] == 0)
3017     j++;
3018   return j;
3019 }
3020
3021 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
3022    R2 = R2 + CONST1 * R1.  */
3023
3024 static void
3025 lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2, int const1)
3026 {
3027   int i;
3028
3029   if (const1 == 0)
3030     return;
3031
3032   for (i = 0; i < n; i++)
3033     mat[r2][i] += const1 * mat[r1][i];
3034 }
3035
3036 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
3037    and store the result in VEC2.  */
3038
3039 static void
3040 lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
3041                           int size, int const1)
3042 {
3043   int i;
3044
3045   if (const1 == 0)
3046     lambda_vector_clear (vec2, size);
3047   else
3048     for (i = 0; i < size; i++)
3049       vec2[i] = const1 * vec1[i];
3050 }
3051
3052 /* Negate vector VEC1 with length SIZE and store it in VEC2.  */
3053
3054 static void
3055 lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
3056                       int size)
3057 {
3058   lambda_vector_mult_const (vec1, vec2, size, -1);
3059 }
3060
3061 /* Negate row R1 of matrix MAT which has N columns.  */
3062
3063 static void
3064 lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
3065 {
3066   lambda_vector_negate (mat[r1], mat[r1], n);
3067 }
3068
3069 /* Return true if two vectors are equal.  */
3070
3071 static bool
3072 lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
3073 {
3074   int i;
3075   for (i = 0; i < size; i++)
3076     if (vec1[i] != vec2[i])
3077       return false;
3078   return true;
3079 }
3080
3081 /* Given an M x N integer matrix A, this function determines an M x
3082    M unimodular matrix U, and an M x N echelon matrix S such that
3083    "U.A = S".  This decomposition is also known as "right Hermite".
3084
3085    Ref: Algorithm 2.1 page 33 in "Loop Transformations for
3086    Restructuring Compilers" Utpal Banerjee.  */
3087
3088 static void
3089 lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
3090                              lambda_matrix S, lambda_matrix U)
3091 {
3092   int i, j, i0 = 0;
3093
3094   lambda_matrix_copy (A, S, m, n);
3095   lambda_matrix_id (U, m);
3096
3097   for (j = 0; j < n; j++)
3098     {
3099       if (lambda_vector_first_nz (S[j], m, i0) < m)
3100         {
3101           ++i0;
3102           for (i = m - 1; i >= i0; i--)
3103             {
3104               while (S[i][j] != 0)
3105                 {
3106                   int sigma, factor, a, b;
3107
3108                   a = S[i-1][j];
3109                   b = S[i][j];
3110                   sigma = (a * b < 0) ? -1: 1;
3111                   a = abs (a);
3112                   b = abs (b);
3113                   factor = sigma * (a / b);
3114
3115                   lambda_matrix_row_add (S, n, i, i-1, -factor);
3116                   std::swap (S[i], S[i-1]);
3117
3118                   lambda_matrix_row_add (U, m, i, i-1, -factor);
3119                   std::swap (U[i], U[i-1]);
3120                 }
3121             }
3122         }
3123     }
3124 }
3125
3126 /* Determines the overlapping elements due to accesses CHREC_A and
3127    CHREC_B, that are affine functions.  This function cannot handle
3128    symbolic evolution functions, ie. when initial conditions are
3129    parameters, because it uses lambda matrices of integers.  */
3130
3131 static void
3132 analyze_subscript_affine_affine (tree chrec_a,
3133                                  tree chrec_b,
3134                                  conflict_function **overlaps_a,
3135                                  conflict_function **overlaps_b,
3136                                  tree *last_conflicts)
3137 {
3138   unsigned nb_vars_a, nb_vars_b, dim;
3139   HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta;
3140   lambda_matrix A, U, S;
3141   struct obstack scratch_obstack;
3142
3143   if (eq_evolutions_p (chrec_a, chrec_b))
3144     {
3145       /* The accessed index overlaps for each iteration in the
3146          loop.  */
3147       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3148       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3149       *last_conflicts = chrec_dont_know;
3150       return;
3151     }
3152   if (dump_file && (dump_flags & TDF_DETAILS))
3153     fprintf (dump_file, "(analyze_subscript_affine_affine \n");
3154
3155   /* For determining the initial intersection, we have to solve a
3156      Diophantine equation.  This is the most time consuming part.
3157
3158      For answering to the question: "Is there a dependence?" we have
3159      to prove that there exists a solution to the Diophantine
3160      equation, and that the solution is in the iteration domain,
3161      i.e. the solution is positive or zero, and that the solution
3162      happens before the upper bound loop.nb_iterations.  Otherwise
3163      there is no dependence.  This function outputs a description of
3164      the iterations that hold the intersections.  */
3165
3166   nb_vars_a = nb_vars_in_chrec (chrec_a);
3167   nb_vars_b = nb_vars_in_chrec (chrec_b);
3168
3169   gcc_obstack_init (&scratch_obstack);
3170
3171   dim = nb_vars_a + nb_vars_b;
3172   U = lambda_matrix_new (dim, dim, &scratch_obstack);
3173   A = lambda_matrix_new (dim, 1, &scratch_obstack);
3174   S = lambda_matrix_new (dim, 1, &scratch_obstack);
3175
3176   init_a = int_cst_value (initialize_matrix_A (A, chrec_a, 0, 1));
3177   init_b = int_cst_value (initialize_matrix_A (A, chrec_b, nb_vars_a, -1));
3178   gamma = init_b - init_a;
3179
3180   /* Don't do all the hard work of solving the Diophantine equation
3181      when we already know the solution: for example,
3182      | {3, +, 1}_1
3183      | {3, +, 4}_2
3184      | gamma = 3 - 3 = 0.
3185      Then the first overlap occurs during the first iterations:
3186      | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
3187   */
3188   if (gamma == 0)
3189     {
3190       if (nb_vars_a == 1 && nb_vars_b == 1)
3191         {
3192           HOST_WIDE_INT step_a, step_b;
3193           HOST_WIDE_INT niter, niter_a, niter_b;
3194           affine_fn ova, ovb;
3195
3196           niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a));
3197           niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b));
3198           niter = MIN (niter_a, niter_b);
3199           step_a = int_cst_value (CHREC_RIGHT (chrec_a));
3200           step_b = int_cst_value (CHREC_RIGHT (chrec_b));
3201
3202           compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
3203                                                    &ova, &ovb,
3204                                                    last_conflicts, 1);
3205           *overlaps_a = conflict_fn (1, ova);
3206           *overlaps_b = conflict_fn (1, ovb);
3207         }
3208
3209       else if (nb_vars_a == 2 && nb_vars_b == 1)
3210         compute_overlap_steps_for_affine_1_2
3211           (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
3212
3213       else if (nb_vars_a == 1 && nb_vars_b == 2)
3214         compute_overlap_steps_for_affine_1_2
3215           (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
3216
3217       else
3218         {
3219           if (dump_file && (dump_flags & TDF_DETAILS))
3220             fprintf (dump_file, "affine-affine test failed: too many variables.\n");
3221           *overlaps_a = conflict_fn_not_known ();
3222           *overlaps_b = conflict_fn_not_known ();
3223           *last_conflicts = chrec_dont_know;
3224         }
3225       goto end_analyze_subs_aa;
3226     }
3227
3228   /* U.A = S */
3229   lambda_matrix_right_hermite (A, dim, 1, S, U);
3230
3231   if (S[0][0] < 0)
3232     {
3233       S[0][0] *= -1;
3234       lambda_matrix_row_negate (U, dim, 0);
3235     }
3236   gcd_alpha_beta = S[0][0];
3237
3238   /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
3239      but that is a quite strange case.  Instead of ICEing, answer
3240      don't know.  */
3241   if (gcd_alpha_beta == 0)
3242     {
3243       *overlaps_a = conflict_fn_not_known ();
3244       *overlaps_b = conflict_fn_not_known ();
3245       *last_conflicts = chrec_dont_know;
3246       goto end_analyze_subs_aa;
3247     }
3248
3249   /* The classic "gcd-test".  */
3250   if (!int_divides_p (gcd_alpha_beta, gamma))
3251     {
3252       /* The "gcd-test" has determined that there is no integer
3253          solution, i.e. there is no dependence.  */
3254       *overlaps_a = conflict_fn_no_dependence ();
3255       *overlaps_b = conflict_fn_no_dependence ();
3256       *last_conflicts = integer_zero_node;
3257     }
3258
3259   /* Both access functions are univariate.  This includes SIV and MIV cases.  */
3260   else if (nb_vars_a == 1 && nb_vars_b == 1)
3261     {
3262       /* Both functions should have the same evolution sign.  */
3263       if (((A[0][0] > 0 && -A[1][0] > 0)
3264            || (A[0][0] < 0 && -A[1][0] < 0)))
3265         {
3266           /* The solutions are given by:
3267              |
3268              | [GAMMA/GCD_ALPHA_BETA  t].[u11 u12]  = [x0]
3269              |                           [u21 u22]    [y0]
3270
3271              For a given integer t.  Using the following variables,
3272
3273              | i0 = u11 * gamma / gcd_alpha_beta
3274              | j0 = u12 * gamma / gcd_alpha_beta
3275              | i1 = u21
3276              | j1 = u22
3277
3278              the solutions are:
3279
3280              | x0 = i0 + i1 * t,
3281              | y0 = j0 + j1 * t.  */
3282           HOST_WIDE_INT i0, j0, i1, j1;
3283
3284           i0 = U[0][0] * gamma / gcd_alpha_beta;
3285           j0 = U[0][1] * gamma / gcd_alpha_beta;
3286           i1 = U[1][0];
3287           j1 = U[1][1];
3288
3289           if ((i1 == 0 && i0 < 0)
3290               || (j1 == 0 && j0 < 0))
3291             {
3292               /* There is no solution.
3293                  FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
3294                  falls in here, but for the moment we don't look at the
3295                  upper bound of the iteration domain.  */
3296               *overlaps_a = conflict_fn_no_dependence ();
3297               *overlaps_b = conflict_fn_no_dependence ();
3298               *last_conflicts = integer_zero_node;
3299               goto end_analyze_subs_aa;
3300             }
3301
3302           if (i1 > 0 && j1 > 0)
3303             {
3304               HOST_WIDE_INT niter_a
3305                 = max_stmt_executions_int (get_chrec_loop (chrec_a));
3306               HOST_WIDE_INT niter_b
3307                 = max_stmt_executions_int (get_chrec_loop (chrec_b));
3308               HOST_WIDE_INT niter = MIN (niter_a, niter_b);
3309
3310               /* (X0, Y0) is a solution of the Diophantine equation:
3311                  "chrec_a (X0) = chrec_b (Y0)".  */
3312               HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
3313                                         CEIL (-j0, j1));
3314               HOST_WIDE_INT x0 = i1 * tau1 + i0;
3315               HOST_WIDE_INT y0 = j1 * tau1 + j0;
3316
3317               /* (X1, Y1) is the smallest positive solution of the eq
3318                  "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
3319                  first conflict occurs.  */
3320               HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
3321               HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
3322               HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
3323
3324               if (niter > 0)
3325                 {
3326                   HOST_WIDE_INT tau2 = MIN (FLOOR_DIV (niter_a - i0, i1),
3327                                             FLOOR_DIV (niter_b - j0, j1));
3328                   HOST_WIDE_INT last_conflict = tau2 - (x1 - i0)/i1;
3329
3330                   /* If the overlap occurs outside of the bounds of the
3331                      loop, there is no dependence.  */
3332                   if (x1 >= niter_a || y1 >= niter_b)
3333                     {
3334                       *overlaps_a = conflict_fn_no_dependence ();
3335                       *overlaps_b = conflict_fn_no_dependence ();
3336                       *last_conflicts = integer_zero_node;
3337                       goto end_analyze_subs_aa;
3338                     }
3339                   else
3340                     *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
3341                 }
3342               else
3343                 *last_conflicts = chrec_dont_know;
3344
3345               *overlaps_a
3346                 = conflict_fn (1,
3347                                affine_fn_univar (build_int_cst (NULL_TREE, x1),
3348                                                  1,
3349                                                  build_int_cst (NULL_TREE, i1)));
3350               *overlaps_b
3351                 = conflict_fn (1,
3352                                affine_fn_univar (build_int_cst (NULL_TREE, y1),
3353                                                  1,
3354                                                  build_int_cst (NULL_TREE, j1)));
3355             }
3356           else
3357             {
3358               /* FIXME: For the moment, the upper bound of the
3359                  iteration domain for i and j is not checked.  */
3360               if (dump_file && (dump_flags & TDF_DETAILS))
3361                 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
3362               *overlaps_a = conflict_fn_not_known ();
3363               *overlaps_b = conflict_fn_not_known ();
3364               *last_conflicts = chrec_dont_know;
3365             }
3366         }
3367       else
3368         {
3369           if (dump_file && (dump_flags & TDF_DETAILS))
3370             fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
3371           *overlaps_a = conflict_fn_not_known ();
3372           *overlaps_b = conflict_fn_not_known ();
3373           *last_conflicts = chrec_dont_know;
3374         }
3375     }
3376   else
3377     {
3378       if (dump_file && (dump_flags & TDF_DETAILS))
3379         fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
3380       *overlaps_a = conflict_fn_not_known ();
3381       *overlaps_b = conflict_fn_not_known ();
3382       *last_conflicts = chrec_dont_know;
3383     }
3384
3385 end_analyze_subs_aa:
3386   obstack_free (&scratch_obstack, NULL);
3387   if (dump_file && (dump_flags & TDF_DETAILS))
3388     {
3389       fprintf (dump_file, "  (overlaps_a = ");
3390       dump_conflict_function (dump_file, *overlaps_a);
3391       fprintf (dump_file, ")\n  (overlaps_b = ");
3392       dump_conflict_function (dump_file, *overlaps_b);
3393       fprintf (dump_file, "))\n");
3394     }
3395 }
3396
3397 /* Returns true when analyze_subscript_affine_affine can be used for
3398    determining the dependence relation between chrec_a and chrec_b,
3399    that contain symbols.  This function modifies chrec_a and chrec_b
3400    such that the analysis result is the same, and such that they don't
3401    contain symbols, and then can safely be passed to the analyzer.
3402
3403    Example: The analysis of the following tuples of evolutions produce
3404    the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
3405    vs. {0, +, 1}_1
3406
3407    {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
3408    {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
3409 */
3410
3411 static bool
3412 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
3413 {
3414   tree diff, type, left_a, left_b, right_b;
3415
3416   if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
3417       || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
3418     /* FIXME: For the moment not handled.  Might be refined later.  */
3419     return false;
3420
3421   type = chrec_type (*chrec_a);
3422   left_a = CHREC_LEFT (*chrec_a);
3423   left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
3424   diff = chrec_fold_minus (type, left_a, left_b);
3425
3426   if (!evolution_function_is_constant_p (diff))
3427     return false;
3428
3429   if (dump_file && (dump_flags & TDF_DETAILS))
3430     fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
3431
3432   *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
3433                                      diff, CHREC_RIGHT (*chrec_a));
3434   right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
3435   *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
3436                                      build_int_cst (type, 0),
3437                                      right_b);
3438   return true;
3439 }
3440
3441 /* Analyze a SIV (Single Index Variable) subscript.  *OVERLAPS_A and
3442    *OVERLAPS_B are initialized to the functions that describe the
3443    relation between the elements accessed twice by CHREC_A and
3444    CHREC_B.  For k >= 0, the following property is verified:
3445
3446    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
3447
3448 static void
3449 analyze_siv_subscript (tree chrec_a,
3450                        tree chrec_b,
3451                        conflict_function **overlaps_a,
3452                        conflict_function **overlaps_b,
3453                        tree *last_conflicts,
3454                        int loop_nest_num)
3455 {
3456   dependence_stats.num_siv++;
3457
3458   if (dump_file && (dump_flags & TDF_DETAILS))
3459     fprintf (dump_file, "(analyze_siv_subscript \n");
3460
3461   if (evolution_function_is_constant_p (chrec_a)
3462       && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
3463     analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
3464                                       overlaps_a, overlaps_b, last_conflicts);
3465
3466   else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
3467            && evolution_function_is_constant_p (chrec_b))
3468     analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
3469                                       overlaps_b, overlaps_a, last_conflicts);
3470
3471   else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
3472            && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
3473     {
3474       if (!chrec_contains_symbols (chrec_a)
3475           && !chrec_contains_symbols (chrec_b))
3476         {
3477           analyze_subscript_affine_affine (chrec_a, chrec_b,
3478                                            overlaps_a, overlaps_b,
3479                                            last_conflicts);
3480
3481           if (CF_NOT_KNOWN_P (*overlaps_a)
3482               || CF_NOT_KNOWN_P (*overlaps_b))
3483             dependence_stats.num_siv_unimplemented++;
3484           else if (CF_NO_DEPENDENCE_P (*overlaps_a)
3485                    || CF_NO_DEPENDENCE_P (*overlaps_b))
3486             dependence_stats.num_siv_independent++;
3487           else
3488             dependence_stats.num_siv_dependent++;
3489         }
3490       else if (can_use_analyze_subscript_affine_affine (&chrec_a,
3491                                                         &chrec_b))
3492         {
3493           analyze_subscript_affine_affine (chrec_a, chrec_b,
3494                                            overlaps_a, overlaps_b,
3495                                            last_conflicts);
3496
3497           if (CF_NOT_KNOWN_P (*overlaps_a)
3498               || CF_NOT_KNOWN_P (*overlaps_b))
3499             dependence_stats.num_siv_unimplemented++;
3500           else if (CF_NO_DEPENDENCE_P (*overlaps_a)
3501                    || CF_NO_DEPENDENCE_P (*overlaps_b))
3502             dependence_stats.num_siv_independent++;
3503           else
3504             dependence_stats.num_siv_dependent++;
3505         }
3506       else
3507         goto siv_subscript_dontknow;
3508     }
3509
3510   else
3511     {
3512     siv_subscript_dontknow:;
3513       if (dump_file && (dump_flags & TDF_DETAILS))
3514         fprintf (dump_file, "  siv test failed: unimplemented");
3515       *overlaps_a = conflict_fn_not_known ();
3516       *overlaps_b = conflict_fn_not_known ();
3517       *last_conflicts = chrec_dont_know;
3518       dependence_stats.num_siv_unimplemented++;
3519     }
3520
3521   if (dump_file && (dump_flags & TDF_DETAILS))
3522     fprintf (dump_file, ")\n");
3523 }
3524
3525 /* Returns false if we can prove that the greatest common divisor of the steps
3526    of CHREC does not divide CST, false otherwise.  */
3527
3528 static bool
3529 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
3530 {
3531   HOST_WIDE_INT cd = 0, val;
3532   tree step;
3533
3534   if (!tree_fits_shwi_p (cst))
3535     return true;
3536   val = tree_to_shwi (cst);
3537
3538   while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
3539     {
3540       step = CHREC_RIGHT (chrec);
3541       if (!tree_fits_shwi_p (step))
3542         return true;
3543       cd = gcd (cd, tree_to_shwi (step));
3544       chrec = CHREC_LEFT (chrec);
3545     }
3546
3547   return val % cd == 0;
3548 }
3549
3550 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
3551    LOOP_NEST.  *OVERLAPS_A and *OVERLAPS_B are initialized to the
3552    functions that describe the relation between the elements accessed
3553    twice by CHREC_A and CHREC_B.  For k >= 0, the following property
3554    is verified:
3555
3556    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
3557
3558 static void
3559 analyze_miv_subscript (tree chrec_a,
3560                        tree chrec_b,
3561                        conflict_function **overlaps_a,
3562                        conflict_function **overlaps_b,
3563                        tree *last_conflicts,
3564                        struct loop *loop_nest)
3565 {
3566   tree type, difference;
3567
3568   dependence_stats.num_miv++;
3569   if (dump_file && (dump_flags & TDF_DETAILS))
3570     fprintf (dump_file, "(analyze_miv_subscript \n");
3571
3572   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
3573   chrec_a = chrec_convert (type, chrec_a, NULL);
3574   chrec_b = chrec_convert (type, chrec_b, NULL);
3575   difference = chrec_fold_minus (type, chrec_a, chrec_b);
3576
3577   if (eq_evolutions_p (chrec_a, chrec_b))
3578     {
3579       /* Access functions are the same: all the elements are accessed
3580          in the same order.  */
3581       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3582       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3583       *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
3584       dependence_stats.num_miv_dependent++;
3585     }
3586
3587   else if (evolution_function_is_constant_p (difference)
3588            /* For the moment, the following is verified:
3589               evolution_function_is_affine_multivariate_p (chrec_a,
3590               loop_nest->num) */
3591            && !gcd_of_steps_may_divide_p (chrec_a, difference))
3592     {
3593       /* testsuite/.../ssa-chrec-33.c
3594          {{21, +, 2}_1, +, -2}_2  vs.  {{20, +, 2}_1, +, -2}_2
3595
3596          The difference is 1, and all the evolution steps are multiples
3597          of 2, consequently there are no overlapping elements.  */
3598       *overlaps_a = conflict_fn_no_dependence ();
3599       *overlaps_b = conflict_fn_no_dependence ();
3600       *last_conflicts = integer_zero_node;
3601       dependence_stats.num_miv_independent++;
3602     }
3603
3604   else if (evolution_function_is_affine_multivariate_p (chrec_a, loop_nest->num)
3605            && !chrec_contains_symbols (chrec_a)
3606            && evolution_function_is_affine_multivariate_p (chrec_b, loop_nest->num)
3607            && !chrec_contains_symbols (chrec_b))
3608     {
3609       /* testsuite/.../ssa-chrec-35.c
3610          {0, +, 1}_2  vs.  {0, +, 1}_3
3611          the overlapping elements are respectively located at iterations:
3612          {0, +, 1}_x and {0, +, 1}_x,
3613          in other words, we have the equality:
3614          {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
3615
3616          Other examples:
3617          {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
3618          {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
3619
3620          {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
3621          {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
3622       */
3623       analyze_subscript_affine_affine (chrec_a, chrec_b,
3624                                        overlaps_a, overlaps_b, last_conflicts);
3625
3626       if (CF_NOT_KNOWN_P (*overlaps_a)
3627           || CF_NOT_KNOWN_P (*overlaps_b))
3628         dependence_stats.num_miv_unimplemented++;
3629       else if (CF_NO_DEPENDENCE_P (*overlaps_a)
3630                || CF_NO_DEPENDENCE_P (*overlaps_b))
3631         dependence_stats.num_miv_independent++;
3632       else
3633         dependence_stats.num_miv_dependent++;
3634     }
3635
3636   else
3637     {
3638       /* When the analysis is too difficult, answer "don't know".  */
3639       if (dump_file && (dump_flags & TDF_DETAILS))
3640         fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
3641
3642       *overlaps_a = conflict_fn_not_known ();
3643       *overlaps_b = conflict_fn_not_known ();
3644       *last_conflicts = chrec_dont_know;
3645       dependence_stats.num_miv_unimplemented++;
3646     }
3647
3648   if (dump_file && (dump_flags & TDF_DETAILS))
3649     fprintf (dump_file, ")\n");
3650 }
3651
3652 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
3653    with respect to LOOP_NEST.  OVERLAP_ITERATIONS_A and
3654    OVERLAP_ITERATIONS_B are initialized with two functions that
3655    describe the iterations that contain conflicting elements.
3656
3657    Remark: For an integer k >= 0, the following equality is true:
3658
3659    CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
3660 */
3661
3662 static void
3663 analyze_overlapping_iterations (tree chrec_a,
3664                                 tree chrec_b,
3665                                 conflict_function **overlap_iterations_a,
3666                                 conflict_function **overlap_iterations_b,
3667                                 tree *last_conflicts, struct loop *loop_nest)
3668 {
3669   unsigned int lnn = loop_nest->num;
3670
3671   dependence_stats.num_subscript_tests++;
3672
3673   if (dump_file && (dump_flags & TDF_DETAILS))
3674     {
3675       fprintf (dump_file, "(analyze_overlapping_iterations \n");
3676       fprintf (dump_file, "  (chrec_a = ");
3677       print_generic_expr (dump_file, chrec_a);
3678       fprintf (dump_file, ")\n  (chrec_b = ");
3679       print_generic_expr (dump_file, chrec_b);
3680       fprintf (dump_file, ")\n");
3681     }
3682
3683   if (chrec_a == NULL_TREE
3684       || chrec_b == NULL_TREE
3685       || chrec_contains_undetermined (chrec_a)
3686       || chrec_contains_undetermined (chrec_b))
3687     {
3688       dependence_stats.num_subscript_undetermined++;
3689
3690       *overlap_iterations_a = conflict_fn_not_known ();
3691       *overlap_iterations_b = conflict_fn_not_known ();
3692     }
3693
3694   /* If they are the same chrec, and are affine, they overlap
3695      on every iteration.  */
3696   else if (eq_evolutions_p (chrec_a, chrec_b)
3697            && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
3698                || operand_equal_p (chrec_a, chrec_b, 0)))
3699     {
3700       dependence_stats.num_same_subscript_function++;
3701       *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3702       *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3703       *last_conflicts = chrec_dont_know;
3704     }
3705
3706   /* If they aren't the same, and aren't affine, we can't do anything
3707      yet.  */
3708   else if ((chrec_contains_symbols (chrec_a)
3709             || chrec_contains_symbols (chrec_b))
3710            && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
3711                || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
3712     {
3713       dependence_stats.num_subscript_undetermined++;
3714       *overlap_iterations_a = conflict_fn_not_known ();
3715       *overlap_iterations_b = conflict_fn_not_known ();
3716     }
3717
3718   else if (ziv_subscript_p (chrec_a, chrec_b))
3719     analyze_ziv_subscript (chrec_a, chrec_b,
3720                            overlap_iterations_a, overlap_iterations_b,
3721                            last_conflicts);
3722
3723   else if (siv_subscript_p (chrec_a, chrec_b))
3724     analyze_siv_subscript (chrec_a, chrec_b,
3725                            overlap_iterations_a, overlap_iterations_b,
3726                            last_conflicts, lnn);
3727
3728   else
3729     analyze_miv_subscript (chrec_a, chrec_b,
3730                            overlap_iterations_a, overlap_iterations_b,
3731                            last_conflicts, loop_nest);
3732
3733   if (dump_file && (dump_flags & TDF_DETAILS))
3734     {
3735       fprintf (dump_file, "  (overlap_iterations_a = ");
3736       dump_conflict_function (dump_file, *overlap_iterations_a);
3737       fprintf (dump_file, ")\n  (overlap_iterations_b = ");
3738       dump_conflict_function (dump_file, *overlap_iterations_b);
3739       fprintf (dump_file, "))\n");
3740     }
3741 }
3742
3743 /* Helper function for uniquely inserting distance vectors.  */
3744
3745 static void
3746 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
3747 {
3748   unsigned i;
3749   lambda_vector v;
3750
3751   FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, v)
3752     if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
3753       return;
3754
3755   DDR_DIST_VECTS (ddr).safe_push (dist_v);
3756 }
3757
3758 /* Helper function for uniquely inserting direction vectors.  */
3759
3760 static void
3761 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
3762 {
3763   unsigned i;
3764   lambda_vector v;
3765
3766   FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), i, v)
3767     if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
3768       return;
3769
3770   DDR_DIR_VECTS (ddr).safe_push (dir_v);
3771 }
3772
3773 /* Add a distance of 1 on all the loops outer than INDEX.  If we
3774    haven't yet determined a distance for this outer loop, push a new
3775    distance vector composed of the previous distance, and a distance
3776    of 1 for this outer loop.  Example:
3777
3778    | loop_1
3779    |   loop_2
3780    |     A[10]
3781    |   endloop_2
3782    | endloop_1
3783
3784    Saved vectors are of the form (dist_in_1, dist_in_2).  First, we
3785    save (0, 1), then we have to save (1, 0).  */
3786
3787 static void
3788 add_outer_distances (struct data_dependence_relation *ddr,
3789                      lambda_vector dist_v, int index)
3790 {
3791   /* For each outer loop where init_v is not set, the accesses are
3792      in dependence of distance 1 in the loop.  */
3793   while (--index >= 0)
3794     {
3795       lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3796       lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
3797       save_v[index] = 1;
3798       save_dist_v (ddr, save_v);
3799     }
3800 }
3801
3802 /* Return false when fail to represent the data dependence as a
3803    distance vector.  INIT_B is set to true when a component has been
3804    added to the distance vector DIST_V.  INDEX_CARRY is then set to
3805    the index in DIST_V that carries the dependence.  */
3806
3807 static bool
3808 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
3809                              struct data_reference *ddr_a,
3810                              struct data_reference *ddr_b,
3811                              lambda_vector dist_v, bool *init_b,
3812                              int *index_carry)
3813 {
3814   unsigned i;
3815   lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3816
3817   for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3818     {
3819       tree access_fn_a, access_fn_b;
3820       struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
3821
3822       if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
3823         {
3824           non_affine_dependence_relation (ddr);
3825           return false;
3826         }
3827
3828       access_fn_a = DR_ACCESS_FN (ddr_a, i);
3829       access_fn_b = DR_ACCESS_FN (ddr_b, i);
3830
3831       if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
3832           && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
3833         {
3834           HOST_WIDE_INT dist;
3835           int index;
3836           int var_a = CHREC_VARIABLE (access_fn_a);
3837           int var_b = CHREC_VARIABLE (access_fn_b);
3838
3839           if (var_a != var_b
3840               || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
3841             {
3842               non_affine_dependence_relation (ddr);
3843               return false;
3844             }
3845
3846           dist = int_cst_value (SUB_DISTANCE (subscript));
3847           index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
3848           *index_carry = MIN (index, *index_carry);
3849
3850           /* This is the subscript coupling test.  If we have already
3851              recorded a distance for this loop (a distance coming from
3852              another subscript), it should be the same.  For example,
3853              in the following code, there is no dependence:
3854
3855              | loop i = 0, N, 1
3856              |   T[i+1][i] = ...
3857              |   ... = T[i][i]
3858              | endloop
3859           */
3860           if (init_v[index] != 0 && dist_v[index] != dist)
3861             {
3862               finalize_ddr_dependent (ddr, chrec_known);
3863               return false;
3864             }
3865
3866           dist_v[index] = dist;
3867           init_v[index] = 1;
3868           *init_b = true;
3869         }
3870       else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
3871         {
3872           /* This can be for example an affine vs. constant dependence
3873              (T[i] vs. T[3]) that is not an affine dependence and is
3874              not representable as a distance vector.  */
3875           non_affine_dependence_relation (ddr);
3876           return false;
3877         }
3878     }
3879
3880   return true;
3881 }
3882
3883 /* Return true when the DDR contains only constant access functions.  */
3884
3885 static bool
3886 constant_access_functions (const struct data_dependence_relation *ddr)
3887 {
3888   unsigned i;
3889
3890   for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3891     if (!evolution_function_is_constant_p (DR_ACCESS_FN (DDR_A (ddr), i))
3892         || !evolution_function_is_constant_p (DR_ACCESS_FN (DDR_B (ddr), i)))
3893       return false;
3894
3895   return true;
3896 }
3897
3898 /* Helper function for the case where DDR_A and DDR_B are the same
3899    multivariate access function with a constant step.  For an example
3900    see pr34635-1.c.  */
3901
3902 static void
3903 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
3904 {
3905   int x_1, x_2;
3906   tree c_1 = CHREC_LEFT (c_2);
3907   tree c_0 = CHREC_LEFT (c_1);
3908   lambda_vector dist_v;
3909   HOST_WIDE_INT v1, v2, cd;
3910
3911   /* Polynomials with more than 2 variables are not handled yet.  When
3912      the evolution steps are parameters, it is not possible to
3913      represent the dependence using classical distance vectors.  */
3914   if (TREE_CODE (c_0) != INTEGER_CST
3915       || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
3916       || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
3917     {
3918       DDR_AFFINE_P (ddr) = false;
3919       return;
3920     }
3921
3922   x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
3923   x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
3924
3925   /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2).  */
3926   dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3927   v1 = int_cst_value (CHREC_RIGHT (c_1));
3928   v2 = int_cst_value (CHREC_RIGHT (c_2));
3929   cd = gcd (v1, v2);
3930   v1 /= cd;
3931   v2 /= cd;
3932
3933   if (v2 < 0)
3934     {
3935       v2 = -v2;
3936       v1 = -v1;
3937     }
3938
3939   dist_v[x_1] = v2;
3940   dist_v[x_2] = -v1;
3941   save_dist_v (ddr, dist_v);
3942
3943   add_outer_distances (ddr, dist_v, x_1);
3944 }
3945
3946 /* Helper function for the case where DDR_A and DDR_B are the same
3947    access functions.  */
3948
3949 static void
3950 add_other_self_distances (struct data_dependence_relation *ddr)
3951 {
3952   lambda_vector dist_v;
3953   unsigned i;
3954   int index_carry = DDR_NB_LOOPS (ddr);
3955
3956   for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3957     {
3958       tree access_fun = DR_ACCESS_FN (DDR_A (ddr), i);
3959
3960       if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
3961         {
3962           if (!evolution_function_is_univariate_p (access_fun))
3963             {
3964               if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
3965                 {
3966                   DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
3967                   return;
3968                 }
3969
3970               access_fun = DR_ACCESS_FN (DDR_A (ddr), 0);
3971
3972               if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
3973                 add_multivariate_self_dist (ddr, access_fun);
3974               else
3975                 /* The evolution step is not constant: it varies in
3976                    the outer loop, so this cannot be represented by a
3977                    distance vector.  For example in pr34635.c the
3978                    evolution is {0, +, {0, +, 4}_1}_2.  */
3979                 DDR_AFFINE_P (ddr) = false;
3980
3981               return;
3982             }
3983
3984           index_carry = MIN (index_carry,
3985                              index_in_loop_nest (CHREC_VARIABLE (access_fun),
3986                                                  DDR_LOOP_NEST (ddr)));
3987         }
3988     }
3989
3990   dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3991   add_outer_distances (ddr, dist_v, index_carry);
3992 }
3993
3994 static void
3995 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
3996 {
3997   lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3998
3999   dist_v[DDR_INNER_LOOP (ddr)] = 1;
4000   save_dist_v (ddr, dist_v);
4001 }
4002
4003 /* Adds a unit distance vector to DDR when there is a 0 overlap.  This
4004    is the case for example when access functions are the same and
4005    equal to a constant, as in:
4006
4007    | loop_1
4008    |   A[3] = ...
4009    |   ... = A[3]
4010    | endloop_1
4011
4012    in which case the distance vectors are (0) and (1).  */
4013
4014 static void
4015 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
4016 {
4017   unsigned i, j;
4018
4019   for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
4020     {
4021       subscript_p sub = DDR_SUBSCRIPT (ddr, i);
4022       conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
4023       conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
4024
4025       for (j = 0; j < ca->n; j++)
4026         if (affine_function_zero_p (ca->fns[j]))
4027           {
4028             insert_innermost_unit_dist_vector (ddr);
4029             return;
4030           }
4031
4032       for (j = 0; j < cb->n; j++)
4033         if (affine_function_zero_p (cb->fns[j]))
4034           {
4035             insert_innermost_unit_dist_vector (ddr);
4036             return;
4037           }
4038     }
4039 }
4040
4041 /* Compute the classic per loop distance vector.  DDR is the data
4042    dependence relation to build a vector from.  Return false when fail
4043    to represent the data dependence as a distance vector.  */
4044
4045 static bool
4046 build_classic_dist_vector (struct data_dependence_relation *ddr,
4047                            struct loop *loop_nest)
4048 {
4049   bool init_b = false;
4050   int index_carry = DDR_NB_LOOPS (ddr);
4051   lambda_vector dist_v;
4052
4053   if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
4054     return false;
4055
4056   if (same_access_functions (ddr))
4057     {
4058       /* Save the 0 vector.  */
4059       dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4060       save_dist_v (ddr, dist_v);
4061
4062       if (constant_access_functions (ddr))
4063         add_distance_for_zero_overlaps (ddr);
4064
4065       if (DDR_NB_LOOPS (ddr) > 1)
4066         add_other_self_distances (ddr);
4067
4068       return true;
4069     }
4070
4071   dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4072   if (!build_classic_dist_vector_1 (ddr, DDR_A (ddr), DDR_B (ddr),
4073                                     dist_v, &init_b, &index_carry))
4074     return false;
4075
4076   /* Save the distance vector if we initialized one.  */
4077   if (init_b)
4078     {
4079       /* Verify a basic constraint: classic distance vectors should
4080          always be lexicographically positive.
4081
4082          Data references are collected in the order of execution of
4083          the program, thus for the following loop
4084
4085          | for (i = 1; i < 100; i++)
4086          |   for (j = 1; j < 100; j++)
4087          |     {
4088          |       t = T[j+1][i-1];  // A
4089          |       T[j][i] = t + 2;  // B
4090          |     }
4091
4092          references are collected following the direction of the wind:
4093          A then B.  The data dependence tests are performed also
4094          following this order, such that we're looking at the distance
4095          separating the elements accessed by A from the elements later
4096          accessed by B.  But in this example, the distance returned by
4097          test_dep (A, B) is lexicographically negative (-1, 1), that
4098          means that the access A occurs later than B with respect to
4099          the outer loop, ie. we're actually looking upwind.  In this
4100          case we solve test_dep (B, A) looking downwind to the
4101          lexicographically positive solution, that returns the
4102          distance vector (1, -1).  */
4103       if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
4104         {
4105           lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4106           if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr), DDR_A (ddr),
4107                                               loop_nest))
4108             return false;
4109           compute_subscript_distance (ddr);
4110           if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
4111                                             save_v, &init_b, &index_carry))
4112             return false;
4113           save_dist_v (ddr, save_v);
4114           DDR_REVERSED_P (ddr) = true;
4115
4116           /* In this case there is a dependence forward for all the
4117              outer loops:
4118
4119              | for (k = 1; k < 100; k++)
4120              |  for (i = 1; i < 100; i++)
4121              |   for (j = 1; j < 100; j++)
4122              |     {
4123              |       t = T[j+1][i-1];  // A
4124              |       T[j][i] = t + 2;  // B
4125              |     }
4126
4127              the vectors are:
4128              (0,  1, -1)
4129              (1,  1, -1)
4130              (1, -1,  1)
4131           */
4132           if (DDR_NB_LOOPS (ddr) > 1)
4133             {
4134               add_outer_distances (ddr, save_v, index_carry);
4135               add_outer_distances (ddr, dist_v, index_carry);
4136             }
4137         }
4138       else
4139         {
4140           lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4141           lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
4142
4143           if (DDR_NB_LOOPS (ddr) > 1)
4144             {
4145               lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4146
4147               if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr),
4148                                                   DDR_A (ddr), loop_nest))
4149                 return false;
4150               compute_subscript_distance (ddr);
4151               if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
4152                                                 opposite_v, &init_b,
4153                                                 &index_carry))
4154                 return false;
4155
4156               save_dist_v (ddr, save_v);
4157               add_outer_distances (ddr, dist_v, index_carry);
4158               add_outer_distances (ddr, opposite_v, index_carry);
4159             }
4160           else
4161             save_dist_v (ddr, save_v);
4162         }
4163     }
4164   else
4165     {
4166       /* There is a distance of 1 on all the outer loops: Example:
4167          there is a dependence of distance 1 on loop_1 for the array A.
4168
4169          | loop_1
4170          |   A[5] = ...
4171          | endloop
4172       */
4173       add_outer_distances (ddr, dist_v,
4174                            lambda_vector_first_nz (dist_v,
4175                                                    DDR_NB_LOOPS (ddr), 0));
4176     }
4177
4178   if (dump_file && (dump_flags & TDF_DETAILS))
4179     {
4180       unsigned i;
4181
4182       fprintf (dump_file, "(build_classic_dist_vector\n");
4183       for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
4184         {
4185           fprintf (dump_file, "  dist_vector = (");
4186           print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
4187                                DDR_NB_LOOPS (ddr));
4188           fprintf (dump_file, "  )\n");
4189         }
4190       fprintf (dump_file, ")\n");
4191     }
4192
4193   return true;
4194 }
4195
4196 /* Return the direction for a given distance.
4197    FIXME: Computing dir this way is suboptimal, since dir can catch
4198    cases that dist is unable to represent.  */
4199
4200 static inline enum data_dependence_direction
4201 dir_from_dist (int dist)
4202 {
4203   if (dist > 0)
4204     return dir_positive;
4205   else if (dist < 0)
4206     return dir_negative;
4207   else
4208     return dir_equal;
4209 }
4210
4211 /* Compute the classic per loop direction vector.  DDR is the data
4212    dependence relation to build a vector from.  */
4213
4214 static void
4215 build_classic_dir_vector (struct data_dependence_relation *ddr)
4216 {
4217   unsigned i, j;
4218   lambda_vector dist_v;
4219
4220   FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
4221     {
4222       lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4223
4224       for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
4225         dir_v[j] = dir_from_dist (dist_v[j]);
4226
4227       save_dir_v (ddr, dir_v);
4228     }
4229 }
4230
4231 /* Helper function.  Returns true when there is a dependence between
4232    data references DRA and DRB.  */
4233
4234 static bool
4235 subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
4236                                struct data_reference *dra,
4237                                struct data_reference *drb,
4238                                struct loop *loop_nest)
4239 {
4240   unsigned int i;
4241   tree last_conflicts;
4242   struct subscript *subscript;
4243   tree res = NULL_TREE;
4244
4245   for (i = 0; DDR_SUBSCRIPTS (ddr).iterate (i, &subscript); i++)
4246     {
4247       conflict_function *overlaps_a, *overlaps_b;
4248
4249       analyze_overlapping_iterations (DR_ACCESS_FN (dra, i),
4250                                       DR_ACCESS_FN (drb, i),
4251                                       &overlaps_a, &overlaps_b,
4252                                       &last_conflicts, loop_nest);
4253
4254       if (SUB_CONFLICTS_IN_A (subscript))
4255         free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
4256       if (SUB_CONFLICTS_IN_B (subscript))
4257         free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
4258
4259       SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
4260       SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
4261       SUB_LAST_CONFLICT (subscript) = last_conflicts;
4262
4263       /* If there is any undetermined conflict function we have to
4264          give a conservative answer in case we cannot prove that
4265          no dependence exists when analyzing another subscript.  */
4266       if (CF_NOT_KNOWN_P (overlaps_a)
4267           || CF_NOT_KNOWN_P (overlaps_b))
4268         {
4269           res = chrec_dont_know;
4270           continue;
4271         }
4272
4273       /* When there is a subscript with no dependence we can stop.  */
4274       else if (CF_NO_DEPENDENCE_P (overlaps_a)
4275                || CF_NO_DEPENDENCE_P (overlaps_b))
4276         {
4277           res = chrec_known;
4278           break;
4279         }
4280     }
4281
4282   if (res == NULL_TREE)
4283     return true;
4284
4285   if (res == chrec_known)
4286     dependence_stats.num_dependence_independent++;
4287   else
4288     dependence_stats.num_dependence_undetermined++;
4289   finalize_ddr_dependent (ddr, res);
4290   return false;
4291 }
4292
4293 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR.  */
4294
4295 static void
4296 subscript_dependence_tester (struct data_dependence_relation *ddr,
4297                              struct loop *loop_nest)
4298 {
4299   if (subscript_dependence_tester_1 (ddr, DDR_A (ddr), DDR_B (ddr), loop_nest))
4300     dependence_stats.num_dependence_dependent++;
4301
4302   compute_subscript_distance (ddr);
4303   if (build_classic_dist_vector (ddr, loop_nest))
4304     build_classic_dir_vector (ddr);
4305 }
4306
4307 /* Returns true when all the access functions of A are affine or
4308    constant with respect to LOOP_NEST.  */
4309
4310 static bool
4311 access_functions_are_affine_or_constant_p (const struct data_reference *a,
4312                                            const struct loop *loop_nest)
4313 {
4314   unsigned int i;
4315   vec<tree> fns = DR_ACCESS_FNS (a);
4316   tree t;
4317
4318   FOR_EACH_VEC_ELT (fns, i, t)
4319     if (!evolution_function_is_invariant_p (t, loop_nest->num)
4320         && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
4321       return false;
4322
4323   return true;
4324 }
4325
4326 /* This computes the affine dependence relation between A and B with
4327    respect to LOOP_NEST.  CHREC_KNOWN is used for representing the
4328    independence between two accesses, while CHREC_DONT_KNOW is used
4329    for representing the unknown relation.
4330
4331    Note that it is possible to stop the computation of the dependence
4332    relation the first time we detect a CHREC_KNOWN element for a given
4333    subscript.  */
4334
4335 void
4336 compute_affine_dependence (struct data_dependence_relation *ddr,
4337                            struct loop *loop_nest)
4338 {
4339   struct data_reference *dra = DDR_A (ddr);
4340   struct data_reference *drb = DDR_B (ddr);
4341
4342   if (dump_file && (dump_flags & TDF_DETAILS))
4343     {
4344       fprintf (dump_file, "(compute_affine_dependence\n");
4345       fprintf (dump_file, "  stmt_a: ");
4346       print_gimple_stmt (dump_file, DR_STMT (dra), 0, TDF_SLIM);
4347       fprintf (dump_file, "  stmt_b: ");
4348       print_gimple_stmt (dump_file, DR_STMT (drb), 0, TDF_SLIM);
4349     }
4350
4351   /* Analyze only when the dependence relation is not yet known.  */
4352   if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4353     {
4354       dependence_stats.num_dependence_tests++;
4355
4356       if (access_functions_are_affine_or_constant_p (dra, loop_nest)
4357           && access_functions_are_affine_or_constant_p (drb, loop_nest))
4358         subscript_dependence_tester (ddr, loop_nest);
4359
4360       /* As a last case, if the dependence cannot be determined, or if
4361          the dependence is considered too difficult to determine, answer
4362          "don't know".  */
4363       else
4364         {
4365           dependence_stats.num_dependence_undetermined++;
4366
4367           if (dump_file && (dump_flags & TDF_DETAILS))
4368             {
4369               fprintf (dump_file, "Data ref a:\n");
4370               dump_data_reference (dump_file, dra);
4371               fprintf (dump_file, "Data ref b:\n");
4372               dump_data_reference (dump_file, drb);
4373               fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
4374             }
4375           finalize_ddr_dependent (ddr, chrec_dont_know);
4376         }
4377     }
4378
4379   if (dump_file && (dump_flags & TDF_DETAILS))
4380     {
4381       if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4382         fprintf (dump_file, ") -> no dependence\n");
4383       else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
4384         fprintf (dump_file, ") -> dependence analysis failed\n");
4385       else
4386         fprintf (dump_file, ")\n");
4387     }
4388 }
4389
4390 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
4391    the data references in DATAREFS, in the LOOP_NEST.  When
4392    COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
4393    relations.  Return true when successful, i.e. data references number
4394    is small enough to be handled.  */
4395
4396 bool
4397 compute_all_dependences (vec<data_reference_p> datarefs,
4398                          vec<ddr_p> *dependence_relations,
4399                          vec<loop_p> loop_nest,
4400                          bool compute_self_and_rr)
4401 {
4402   struct data_dependence_relation *ddr;
4403   struct data_reference *a, *b;
4404   unsigned int i, j;
4405
4406   if ((int) datarefs.length ()
4407       > PARAM_VALUE (PARAM_LOOP_MAX_DATAREFS_FOR_DATADEPS))
4408     {
4409       struct data_dependence_relation *ddr;
4410
4411       /* Insert a single relation into dependence_relations:
4412          chrec_dont_know.  */
4413       ddr = initialize_data_dependence_relation (NULL, NULL, loop_nest);
4414       dependence_relations->safe_push (ddr);
4415       return false;
4416     }
4417
4418   FOR_EACH_VEC_ELT (datarefs, i, a)
4419     for (j = i + 1; datarefs.iterate (j, &b); j++)
4420       if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
4421         {
4422           ddr = initialize_data_dependence_relation (a, b, loop_nest);
4423           dependence_relations->safe_push (ddr);
4424           if (loop_nest.exists ())
4425             compute_affine_dependence (ddr, loop_nest[0]);
4426         }
4427
4428   if (compute_self_and_rr)
4429     FOR_EACH_VEC_ELT (datarefs, i, a)
4430       {
4431         ddr = initialize_data_dependence_relation (a, a, loop_nest);
4432         dependence_relations->safe_push (ddr);
4433         if (loop_nest.exists ())
4434           compute_affine_dependence (ddr, loop_nest[0]);
4435       }
4436
4437   return true;
4438 }
4439
4440 /* Describes a location of a memory reference.  */
4441
4442 struct data_ref_loc
4443 {
4444   /* The memory reference.  */
4445   tree ref;
4446
4447   /* True if the memory reference is read.  */
4448   bool is_read;
4449 };
4450
4451
4452 /* Stores the locations of memory references in STMT to REFERENCES.  Returns
4453    true if STMT clobbers memory, false otherwise.  */
4454
4455 static bool
4456 get_references_in_stmt (gimple *stmt, vec<data_ref_loc, va_heap> *references)
4457 {
4458   bool clobbers_memory = false;
4459   data_ref_loc ref;
4460   tree op0, op1;
4461   enum gimple_code stmt_code = gimple_code (stmt);
4462
4463   /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
4464      As we cannot model data-references to not spelled out
4465      accesses give up if they may occur.  */
4466   if (stmt_code == GIMPLE_CALL
4467       && !(gimple_call_flags (stmt) & ECF_CONST))
4468     {
4469       /* Allow IFN_GOMP_SIMD_LANE in their own loops.  */
4470       if (gimple_call_internal_p (stmt))
4471         switch (gimple_call_internal_fn (stmt))
4472           {
4473           case IFN_GOMP_SIMD_LANE:
4474             {
4475               struct loop *loop = gimple_bb (stmt)->loop_father;
4476               tree uid = gimple_call_arg (stmt, 0);
4477               gcc_assert (TREE_CODE (uid) == SSA_NAME);
4478               if (loop == NULL
4479                   || loop->simduid != SSA_NAME_VAR (uid))
4480                 clobbers_memory = true;
4481               break;
4482             }
4483           case IFN_MASK_LOAD:
4484           case IFN_MASK_STORE:
4485             break;
4486           default:
4487             clobbers_memory = true;
4488             break;
4489           }
4490       else
4491         clobbers_memory = true;
4492     }
4493   else if (stmt_code == GIMPLE_ASM
4494            && (gimple_asm_volatile_p (as_a <gasm *> (stmt))
4495                || gimple_vuse (stmt)))
4496     clobbers_memory = true;
4497
4498   if (!gimple_vuse (stmt))
4499     return clobbers_memory;
4500
4501   if (stmt_code == GIMPLE_ASSIGN)
4502     {
4503       tree base;
4504       op0 = gimple_assign_lhs (stmt);
4505       op1 = gimple_assign_rhs1 (stmt);
4506
4507       if (DECL_P (op1)
4508           || (REFERENCE_CLASS_P (op1)
4509               && (base = get_base_address (op1))
4510               && TREE_CODE (base) != SSA_NAME
4511               && !is_gimple_min_invariant (base)))
4512         {
4513           ref.ref = op1;
4514           ref.is_read = true;
4515           references->safe_push (ref);
4516         }
4517     }
4518   else if (stmt_code == GIMPLE_CALL)
4519     {
4520       unsigned i, n;
4521       tree ptr, type;
4522       unsigned int align;
4523
4524       ref.is_read = false;
4525       if (gimple_call_internal_p (stmt))
4526         switch (gimple_call_internal_fn (stmt))
4527           {
4528           case IFN_MASK_LOAD:
4529             if (gimple_call_lhs (stmt) == NULL_TREE)
4530               break;
4531             ref.is_read = true;
4532             /* FALLTHRU */
4533           case IFN_MASK_STORE:
4534             ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)), 0);
4535             align = tree_to_shwi (gimple_call_arg (stmt, 1));
4536             if (ref.is_read)
4537               type = TREE_TYPE (gimple_call_lhs (stmt));
4538             else
4539               type = TREE_TYPE (gimple_call_arg (stmt, 3));
4540             if (TYPE_ALIGN (type) != align)
4541               type = build_aligned_type (type, align);
4542             ref.ref = fold_build2 (MEM_REF, type, gimple_call_arg (stmt, 0),
4543                                    ptr);
4544             references->safe_push (ref);
4545             return false;
4546           default:
4547             break;
4548           }
4549
4550       op0 = gimple_call_lhs (stmt);
4551       n = gimple_call_num_args (stmt);
4552       for (i = 0; i < n; i++)
4553         {
4554           op1 = gimple_call_arg (stmt, i);
4555
4556           if (DECL_P (op1)
4557               || (REFERENCE_CLASS_P (op1) && get_base_address (op1)))
4558             {
4559               ref.ref = op1;
4560               ref.is_read = true;
4561               references->safe_push (ref);
4562             }
4563         }
4564     }
4565   else
4566     return clobbers_memory;
4567
4568   if (op0
4569       && (DECL_P (op0)
4570           || (REFERENCE_CLASS_P (op0) && get_base_address (op0))))
4571     {
4572       ref.ref = op0;
4573       ref.is_read = false;
4574       references->safe_push (ref);
4575     }
4576   return clobbers_memory;
4577 }
4578
4579
4580 /* Returns true if the loop-nest has any data reference.  */
4581
4582 bool
4583 loop_nest_has_data_refs (loop_p loop)
4584 {
4585   basic_block *bbs = get_loop_body (loop);
4586   auto_vec<data_ref_loc, 3> references;
4587
4588   for (unsigned i = 0; i < loop->num_nodes; i++)
4589     {
4590       basic_block bb = bbs[i];
4591       gimple_stmt_iterator bsi;
4592
4593       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4594         {
4595           gimple *stmt = gsi_stmt (bsi);
4596           get_references_in_stmt (stmt, &references);
4597           if (references.length ())
4598             {
4599               free (bbs);
4600               return true;
4601             }
4602         }
4603     }
4604   free (bbs);
4605
4606   if (loop->inner)
4607     {
4608       loop = loop->inner;
4609       while (loop)
4610         {
4611           if (loop_nest_has_data_refs (loop))
4612             return true;
4613           loop = loop->next;
4614         }
4615     }
4616   return false;
4617 }
4618
4619 /* Stores the data references in STMT to DATAREFS.  If there is an unanalyzable
4620    reference, returns false, otherwise returns true.  NEST is the outermost
4621    loop of the loop nest in which the references should be analyzed.  */
4622
4623 bool
4624 find_data_references_in_stmt (struct loop *nest, gimple *stmt,
4625                               vec<data_reference_p> *datarefs)
4626 {
4627   unsigned i;
4628   auto_vec<data_ref_loc, 2> references;
4629   data_ref_loc *ref;
4630   bool ret = true;
4631   data_reference_p dr;
4632
4633   if (get_references_in_stmt (stmt, &references))
4634     return false;
4635
4636   FOR_EACH_VEC_ELT (references, i, ref)
4637     {
4638       dr = create_data_ref (nest, loop_containing_stmt (stmt),
4639                             ref->ref, stmt, ref->is_read);
4640       gcc_assert (dr != NULL);
4641       datarefs->safe_push (dr);
4642     }
4643
4644   return ret;
4645 }
4646
4647 /* Stores the data references in STMT to DATAREFS.  If there is an
4648    unanalyzable reference, returns false, otherwise returns true.
4649    NEST is the outermost loop of the loop nest in which the references
4650    should be instantiated, LOOP is the loop in which the references
4651    should be analyzed.  */
4652
4653 bool
4654 graphite_find_data_references_in_stmt (loop_p nest, loop_p loop, gimple *stmt,
4655                                        vec<data_reference_p> *datarefs)
4656 {
4657   unsigned i;
4658   auto_vec<data_ref_loc, 2> references;
4659   data_ref_loc *ref;
4660   bool ret = true;
4661   data_reference_p dr;
4662
4663   if (get_references_in_stmt (stmt, &references))
4664     return false;
4665
4666   FOR_EACH_VEC_ELT (references, i, ref)
4667     {
4668       dr = create_data_ref (nest, loop, ref->ref, stmt, ref->is_read);
4669       gcc_assert (dr != NULL);
4670       datarefs->safe_push (dr);
4671     }
4672
4673   return ret;
4674 }
4675
4676 /* Search the data references in LOOP, and record the information into
4677    DATAREFS.  Returns chrec_dont_know when failing to analyze a
4678    difficult case, returns NULL_TREE otherwise.  */
4679
4680 tree
4681 find_data_references_in_bb (struct loop *loop, basic_block bb,
4682                             vec<data_reference_p> *datarefs)
4683 {
4684   gimple_stmt_iterator bsi;
4685
4686   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4687     {
4688       gimple *stmt = gsi_stmt (bsi);
4689
4690       if (!find_data_references_in_stmt (loop, stmt, datarefs))
4691         {
4692           struct data_reference *res;
4693           res = XCNEW (struct data_reference);
4694           datarefs->safe_push (res);
4695
4696           return chrec_dont_know;
4697         }
4698     }
4699
4700   return NULL_TREE;
4701 }
4702
4703 /* Search the data references in LOOP, and record the information into
4704    DATAREFS.  Returns chrec_dont_know when failing to analyze a
4705    difficult case, returns NULL_TREE otherwise.
4706
4707    TODO: This function should be made smarter so that it can handle address
4708    arithmetic as if they were array accesses, etc.  */
4709
4710 tree
4711 find_data_references_in_loop (struct loop *loop,
4712                               vec<data_reference_p> *datarefs)
4713 {
4714   basic_block bb, *bbs;
4715   unsigned int i;
4716
4717   bbs = get_loop_body_in_dom_order (loop);
4718
4719   for (i = 0; i < loop->num_nodes; i++)
4720     {
4721       bb = bbs[i];
4722
4723       if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
4724         {
4725           free (bbs);
4726           return chrec_dont_know;
4727         }
4728     }
4729   free (bbs);
4730
4731   return NULL_TREE;
4732 }
4733
4734 /* Recursive helper function.  */
4735
4736 static bool
4737 find_loop_nest_1 (struct loop *loop, vec<loop_p> *loop_nest)
4738 {
4739   /* Inner loops of the nest should not contain siblings.  Example:
4740      when there are two consecutive loops,
4741
4742      | loop_0
4743      |   loop_1
4744      |     A[{0, +, 1}_1]
4745      |   endloop_1
4746      |   loop_2
4747      |     A[{0, +, 1}_2]
4748      |   endloop_2
4749      | endloop_0
4750
4751      the dependence relation cannot be captured by the distance
4752      abstraction.  */
4753   if (loop->next)
4754     return false;
4755
4756   loop_nest->safe_push (loop);
4757   if (loop->inner)
4758     return find_loop_nest_1 (loop->inner, loop_nest);
4759   return true;
4760 }
4761
4762 /* Return false when the LOOP is not well nested.  Otherwise return
4763    true and insert in LOOP_NEST the loops of the nest.  LOOP_NEST will
4764    contain the loops from the outermost to the innermost, as they will
4765    appear in the classic distance vector.  */
4766
4767 bool
4768 find_loop_nest (struct loop *loop, vec<loop_p> *loop_nest)
4769 {
4770   loop_nest->safe_push (loop);
4771   if (loop->inner)
4772     return find_loop_nest_1 (loop->inner, loop_nest);
4773   return true;
4774 }
4775
4776 /* Returns true when the data dependences have been computed, false otherwise.
4777    Given a loop nest LOOP, the following vectors are returned:
4778    DATAREFS is initialized to all the array elements contained in this loop,
4779    DEPENDENCE_RELATIONS contains the relations between the data references.
4780    Compute read-read and self relations if
4781    COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE.  */
4782
4783 bool
4784 compute_data_dependences_for_loop (struct loop *loop,
4785                                    bool compute_self_and_read_read_dependences,
4786                                    vec<loop_p> *loop_nest,
4787                                    vec<data_reference_p> *datarefs,
4788                                    vec<ddr_p> *dependence_relations)
4789 {
4790   bool res = true;
4791
4792   memset (&dependence_stats, 0, sizeof (dependence_stats));
4793
4794   /* If the loop nest is not well formed, or one of the data references
4795      is not computable, give up without spending time to compute other
4796      dependences.  */
4797   if (!loop
4798       || !find_loop_nest (loop, loop_nest)
4799       || find_data_references_in_loop (loop, datarefs) == chrec_dont_know
4800       || !compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
4801                                    compute_self_and_read_read_dependences))
4802     res = false;
4803
4804   if (dump_file && (dump_flags & TDF_STATS))
4805     {
4806       fprintf (dump_file, "Dependence tester statistics:\n");
4807
4808       fprintf (dump_file, "Number of dependence tests: %d\n",
4809                dependence_stats.num_dependence_tests);
4810       fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
4811                dependence_stats.num_dependence_dependent);
4812       fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
4813                dependence_stats.num_dependence_independent);
4814       fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
4815                dependence_stats.num_dependence_undetermined);
4816
4817       fprintf (dump_file, "Number of subscript tests: %d\n",
4818                dependence_stats.num_subscript_tests);
4819       fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
4820                dependence_stats.num_subscript_undetermined);
4821       fprintf (dump_file, "Number of same subscript function: %d\n",
4822                dependence_stats.num_same_subscript_function);
4823
4824       fprintf (dump_file, "Number of ziv tests: %d\n",
4825                dependence_stats.num_ziv);
4826       fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
4827                dependence_stats.num_ziv_dependent);
4828       fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
4829                dependence_stats.num_ziv_independent);
4830       fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
4831                dependence_stats.num_ziv_unimplemented);
4832
4833       fprintf (dump_file, "Number of siv tests: %d\n",
4834                dependence_stats.num_siv);
4835       fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
4836                dependence_stats.num_siv_dependent);
4837       fprintf (dump_file, "Number of siv tests returning independent: %d\n",
4838                dependence_stats.num_siv_independent);
4839       fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
4840                dependence_stats.num_siv_unimplemented);
4841
4842       fprintf (dump_file, "Number of miv tests: %d\n",
4843                dependence_stats.num_miv);
4844       fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
4845                dependence_stats.num_miv_dependent);
4846       fprintf (dump_file, "Number of miv tests returning independent: %d\n",
4847                dependence_stats.num_miv_independent);
4848       fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
4849                dependence_stats.num_miv_unimplemented);
4850     }
4851
4852   return res;
4853 }
4854
4855 /* Free the memory used by a data dependence relation DDR.  */
4856
4857 void
4858 free_dependence_relation (struct data_dependence_relation *ddr)
4859 {
4860   if (ddr == NULL)
4861     return;
4862
4863   if (DDR_SUBSCRIPTS (ddr).exists ())
4864     free_subscripts (DDR_SUBSCRIPTS (ddr));
4865   DDR_DIST_VECTS (ddr).release ();
4866   DDR_DIR_VECTS (ddr).release ();
4867
4868   free (ddr);
4869 }
4870
4871 /* Free the memory used by the data dependence relations from
4872    DEPENDENCE_RELATIONS.  */
4873
4874 void
4875 free_dependence_relations (vec<ddr_p> dependence_relations)
4876 {
4877   unsigned int i;
4878   struct data_dependence_relation *ddr;
4879
4880   FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
4881     if (ddr)
4882       free_dependence_relation (ddr);
4883
4884   dependence_relations.release ();
4885 }
4886
4887 /* Free the memory used by the data references from DATAREFS.  */
4888
4889 void
4890 free_data_refs (vec<data_reference_p> datarefs)
4891 {
4892   unsigned int i;
4893   struct data_reference *dr;
4894
4895   FOR_EACH_VEC_ELT (datarefs, i, dr)
4896     free_data_ref (dr);
4897   datarefs.release ();
4898 }