Move canonicalisation of dr_with_seg_len_pair_ts
[official-gcc.git] / gcc / tree-data-ref.c
blobb31ed4247dd948a05ee11cffda9fceb1d2b5c9bf
1 /* Data references and dependences detectors.
2 Copyright (C) 2003-2019 Free Software Foundation, Inc.
3 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* This pass walks a given loop structure searching for array
22 references. The information about the array accesses is recorded
23 in DATA_REFERENCE structures.
25 The basic test for determining the dependences is:
26 given two access functions chrec1 and chrec2 to a same array, and
27 x and y two vectors from the iteration domain, the same element of
28 the array is accessed twice at iterations x and y if and only if:
29 | chrec1 (x) == chrec2 (y).
31 The goals of this analysis are:
33 - to determine the independence: the relation between two
34 independent accesses is qualified with the chrec_known (this
35 information allows a loop parallelization),
37 - when two data references access the same data, to qualify the
38 dependence relation with classic dependence representations:
40 - distance vectors
41 - direction vectors
42 - loop carried level dependence
43 - polyhedron dependence
44 or with the chains of recurrences based representation,
46 - to define a knowledge base for storing the data dependence
47 information,
49 - to define an interface to access this data.
52 Definitions:
54 - subscript: given two array accesses a subscript is the tuple
55 composed of the access functions for a given dimension. Example:
56 Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
57 (f1, g1), (f2, g2), (f3, g3).
59 - Diophantine equation: an equation whose coefficients and
60 solutions are integer constants, for example the equation
61 | 3*x + 2*y = 1
62 has an integer solution x = 1 and y = -1.
64 References:
66 - "Advanced Compilation for High Performance Computing" by Randy
67 Allen and Ken Kennedy.
68 http://citeseer.ist.psu.edu/goff91practical.html
70 - "Loop Transformations for Restructuring Compilers - The Foundations"
71 by Utpal Banerjee.
76 #include "config.h"
77 #include "system.h"
78 #include "coretypes.h"
79 #include "backend.h"
80 #include "rtl.h"
81 #include "tree.h"
82 #include "gimple.h"
83 #include "gimple-pretty-print.h"
84 #include "alias.h"
85 #include "fold-const.h"
86 #include "expr.h"
87 #include "gimple-iterator.h"
88 #include "tree-ssa-loop-niter.h"
89 #include "tree-ssa-loop.h"
90 #include "tree-ssa.h"
91 #include "cfgloop.h"
92 #include "tree-data-ref.h"
93 #include "tree-scalar-evolution.h"
94 #include "dumpfile.h"
95 #include "tree-affine.h"
96 #include "builtins.h"
97 #include "tree-eh.h"
98 #include "ssa.h"
/* Counters for the dependence analyzer; dumped with the pass statistics
   so the effectiveness of the ZIV/SIV/MIV tests can be measured.  */

static struct datadep_stats
{
  int num_dependence_tests;
  int num_dependence_dependent;
  int num_dependence_independent;
  int num_dependence_undetermined;

  int num_subscript_tests;
  int num_subscript_undetermined;
  int num_same_subscript_function;

  int num_ziv;
  int num_ziv_independent;
  int num_ziv_dependent;
  int num_ziv_unimplemented;

  int num_siv;
  int num_siv_independent;
  int num_siv_dependent;
  int num_siv_unimplemented;

  int num_miv;
  int num_miv_independent;
  int num_miv_dependent;
  int num_miv_unimplemented;
} dependence_stats;
/* Forward declaration; defined later in this file.  Tests the dependence
   between DDR's two references, using the access functions starting at
   the given subscript positions, within the loop nest rooted at LOOP.  */

static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
					   unsigned int, unsigned int,
					   class loop *);
130 /* Returns true iff A divides B. */
132 static inline bool
133 tree_fold_divides_p (const_tree a, const_tree b)
135 gcc_assert (TREE_CODE (a) == INTEGER_CST);
136 gcc_assert (TREE_CODE (b) == INTEGER_CST);
137 return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
/* Returns true iff A divides B exactly.  A must be nonzero, otherwise
   the modulo operation is undefined.  */

static inline bool
int_divides_p (int a, int b)
{
  return ((b % a) == 0);
}
148 /* Return true if reference REF contains a union access. */
150 static bool
151 ref_contains_union_access_p (tree ref)
153 while (handled_component_p (ref))
155 ref = TREE_OPERAND (ref, 0);
156 if (TREE_CODE (TREE_TYPE (ref)) == UNION_TYPE
157 || TREE_CODE (TREE_TYPE (ref)) == QUAL_UNION_TYPE)
158 return true;
160 return false;
165 /* Dump into FILE all the data references from DATAREFS. */
167 static void
168 dump_data_references (FILE *file, vec<data_reference_p> datarefs)
170 unsigned int i;
171 struct data_reference *dr;
173 FOR_EACH_VEC_ELT (datarefs, i, dr)
174 dump_data_reference (file, dr);
177 /* Unified dump into FILE all the data references from DATAREFS. */
179 DEBUG_FUNCTION void
180 debug (vec<data_reference_p> &ref)
182 dump_data_references (stderr, ref);
185 DEBUG_FUNCTION void
186 debug (vec<data_reference_p> *ptr)
188 if (ptr)
189 debug (*ptr);
190 else
191 fprintf (stderr, "<nil>\n");
195 /* Dump into STDERR all the data references from DATAREFS. */
197 DEBUG_FUNCTION void
198 debug_data_references (vec<data_reference_p> datarefs)
200 dump_data_references (stderr, datarefs);
203 /* Print to STDERR the data_reference DR. */
205 DEBUG_FUNCTION void
206 debug_data_reference (struct data_reference *dr)
208 dump_data_reference (stderr, dr);
211 /* Dump function for a DATA_REFERENCE structure. */
213 void
214 dump_data_reference (FILE *outf,
215 struct data_reference *dr)
217 unsigned int i;
219 fprintf (outf, "#(Data Ref: \n");
220 fprintf (outf, "# bb: %d \n", gimple_bb (DR_STMT (dr))->index);
221 fprintf (outf, "# stmt: ");
222 print_gimple_stmt (outf, DR_STMT (dr), 0);
223 fprintf (outf, "# ref: ");
224 print_generic_stmt (outf, DR_REF (dr));
225 fprintf (outf, "# base_object: ");
226 print_generic_stmt (outf, DR_BASE_OBJECT (dr));
228 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
230 fprintf (outf, "# Access function %d: ", i);
231 print_generic_stmt (outf, DR_ACCESS_FN (dr, i));
233 fprintf (outf, "#)\n");
236 /* Unified dump function for a DATA_REFERENCE structure. */
238 DEBUG_FUNCTION void
239 debug (data_reference &ref)
241 dump_data_reference (stderr, &ref);
244 DEBUG_FUNCTION void
245 debug (data_reference *ptr)
247 if (ptr)
248 debug (*ptr);
249 else
250 fprintf (stderr, "<nil>\n");
254 /* Dumps the affine function described by FN to the file OUTF. */
256 DEBUG_FUNCTION void
257 dump_affine_function (FILE *outf, affine_fn fn)
259 unsigned i;
260 tree coef;
262 print_generic_expr (outf, fn[0], TDF_SLIM);
263 for (i = 1; fn.iterate (i, &coef); i++)
265 fprintf (outf, " + ");
266 print_generic_expr (outf, coef, TDF_SLIM);
267 fprintf (outf, " * x_%u", i);
271 /* Dumps the conflict function CF to the file OUTF. */
273 DEBUG_FUNCTION void
274 dump_conflict_function (FILE *outf, conflict_function *cf)
276 unsigned i;
278 if (cf->n == NO_DEPENDENCE)
279 fprintf (outf, "no dependence");
280 else if (cf->n == NOT_KNOWN)
281 fprintf (outf, "not known");
282 else
284 for (i = 0; i < cf->n; i++)
286 if (i != 0)
287 fprintf (outf, " ");
288 fprintf (outf, "[");
289 dump_affine_function (outf, cf->fns[i]);
290 fprintf (outf, "]");
295 /* Dump function for a SUBSCRIPT structure. */
297 DEBUG_FUNCTION void
298 dump_subscript (FILE *outf, struct subscript *subscript)
300 conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
302 fprintf (outf, "\n (subscript \n");
303 fprintf (outf, " iterations_that_access_an_element_twice_in_A: ");
304 dump_conflict_function (outf, cf);
305 if (CF_NONTRIVIAL_P (cf))
307 tree last_iteration = SUB_LAST_CONFLICT (subscript);
308 fprintf (outf, "\n last_conflict: ");
309 print_generic_expr (outf, last_iteration);
312 cf = SUB_CONFLICTS_IN_B (subscript);
313 fprintf (outf, "\n iterations_that_access_an_element_twice_in_B: ");
314 dump_conflict_function (outf, cf);
315 if (CF_NONTRIVIAL_P (cf))
317 tree last_iteration = SUB_LAST_CONFLICT (subscript);
318 fprintf (outf, "\n last_conflict: ");
319 print_generic_expr (outf, last_iteration);
322 fprintf (outf, "\n (Subscript distance: ");
323 print_generic_expr (outf, SUB_DISTANCE (subscript));
324 fprintf (outf, " ))\n");
327 /* Print the classic direction vector DIRV to OUTF. */
329 DEBUG_FUNCTION void
330 print_direction_vector (FILE *outf,
331 lambda_vector dirv,
332 int length)
334 int eq;
336 for (eq = 0; eq < length; eq++)
338 enum data_dependence_direction dir = ((enum data_dependence_direction)
339 dirv[eq]);
341 switch (dir)
343 case dir_positive:
344 fprintf (outf, " +");
345 break;
346 case dir_negative:
347 fprintf (outf, " -");
348 break;
349 case dir_equal:
350 fprintf (outf, " =");
351 break;
352 case dir_positive_or_equal:
353 fprintf (outf, " +=");
354 break;
355 case dir_positive_or_negative:
356 fprintf (outf, " +-");
357 break;
358 case dir_negative_or_equal:
359 fprintf (outf, " -=");
360 break;
361 case dir_star:
362 fprintf (outf, " *");
363 break;
364 default:
365 fprintf (outf, "indep");
366 break;
369 fprintf (outf, "\n");
372 /* Print a vector of direction vectors. */
374 DEBUG_FUNCTION void
375 print_dir_vectors (FILE *outf, vec<lambda_vector> dir_vects,
376 int length)
378 unsigned j;
379 lambda_vector v;
381 FOR_EACH_VEC_ELT (dir_vects, j, v)
382 print_direction_vector (outf, v, length);
385 /* Print out a vector VEC of length N to OUTFILE. */
387 DEBUG_FUNCTION void
388 print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
390 int i;
392 for (i = 0; i < n; i++)
393 fprintf (outfile, "%3d ", (int)vector[i]);
394 fprintf (outfile, "\n");
397 /* Print a vector of distance vectors. */
399 DEBUG_FUNCTION void
400 print_dist_vectors (FILE *outf, vec<lambda_vector> dist_vects,
401 int length)
403 unsigned j;
404 lambda_vector v;
406 FOR_EACH_VEC_ELT (dist_vects, j, v)
407 print_lambda_vector (outf, v, length);
410 /* Dump function for a DATA_DEPENDENCE_RELATION structure. */
412 DEBUG_FUNCTION void
413 dump_data_dependence_relation (FILE *outf,
414 struct data_dependence_relation *ddr)
416 struct data_reference *dra, *drb;
418 fprintf (outf, "(Data Dep: \n");
420 if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
422 if (ddr)
424 dra = DDR_A (ddr);
425 drb = DDR_B (ddr);
426 if (dra)
427 dump_data_reference (outf, dra);
428 else
429 fprintf (outf, " (nil)\n");
430 if (drb)
431 dump_data_reference (outf, drb);
432 else
433 fprintf (outf, " (nil)\n");
435 fprintf (outf, " (don't know)\n)\n");
436 return;
439 dra = DDR_A (ddr);
440 drb = DDR_B (ddr);
441 dump_data_reference (outf, dra);
442 dump_data_reference (outf, drb);
444 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
445 fprintf (outf, " (no dependence)\n");
447 else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
449 unsigned int i;
450 class loop *loopi;
452 subscript *sub;
453 FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
455 fprintf (outf, " access_fn_A: ");
456 print_generic_stmt (outf, SUB_ACCESS_FN (sub, 0));
457 fprintf (outf, " access_fn_B: ");
458 print_generic_stmt (outf, SUB_ACCESS_FN (sub, 1));
459 dump_subscript (outf, sub);
462 fprintf (outf, " loop nest: (");
463 FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr), i, loopi)
464 fprintf (outf, "%d ", loopi->num);
465 fprintf (outf, ")\n");
467 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
469 fprintf (outf, " distance_vector: ");
470 print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
471 DDR_NB_LOOPS (ddr));
474 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
476 fprintf (outf, " direction_vector: ");
477 print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
478 DDR_NB_LOOPS (ddr));
482 fprintf (outf, ")\n");
485 /* Debug version. */
487 DEBUG_FUNCTION void
488 debug_data_dependence_relation (struct data_dependence_relation *ddr)
490 dump_data_dependence_relation (stderr, ddr);
493 /* Dump into FILE all the dependence relations from DDRS. */
495 DEBUG_FUNCTION void
496 dump_data_dependence_relations (FILE *file,
497 vec<ddr_p> ddrs)
499 unsigned int i;
500 struct data_dependence_relation *ddr;
502 FOR_EACH_VEC_ELT (ddrs, i, ddr)
503 dump_data_dependence_relation (file, ddr);
506 DEBUG_FUNCTION void
507 debug (vec<ddr_p> &ref)
509 dump_data_dependence_relations (stderr, ref);
512 DEBUG_FUNCTION void
513 debug (vec<ddr_p> *ptr)
515 if (ptr)
516 debug (*ptr);
517 else
518 fprintf (stderr, "<nil>\n");
522 /* Dump to STDERR all the dependence relations from DDRS. */
524 DEBUG_FUNCTION void
525 debug_data_dependence_relations (vec<ddr_p> ddrs)
527 dump_data_dependence_relations (stderr, ddrs);
530 /* Dumps the distance and direction vectors in FILE. DDRS contains
531 the dependence relations, and VECT_SIZE is the size of the
532 dependence vectors, or in other words the number of loops in the
533 considered nest. */
535 DEBUG_FUNCTION void
536 dump_dist_dir_vectors (FILE *file, vec<ddr_p> ddrs)
538 unsigned int i, j;
539 struct data_dependence_relation *ddr;
540 lambda_vector v;
542 FOR_EACH_VEC_ELT (ddrs, i, ddr)
543 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
545 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), j, v)
547 fprintf (file, "DISTANCE_V (");
548 print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
549 fprintf (file, ")\n");
552 FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), j, v)
554 fprintf (file, "DIRECTION_V (");
555 print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
556 fprintf (file, ")\n");
560 fprintf (file, "\n\n");
563 /* Dumps the data dependence relations DDRS in FILE. */
565 DEBUG_FUNCTION void
566 dump_ddrs (FILE *file, vec<ddr_p> ddrs)
568 unsigned int i;
569 struct data_dependence_relation *ddr;
571 FOR_EACH_VEC_ELT (ddrs, i, ddr)
572 dump_data_dependence_relation (file, ddr);
574 fprintf (file, "\n\n");
577 DEBUG_FUNCTION void
578 debug_ddrs (vec<ddr_p> ddrs)
580 dump_ddrs (stderr, ddrs);
583 static void
584 split_constant_offset (tree exp, tree *var, tree *off,
585 hash_map<tree, std::pair<tree, tree> > &cache,
586 unsigned *limit);
588 /* Helper function for split_constant_offset. Expresses OP0 CODE OP1
589 (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
590 constant of type ssizetype, and returns true. If we cannot do this
591 with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
592 is returned. */
594 static bool
595 split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
596 tree *var, tree *off,
597 hash_map<tree, std::pair<tree, tree> > &cache,
598 unsigned *limit)
600 tree var0, var1;
601 tree off0, off1;
602 enum tree_code ocode = code;
604 *var = NULL_TREE;
605 *off = NULL_TREE;
607 switch (code)
609 case INTEGER_CST:
610 *var = build_int_cst (type, 0);
611 *off = fold_convert (ssizetype, op0);
612 return true;
614 case POINTER_PLUS_EXPR:
615 ocode = PLUS_EXPR;
616 /* FALLTHROUGH */
617 case PLUS_EXPR:
618 case MINUS_EXPR:
619 if (TREE_CODE (op1) == INTEGER_CST)
621 split_constant_offset (op0, &var0, &off0, cache, limit);
622 *var = var0;
623 *off = size_binop (ocode, off0, fold_convert (ssizetype, op1));
624 return true;
626 split_constant_offset (op0, &var0, &off0, cache, limit);
627 split_constant_offset (op1, &var1, &off1, cache, limit);
628 *var = fold_build2 (code, type, var0, var1);
629 *off = size_binop (ocode, off0, off1);
630 return true;
632 case MULT_EXPR:
633 if (TREE_CODE (op1) != INTEGER_CST)
634 return false;
636 split_constant_offset (op0, &var0, &off0, cache, limit);
637 *var = fold_build2 (MULT_EXPR, type, var0, op1);
638 *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
639 return true;
641 case ADDR_EXPR:
643 tree base, poffset;
644 poly_int64 pbitsize, pbitpos, pbytepos;
645 machine_mode pmode;
646 int punsignedp, preversep, pvolatilep;
648 op0 = TREE_OPERAND (op0, 0);
649 base
650 = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset, &pmode,
651 &punsignedp, &preversep, &pvolatilep);
653 if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
654 return false;
655 base = build_fold_addr_expr (base);
656 off0 = ssize_int (pbytepos);
658 if (poffset)
660 split_constant_offset (poffset, &poffset, &off1, cache, limit);
661 off0 = size_binop (PLUS_EXPR, off0, off1);
662 if (POINTER_TYPE_P (TREE_TYPE (base)))
663 base = fold_build_pointer_plus (base, poffset);
664 else
665 base = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base,
666 fold_convert (TREE_TYPE (base), poffset));
669 var0 = fold_convert (type, base);
671 /* If variable length types are involved, punt, otherwise casts
672 might be converted into ARRAY_REFs in gimplify_conversion.
673 To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
674 possibly no longer appears in current GIMPLE, might resurface.
675 This perhaps could run
676 if (CONVERT_EXPR_P (var0))
678 gimplify_conversion (&var0);
679 // Attempt to fill in any within var0 found ARRAY_REF's
680 // element size from corresponding op embedded ARRAY_REF,
681 // if unsuccessful, just punt.
682 } */
683 while (POINTER_TYPE_P (type))
684 type = TREE_TYPE (type);
685 if (int_size_in_bytes (type) < 0)
686 return false;
688 *var = var0;
689 *off = off0;
690 return true;
693 case SSA_NAME:
695 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op0))
696 return false;
698 gimple *def_stmt = SSA_NAME_DEF_STMT (op0);
699 enum tree_code subcode;
701 if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
702 return false;
704 subcode = gimple_assign_rhs_code (def_stmt);
706 /* We are using a cache to avoid un-CSEing large amounts of code. */
707 bool use_cache = false;
708 if (!has_single_use (op0)
709 && (subcode == POINTER_PLUS_EXPR
710 || subcode == PLUS_EXPR
711 || subcode == MINUS_EXPR
712 || subcode == MULT_EXPR
713 || subcode == ADDR_EXPR
714 || CONVERT_EXPR_CODE_P (subcode)))
716 use_cache = true;
717 bool existed;
718 std::pair<tree, tree> &e = cache.get_or_insert (op0, &existed);
719 if (existed)
721 if (integer_zerop (e.second))
722 return false;
723 *var = e.first;
724 *off = e.second;
725 return true;
727 e = std::make_pair (op0, ssize_int (0));
730 if (*limit == 0)
731 return false;
732 --*limit;
734 var0 = gimple_assign_rhs1 (def_stmt);
735 var1 = gimple_assign_rhs2 (def_stmt);
737 bool res = split_constant_offset_1 (type, var0, subcode, var1,
738 var, off, cache, limit);
739 if (res && use_cache)
740 *cache.get (op0) = std::make_pair (*var, *off);
741 return res;
743 CASE_CONVERT:
745 /* We must not introduce undefined overflow, and we must not change
746 the value. Hence we're okay if the inner type doesn't overflow
747 to start with (pointer or signed), the outer type also is an
748 integer or pointer and the outer precision is at least as large
749 as the inner. */
750 tree itype = TREE_TYPE (op0);
751 if ((POINTER_TYPE_P (itype)
752 || (INTEGRAL_TYPE_P (itype) && !TYPE_OVERFLOW_TRAPS (itype)))
753 && TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
754 && (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
756 if (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_WRAPS (itype))
758 /* Split the unconverted operand and try to prove that
759 wrapping isn't a problem. */
760 tree tmp_var, tmp_off;
761 split_constant_offset (op0, &tmp_var, &tmp_off, cache, limit);
763 /* See whether we have an SSA_NAME whose range is known
764 to be [A, B]. */
765 if (TREE_CODE (tmp_var) != SSA_NAME)
766 return false;
767 wide_int var_min, var_max;
768 value_range_kind vr_type = get_range_info (tmp_var, &var_min,
769 &var_max);
770 wide_int var_nonzero = get_nonzero_bits (tmp_var);
771 signop sgn = TYPE_SIGN (itype);
772 if (intersect_range_with_nonzero_bits (vr_type, &var_min,
773 &var_max, var_nonzero,
774 sgn) != VR_RANGE)
775 return false;
777 /* See whether the range of OP0 (i.e. TMP_VAR + TMP_OFF)
778 is known to be [A + TMP_OFF, B + TMP_OFF], with all
779 operations done in ITYPE. The addition must overflow
780 at both ends of the range or at neither. */
781 wi::overflow_type overflow[2];
782 unsigned int prec = TYPE_PRECISION (itype);
783 wide_int woff = wi::to_wide (tmp_off, prec);
784 wide_int op0_min = wi::add (var_min, woff, sgn, &overflow[0]);
785 wi::add (var_max, woff, sgn, &overflow[1]);
786 if ((overflow[0] != wi::OVF_NONE) != (overflow[1] != wi::OVF_NONE))
787 return false;
789 /* Calculate (ssizetype) OP0 - (ssizetype) TMP_VAR. */
790 widest_int diff = (widest_int::from (op0_min, sgn)
791 - widest_int::from (var_min, sgn));
792 var0 = tmp_var;
793 *off = wide_int_to_tree (ssizetype, diff);
795 else
796 split_constant_offset (op0, &var0, off, cache, limit);
797 *var = fold_convert (type, var0);
798 return true;
800 return false;
803 default:
804 return false;
808 /* Expresses EXP as VAR + OFF, where off is a constant. The type of OFF
809 will be ssizetype. */
811 static void
812 split_constant_offset (tree exp, tree *var, tree *off,
813 hash_map<tree, std::pair<tree, tree> > &cache,
814 unsigned *limit)
816 tree type = TREE_TYPE (exp), op0, op1, e, o;
817 enum tree_code code;
819 *var = exp;
820 *off = ssize_int (0);
822 if (tree_is_chrec (exp)
823 || get_gimple_rhs_class (TREE_CODE (exp)) == GIMPLE_TERNARY_RHS)
824 return;
826 code = TREE_CODE (exp);
827 extract_ops_from_tree (exp, &code, &op0, &op1);
828 if (split_constant_offset_1 (type, op0, code, op1, &e, &o, cache, limit))
830 *var = e;
831 *off = o;
835 void
836 split_constant_offset (tree exp, tree *var, tree *off)
838 unsigned limit = param_ssa_name_def_chain_limit;
839 static hash_map<tree, std::pair<tree, tree> > *cache;
840 if (!cache)
841 cache = new hash_map<tree, std::pair<tree, tree> > (37);
842 split_constant_offset (exp, var, off, *cache, &limit);
843 cache->empty ();
846 /* Returns the address ADDR of an object in a canonical shape (without nop
847 casts, and with type of pointer to the object). */
849 static tree
850 canonicalize_base_object_address (tree addr)
852 tree orig = addr;
854 STRIP_NOPS (addr);
856 /* The base address may be obtained by casting from integer, in that case
857 keep the cast. */
858 if (!POINTER_TYPE_P (TREE_TYPE (addr)))
859 return orig;
861 if (TREE_CODE (addr) != ADDR_EXPR)
862 return addr;
864 return build_fold_addr_expr (TREE_OPERAND (addr, 0));
867 /* Analyze the behavior of memory reference REF within STMT.
868 There are two modes:
870 - BB analysis. In this case we simply split the address into base,
871 init and offset components, without reference to any containing loop.
872 The resulting base and offset are general expressions and they can
873 vary arbitrarily from one iteration of the containing loop to the next.
874 The step is always zero.
876 - loop analysis. In this case we analyze the reference both wrt LOOP
877 and on the basis that the reference occurs (is "used") in LOOP;
878 see the comment above analyze_scalar_evolution_in_loop for more
879 information about this distinction. The base, init, offset and
880 step fields are all invariant in LOOP.
882 Perform BB analysis if LOOP is null, or if LOOP is the function's
883 dummy outermost loop. In other cases perform loop analysis.
885 Return true if the analysis succeeded and store the results in DRB if so.
886 BB analysis can only fail for bitfield or reversed-storage accesses. */
888 opt_result
889 dr_analyze_innermost (innermost_loop_behavior *drb, tree ref,
890 class loop *loop, const gimple *stmt)
892 poly_int64 pbitsize, pbitpos;
893 tree base, poffset;
894 machine_mode pmode;
895 int punsignedp, preversep, pvolatilep;
896 affine_iv base_iv, offset_iv;
897 tree init, dinit, step;
898 bool in_loop = (loop && loop->num);
900 if (dump_file && (dump_flags & TDF_DETAILS))
901 fprintf (dump_file, "analyze_innermost: ");
903 base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset, &pmode,
904 &punsignedp, &preversep, &pvolatilep);
905 gcc_assert (base != NULL_TREE);
907 poly_int64 pbytepos;
908 if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
909 return opt_result::failure_at (stmt,
910 "failed: bit offset alignment.\n");
912 if (preversep)
913 return opt_result::failure_at (stmt,
914 "failed: reverse storage order.\n");
916 /* Calculate the alignment and misalignment for the inner reference. */
917 unsigned int HOST_WIDE_INT bit_base_misalignment;
918 unsigned int bit_base_alignment;
919 get_object_alignment_1 (base, &bit_base_alignment, &bit_base_misalignment);
921 /* There are no bitfield references remaining in BASE, so the values
922 we got back must be whole bytes. */
923 gcc_assert (bit_base_alignment % BITS_PER_UNIT == 0
924 && bit_base_misalignment % BITS_PER_UNIT == 0);
925 unsigned int base_alignment = bit_base_alignment / BITS_PER_UNIT;
926 poly_int64 base_misalignment = bit_base_misalignment / BITS_PER_UNIT;
928 if (TREE_CODE (base) == MEM_REF)
930 if (!integer_zerop (TREE_OPERAND (base, 1)))
932 /* Subtract MOFF from the base and add it to POFFSET instead.
933 Adjust the misalignment to reflect the amount we subtracted. */
934 poly_offset_int moff = mem_ref_offset (base);
935 base_misalignment -= moff.force_shwi ();
936 tree mofft = wide_int_to_tree (sizetype, moff);
937 if (!poffset)
938 poffset = mofft;
939 else
940 poffset = size_binop (PLUS_EXPR, poffset, mofft);
942 base = TREE_OPERAND (base, 0);
944 else
945 base = build_fold_addr_expr (base);
947 if (in_loop)
949 if (!simple_iv (loop, loop, base, &base_iv, true))
950 return opt_result::failure_at
951 (stmt, "failed: evolution of base is not affine.\n");
953 else
955 base_iv.base = base;
956 base_iv.step = ssize_int (0);
957 base_iv.no_overflow = true;
960 if (!poffset)
962 offset_iv.base = ssize_int (0);
963 offset_iv.step = ssize_int (0);
965 else
967 if (!in_loop)
969 offset_iv.base = poffset;
970 offset_iv.step = ssize_int (0);
972 else if (!simple_iv (loop, loop, poffset, &offset_iv, true))
973 return opt_result::failure_at
974 (stmt, "failed: evolution of offset is not affine.\n");
977 init = ssize_int (pbytepos);
979 /* Subtract any constant component from the base and add it to INIT instead.
980 Adjust the misalignment to reflect the amount we subtracted. */
981 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
982 init = size_binop (PLUS_EXPR, init, dinit);
983 base_misalignment -= TREE_INT_CST_LOW (dinit);
985 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
986 init = size_binop (PLUS_EXPR, init, dinit);
988 step = size_binop (PLUS_EXPR,
989 fold_convert (ssizetype, base_iv.step),
990 fold_convert (ssizetype, offset_iv.step));
992 base = canonicalize_base_object_address (base_iv.base);
994 /* See if get_pointer_alignment can guarantee a higher alignment than
995 the one we calculated above. */
996 unsigned int HOST_WIDE_INT alt_misalignment;
997 unsigned int alt_alignment;
998 get_pointer_alignment_1 (base, &alt_alignment, &alt_misalignment);
1000 /* As above, these values must be whole bytes. */
1001 gcc_assert (alt_alignment % BITS_PER_UNIT == 0
1002 && alt_misalignment % BITS_PER_UNIT == 0);
1003 alt_alignment /= BITS_PER_UNIT;
1004 alt_misalignment /= BITS_PER_UNIT;
1006 if (base_alignment < alt_alignment)
1008 base_alignment = alt_alignment;
1009 base_misalignment = alt_misalignment;
1012 drb->base_address = base;
1013 drb->offset = fold_convert (ssizetype, offset_iv.base);
1014 drb->init = init;
1015 drb->step = step;
1016 if (known_misalignment (base_misalignment, base_alignment,
1017 &drb->base_misalignment))
1018 drb->base_alignment = base_alignment;
1019 else
1021 drb->base_alignment = known_alignment (base_misalignment);
1022 drb->base_misalignment = 0;
1024 drb->offset_alignment = highest_pow2_factor (offset_iv.base);
1025 drb->step_alignment = highest_pow2_factor (step);
1027 if (dump_file && (dump_flags & TDF_DETAILS))
1028 fprintf (dump_file, "success.\n");
1030 return opt_result::success ();
1033 /* Return true if OP is a valid component reference for a DR access
1034 function. This accepts a subset of what handled_component_p accepts. */
1036 static bool
1037 access_fn_component_p (tree op)
1039 switch (TREE_CODE (op))
1041 case REALPART_EXPR:
1042 case IMAGPART_EXPR:
1043 case ARRAY_REF:
1044 return true;
1046 case COMPONENT_REF:
1047 return TREE_CODE (TREE_TYPE (TREE_OPERAND (op, 0))) == RECORD_TYPE;
1049 default:
1050 return false;
1054 /* Determines the base object and the list of indices of memory reference
1055 DR, analyzed in LOOP and instantiated before NEST. */
1057 static void
1058 dr_analyze_indices (struct data_reference *dr, edge nest, loop_p loop)
1060 vec<tree> access_fns = vNULL;
1061 tree ref, op;
1062 tree base, off, access_fn;
1064 /* If analyzing a basic-block there are no indices to analyze
1065 and thus no access functions. */
1066 if (!nest)
1068 DR_BASE_OBJECT (dr) = DR_REF (dr);
1069 DR_ACCESS_FNS (dr).create (0);
1070 return;
1073 ref = DR_REF (dr);
1075 /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
1076 into a two element array with a constant index. The base is
1077 then just the immediate underlying object. */
1078 if (TREE_CODE (ref) == REALPART_EXPR)
1080 ref = TREE_OPERAND (ref, 0);
1081 access_fns.safe_push (integer_zero_node);
1083 else if (TREE_CODE (ref) == IMAGPART_EXPR)
1085 ref = TREE_OPERAND (ref, 0);
1086 access_fns.safe_push (integer_one_node);
1089 /* Analyze access functions of dimensions we know to be independent.
1090 The list of component references handled here should be kept in
1091 sync with access_fn_component_p. */
1092 while (handled_component_p (ref))
1094 if (TREE_CODE (ref) == ARRAY_REF)
1096 op = TREE_OPERAND (ref, 1);
1097 access_fn = analyze_scalar_evolution (loop, op);
1098 access_fn = instantiate_scev (nest, loop, access_fn);
1099 access_fns.safe_push (access_fn);
1101 else if (TREE_CODE (ref) == COMPONENT_REF
1102 && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) == RECORD_TYPE)
1104 /* For COMPONENT_REFs of records (but not unions!) use the
1105 FIELD_DECL offset as constant access function so we can
1106 disambiguate a[i].f1 and a[i].f2. */
1107 tree off = component_ref_field_offset (ref);
1108 off = size_binop (PLUS_EXPR,
1109 size_binop (MULT_EXPR,
1110 fold_convert (bitsizetype, off),
1111 bitsize_int (BITS_PER_UNIT)),
1112 DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref, 1)));
1113 access_fns.safe_push (off);
1115 else
1116 /* If we have an unhandled component we could not translate
1117 to an access function stop analyzing. We have determined
1118 our base object in this case. */
1119 break;
1121 ref = TREE_OPERAND (ref, 0);
1124 /* If the address operand of a MEM_REF base has an evolution in the
1125 analyzed nest, add it as an additional independent access-function. */
1126 if (TREE_CODE (ref) == MEM_REF)
1128 op = TREE_OPERAND (ref, 0);
1129 access_fn = analyze_scalar_evolution (loop, op);
1130 access_fn = instantiate_scev (nest, loop, access_fn);
1131 if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
1133 tree orig_type;
1134 tree memoff = TREE_OPERAND (ref, 1);
1135 base = initial_condition (access_fn);
1136 orig_type = TREE_TYPE (base);
1137 STRIP_USELESS_TYPE_CONVERSION (base);
1138 split_constant_offset (base, &base, &off);
1139 STRIP_USELESS_TYPE_CONVERSION (base);
1140 /* Fold the MEM_REF offset into the evolutions initial
1141 value to make more bases comparable. */
1142 if (!integer_zerop (memoff))
1144 off = size_binop (PLUS_EXPR, off,
1145 fold_convert (ssizetype, memoff));
1146 memoff = build_int_cst (TREE_TYPE (memoff), 0);
1148 /* Adjust the offset so it is a multiple of the access type
1149 size and thus we separate bases that can possibly be used
1150 to produce partial overlaps (which the access_fn machinery
1151 cannot handle). */
1152 wide_int rem;
1153 if (TYPE_SIZE_UNIT (TREE_TYPE (ref))
1154 && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (ref))) == INTEGER_CST
1155 && !integer_zerop (TYPE_SIZE_UNIT (TREE_TYPE (ref))))
1156 rem = wi::mod_trunc
1157 (wi::to_wide (off),
1158 wi::to_wide (TYPE_SIZE_UNIT (TREE_TYPE (ref))),
1159 SIGNED);
1160 else
1161 /* If we can't compute the remainder simply force the initial
1162 condition to zero. */
1163 rem = wi::to_wide (off);
1164 off = wide_int_to_tree (ssizetype, wi::to_wide (off) - rem);
1165 memoff = wide_int_to_tree (TREE_TYPE (memoff), rem);
1166 /* And finally replace the initial condition. */
1167 access_fn = chrec_replace_initial_condition
1168 (access_fn, fold_convert (orig_type, off));
1169 /* ??? This is still not a suitable base object for
1170 dr_may_alias_p - the base object needs to be an
1171 access that covers the object as whole. With
1172 an evolution in the pointer this cannot be
1173 guaranteed.
1174 As a band-aid, mark the access so we can special-case
1175 it in dr_may_alias_p. */
1176 tree old = ref;
1177 ref = fold_build2_loc (EXPR_LOCATION (ref),
1178 MEM_REF, TREE_TYPE (ref),
1179 base, memoff);
1180 MR_DEPENDENCE_CLIQUE (ref) = MR_DEPENDENCE_CLIQUE (old);
1181 MR_DEPENDENCE_BASE (ref) = MR_DEPENDENCE_BASE (old);
1182 DR_UNCONSTRAINED_BASE (dr) = true;
1183 access_fns.safe_push (access_fn);
1186 else if (DECL_P (ref))
1188 /* Canonicalize DR_BASE_OBJECT to MEM_REF form. */
1189 ref = build2 (MEM_REF, TREE_TYPE (ref),
1190 build_fold_addr_expr (ref),
1191 build_int_cst (reference_alias_ptr_type (ref), 0));
1194 DR_BASE_OBJECT (dr) = ref;
1195 DR_ACCESS_FNS (dr) = access_fns;
1198 /* Extracts the alias analysis information from the memory reference DR. */
1200 static void
1201 dr_analyze_alias (struct data_reference *dr)
1203 tree ref = DR_REF (dr);
1204 tree base = get_base_address (ref), addr;
1206 if (INDIRECT_REF_P (base)
1207 || TREE_CODE (base) == MEM_REF)
1209 addr = TREE_OPERAND (base, 0);
1210 if (TREE_CODE (addr) == SSA_NAME)
1211 DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
1215 /* Frees data reference DR. */
1217 void
1218 free_data_ref (data_reference_p dr)
1220 DR_ACCESS_FNS (dr).release ();
1221 free (dr);
1224 /* Analyze memory reference MEMREF, which is accessed in STMT.
1225 The reference is a read if IS_READ is true, otherwise it is a write.
1226 IS_CONDITIONAL_IN_STMT indicates that the reference is conditional
1227 within STMT, i.e. that it might not occur even if STMT is executed
1228 and runs to completion.
1230 Return the data_reference description of MEMREF. NEST is the outermost
1231 loop in which the reference should be instantiated, LOOP is the loop
1232 in which the data reference should be analyzed. */
1234 struct data_reference *
1235 create_data_ref (edge nest, loop_p loop, tree memref, gimple *stmt,
1236 bool is_read, bool is_conditional_in_stmt)
1238 struct data_reference *dr;
1240 if (dump_file && (dump_flags & TDF_DETAILS))
1242 fprintf (dump_file, "Creating dr for ");
1243 print_generic_expr (dump_file, memref, TDF_SLIM);
1244 fprintf (dump_file, "\n");
1247 dr = XCNEW (struct data_reference);
1248 DR_STMT (dr) = stmt;
1249 DR_REF (dr) = memref;
1250 DR_IS_READ (dr) = is_read;
1251 DR_IS_CONDITIONAL_IN_STMT (dr) = is_conditional_in_stmt;
1253 dr_analyze_innermost (&DR_INNERMOST (dr), memref,
1254 nest != NULL ? loop : NULL, stmt);
1255 dr_analyze_indices (dr, nest, loop);
1256 dr_analyze_alias (dr);
1258 if (dump_file && (dump_flags & TDF_DETAILS))
1260 unsigned i;
1261 fprintf (dump_file, "\tbase_address: ");
1262 print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
1263 fprintf (dump_file, "\n\toffset from base address: ");
1264 print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
1265 fprintf (dump_file, "\n\tconstant offset from base address: ");
1266 print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
1267 fprintf (dump_file, "\n\tstep: ");
1268 print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
1269 fprintf (dump_file, "\n\tbase alignment: %d", DR_BASE_ALIGNMENT (dr));
1270 fprintf (dump_file, "\n\tbase misalignment: %d",
1271 DR_BASE_MISALIGNMENT (dr));
1272 fprintf (dump_file, "\n\toffset alignment: %d",
1273 DR_OFFSET_ALIGNMENT (dr));
1274 fprintf (dump_file, "\n\tstep alignment: %d", DR_STEP_ALIGNMENT (dr));
1275 fprintf (dump_file, "\n\tbase_object: ");
1276 print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
1277 fprintf (dump_file, "\n");
1278 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
1280 fprintf (dump_file, "\tAccess function %d: ", i);
1281 print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
1285 return dr;
1288 /* A helper function computes order between two tree expressions T1 and T2.
1289 This is used in comparator functions sorting objects based on the order
1290 of tree expressions. The function returns -1, 0, or 1. */
1293 data_ref_compare_tree (tree t1, tree t2)
1295 int i, cmp;
1296 enum tree_code code;
1297 char tclass;
1299 if (t1 == t2)
1300 return 0;
1301 if (t1 == NULL)
1302 return -1;
1303 if (t2 == NULL)
1304 return 1;
1306 STRIP_USELESS_TYPE_CONVERSION (t1);
1307 STRIP_USELESS_TYPE_CONVERSION (t2);
1308 if (t1 == t2)
1309 return 0;
1311 if (TREE_CODE (t1) != TREE_CODE (t2)
1312 && ! (CONVERT_EXPR_P (t1) && CONVERT_EXPR_P (t2)))
1313 return TREE_CODE (t1) < TREE_CODE (t2) ? -1 : 1;
1315 code = TREE_CODE (t1);
1316 switch (code)
1318 case INTEGER_CST:
1319 return tree_int_cst_compare (t1, t2);
1321 case STRING_CST:
1322 if (TREE_STRING_LENGTH (t1) != TREE_STRING_LENGTH (t2))
1323 return TREE_STRING_LENGTH (t1) < TREE_STRING_LENGTH (t2) ? -1 : 1;
1324 return memcmp (TREE_STRING_POINTER (t1), TREE_STRING_POINTER (t2),
1325 TREE_STRING_LENGTH (t1));
1327 case SSA_NAME:
1328 if (SSA_NAME_VERSION (t1) != SSA_NAME_VERSION (t2))
1329 return SSA_NAME_VERSION (t1) < SSA_NAME_VERSION (t2) ? -1 : 1;
1330 break;
1332 default:
1333 if (POLY_INT_CST_P (t1))
1334 return compare_sizes_for_sort (wi::to_poly_widest (t1),
1335 wi::to_poly_widest (t2));
1337 tclass = TREE_CODE_CLASS (code);
1339 /* For decls, compare their UIDs. */
1340 if (tclass == tcc_declaration)
1342 if (DECL_UID (t1) != DECL_UID (t2))
1343 return DECL_UID (t1) < DECL_UID (t2) ? -1 : 1;
1344 break;
1346 /* For expressions, compare their operands recursively. */
1347 else if (IS_EXPR_CODE_CLASS (tclass))
1349 for (i = TREE_OPERAND_LENGTH (t1) - 1; i >= 0; --i)
1351 cmp = data_ref_compare_tree (TREE_OPERAND (t1, i),
1352 TREE_OPERAND (t2, i));
1353 if (cmp != 0)
1354 return cmp;
1357 else
1358 gcc_unreachable ();
1361 return 0;
1364 /* Return TRUE it's possible to resolve data dependence DDR by runtime alias
1365 check. */
1367 opt_result
1368 runtime_alias_check_p (ddr_p ddr, class loop *loop, bool speed_p)
1370 if (dump_enabled_p ())
1371 dump_printf (MSG_NOTE,
1372 "consider run-time aliasing test between %T and %T\n",
1373 DR_REF (DDR_A (ddr)), DR_REF (DDR_B (ddr)));
1375 if (!speed_p)
1376 return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
1377 "runtime alias check not supported when"
1378 " optimizing for size.\n");
1380 /* FORNOW: We don't support versioning with outer-loop in either
1381 vectorization or loop distribution. */
1382 if (loop != NULL && loop->inner != NULL)
1383 return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
1384 "runtime alias check not supported for"
1385 " outer loop.\n");
1387 return opt_result::success ();
1390 /* Operator == between two dr_with_seg_len objects.
1392 This equality operator is used to make sure two data refs
1393 are the same one so that we will consider to combine the
1394 aliasing checks of those two pairs of data dependent data
1395 refs. */
1397 static bool
1398 operator == (const dr_with_seg_len& d1,
1399 const dr_with_seg_len& d2)
1401 return (operand_equal_p (DR_BASE_ADDRESS (d1.dr),
1402 DR_BASE_ADDRESS (d2.dr), 0)
1403 && data_ref_compare_tree (DR_OFFSET (d1.dr), DR_OFFSET (d2.dr)) == 0
1404 && data_ref_compare_tree (DR_INIT (d1.dr), DR_INIT (d2.dr)) == 0
1405 && data_ref_compare_tree (d1.seg_len, d2.seg_len) == 0
1406 && known_eq (d1.access_size, d2.access_size)
1407 && d1.align == d2.align);
1410 /* Comparison function for sorting objects of dr_with_seg_len_pair_t
1411 so that we can combine aliasing checks in one scan. */
1413 static int
1414 comp_dr_with_seg_len_pair (const void *pa_, const void *pb_)
1416 const dr_with_seg_len_pair_t* pa = (const dr_with_seg_len_pair_t *) pa_;
1417 const dr_with_seg_len_pair_t* pb = (const dr_with_seg_len_pair_t *) pb_;
1418 const dr_with_seg_len &a1 = pa->first, &a2 = pa->second;
1419 const dr_with_seg_len &b1 = pb->first, &b2 = pb->second;
1421 /* For DR pairs (a, b) and (c, d), we only consider to merge the alias checks
1422 if a and c have the same basic address snd step, and b and d have the same
1423 address and step. Therefore, if any a&c or b&d don't have the same address
1424 and step, we don't care the order of those two pairs after sorting. */
1425 int comp_res;
1427 if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a1.dr),
1428 DR_BASE_ADDRESS (b1.dr))) != 0)
1429 return comp_res;
1430 if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a2.dr),
1431 DR_BASE_ADDRESS (b2.dr))) != 0)
1432 return comp_res;
1433 if ((comp_res = data_ref_compare_tree (DR_STEP (a1.dr),
1434 DR_STEP (b1.dr))) != 0)
1435 return comp_res;
1436 if ((comp_res = data_ref_compare_tree (DR_STEP (a2.dr),
1437 DR_STEP (b2.dr))) != 0)
1438 return comp_res;
1439 if ((comp_res = data_ref_compare_tree (DR_OFFSET (a1.dr),
1440 DR_OFFSET (b1.dr))) != 0)
1441 return comp_res;
1442 if ((comp_res = data_ref_compare_tree (DR_INIT (a1.dr),
1443 DR_INIT (b1.dr))) != 0)
1444 return comp_res;
1445 if ((comp_res = data_ref_compare_tree (DR_OFFSET (a2.dr),
1446 DR_OFFSET (b2.dr))) != 0)
1447 return comp_res;
1448 if ((comp_res = data_ref_compare_tree (DR_INIT (a2.dr),
1449 DR_INIT (b2.dr))) != 0)
1450 return comp_res;
1452 return 0;
1455 /* Merge alias checks recorded in ALIAS_PAIRS and remove redundant ones.
1456 FACTOR is number of iterations that each data reference is accessed.
1458 Basically, for each pair of dependent data refs store_ptr_0 & load_ptr_0,
1459 we create an expression:
1461 ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
1462 || (load_ptr_0 + load_segment_length_0) <= store_ptr_0))
1464 for aliasing checks. However, in some cases we can decrease the number
1465 of checks by combining two checks into one. For example, suppose we have
1466 another pair of data refs store_ptr_0 & load_ptr_1, and if the following
1467 condition is satisfied:
1469 load_ptr_0 < load_ptr_1 &&
1470 load_ptr_1 - load_ptr_0 - load_segment_length_0 < store_segment_length_0
1472 (this condition means, in each iteration of vectorized loop, the accessed
1473 memory of store_ptr_0 cannot be between the memory of load_ptr_0 and
1474 load_ptr_1.)
1476 we then can use only the following expression to finish the alising checks
1477 between store_ptr_0 & load_ptr_0 and store_ptr_0 & load_ptr_1:
1479 ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
1480 || (load_ptr_1 + load_segment_length_1 <= store_ptr_0))
1482 Note that we only consider that load_ptr_0 and load_ptr_1 have the same
1483 basic address. */
1485 void
1486 prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs,
1487 poly_uint64)
1489 /* Canonicalize each pair so that the base components are ordered wrt
1490 data_ref_compare_tree. This allows the loop below to merge more
1491 cases. */
1492 unsigned int i;
1493 dr_with_seg_len_pair_t *alias_pair;
1494 FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
1496 data_reference_p dr_a = alias_pair->first.dr;
1497 data_reference_p dr_b = alias_pair->second.dr;
1498 int comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (dr_a),
1499 DR_BASE_ADDRESS (dr_b));
1500 if (comp_res == 0)
1501 comp_res = data_ref_compare_tree (DR_OFFSET (dr_a), DR_OFFSET (dr_b));
1502 if (comp_res == 0)
1503 comp_res = data_ref_compare_tree (DR_INIT (dr_a), DR_INIT (dr_b));
1504 if (comp_res > 0)
1505 std::swap (alias_pair->first, alias_pair->second);
1508 /* Sort the collected data ref pairs so that we can scan them once to
1509 combine all possible aliasing checks. */
1510 alias_pairs->qsort (comp_dr_with_seg_len_pair);
1512 /* Scan the sorted dr pairs and check if we can combine alias checks
1513 of two neighboring dr pairs. */
1514 for (i = 1; i < alias_pairs->length (); ++i)
1516 /* Deal with two ddrs (dr_a1, dr_b1) and (dr_a2, dr_b2). */
1517 dr_with_seg_len *dr_a1 = &(*alias_pairs)[i-1].first,
1518 *dr_b1 = &(*alias_pairs)[i-1].second,
1519 *dr_a2 = &(*alias_pairs)[i].first,
1520 *dr_b2 = &(*alias_pairs)[i].second;
1522 /* Remove duplicate data ref pairs. */
1523 if (*dr_a1 == *dr_a2 && *dr_b1 == *dr_b2)
1525 if (dump_enabled_p ())
1526 dump_printf (MSG_NOTE, "found equal ranges %T, %T and %T, %T\n",
1527 DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
1528 DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
1529 alias_pairs->ordered_remove (i--);
1530 continue;
1533 if (*dr_a1 == *dr_a2 || *dr_b1 == *dr_b2)
1535 /* We consider the case that DR_B1 and DR_B2 are same memrefs,
1536 and DR_A1 and DR_A2 are two consecutive memrefs. */
1537 if (*dr_a1 == *dr_a2)
1539 std::swap (dr_a1, dr_b1);
1540 std::swap (dr_a2, dr_b2);
1543 poly_int64 init_a1, init_a2;
1544 /* Only consider cases in which the distance between the initial
1545 DR_A1 and the initial DR_A2 is known at compile time. */
1546 if (!operand_equal_p (DR_BASE_ADDRESS (dr_a1->dr),
1547 DR_BASE_ADDRESS (dr_a2->dr), 0)
1548 || !operand_equal_p (DR_OFFSET (dr_a1->dr),
1549 DR_OFFSET (dr_a2->dr), 0)
1550 || !poly_int_tree_p (DR_INIT (dr_a1->dr), &init_a1)
1551 || !poly_int_tree_p (DR_INIT (dr_a2->dr), &init_a2))
1552 continue;
1554 /* Don't combine if we can't tell which one comes first. */
1555 if (!ordered_p (init_a1, init_a2))
1556 continue;
1558 /* Make sure dr_a1 starts left of dr_a2. */
1559 if (maybe_gt (init_a1, init_a2))
1561 std::swap (*dr_a1, *dr_a2);
1562 std::swap (init_a1, init_a2);
1565 /* Work out what the segment length would be if we did combine
1566 DR_A1 and DR_A2:
1568 - If DR_A1 and DR_A2 have equal lengths, that length is
1569 also the combined length.
1571 - If DR_A1 and DR_A2 both have negative "lengths", the combined
1572 length is the lower bound on those lengths.
1574 - If DR_A1 and DR_A2 both have positive lengths, the combined
1575 length is the upper bound on those lengths.
1577 Other cases are unlikely to give a useful combination.
1579 The lengths both have sizetype, so the sign is taken from
1580 the step instead. */
1581 if (!operand_equal_p (dr_a1->seg_len, dr_a2->seg_len, 0))
1583 poly_uint64 seg_len_a1, seg_len_a2;
1584 if (!poly_int_tree_p (dr_a1->seg_len, &seg_len_a1)
1585 || !poly_int_tree_p (dr_a2->seg_len, &seg_len_a2))
1586 continue;
1588 tree indicator_a = dr_direction_indicator (dr_a1->dr);
1589 if (TREE_CODE (indicator_a) != INTEGER_CST)
1590 continue;
1592 tree indicator_b = dr_direction_indicator (dr_a2->dr);
1593 if (TREE_CODE (indicator_b) != INTEGER_CST)
1594 continue;
1596 int sign_a = tree_int_cst_sgn (indicator_a);
1597 int sign_b = tree_int_cst_sgn (indicator_b);
1599 poly_uint64 new_seg_len;
1600 if (sign_a <= 0 && sign_b <= 0)
1601 new_seg_len = lower_bound (seg_len_a1, seg_len_a2);
1602 else if (sign_a >= 0 && sign_b >= 0)
1603 new_seg_len = upper_bound (seg_len_a1, seg_len_a2);
1604 else
1605 continue;
1607 dr_a1->seg_len = build_int_cst (TREE_TYPE (dr_a1->seg_len),
1608 new_seg_len);
1609 dr_a1->align = MIN (dr_a1->align, known_alignment (new_seg_len));
1612 /* This is always positive due to the swap above. */
1613 poly_uint64 diff = init_a2 - init_a1;
1615 /* The new check will start at DR_A1. Make sure that its access
1616 size encompasses the initial DR_A2. */
1617 if (maybe_lt (dr_a1->access_size, diff + dr_a2->access_size))
1619 dr_a1->access_size = upper_bound (dr_a1->access_size,
1620 diff + dr_a2->access_size);
1621 unsigned int new_align = known_alignment (dr_a1->access_size);
1622 dr_a1->align = MIN (dr_a1->align, new_align);
1624 if (dump_enabled_p ())
1625 dump_printf (MSG_NOTE, "merging ranges for %T, %T and %T, %T\n",
1626 DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
1627 DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
1628 alias_pairs->ordered_remove (i);
1629 i--;
1634 /* Given LOOP's two data references and segment lengths described by DR_A
1635 and DR_B, create expression checking if the two addresses ranges intersect
1636 with each other based on index of the two addresses. This can only be
1637 done if DR_A and DR_B referring to the same (array) object and the index
1638 is the only difference. For example:
1640 DR_A DR_B
1641 data-ref arr[i] arr[j]
1642 base_object arr arr
1643 index {i_0, +, 1}_loop {j_0, +, 1}_loop
1645 The addresses and their index are like:
1647 |<- ADDR_A ->| |<- ADDR_B ->|
1648 ------------------------------------------------------->
1649 | | | | | | | | | |
1650 ------------------------------------------------------->
1651 i_0 ... i_0+4 j_0 ... j_0+4
1653 We can create expression based on index rather than address:
1655 (i_0 + 4 < j_0 || j_0 + 4 < i_0)
1657 Note evolution step of index needs to be considered in comparison. */
1659 static bool
1660 create_intersect_range_checks_index (class loop *loop, tree *cond_expr,
1661 const dr_with_seg_len& dr_a,
1662 const dr_with_seg_len& dr_b)
1664 if (integer_zerop (DR_STEP (dr_a.dr))
1665 || integer_zerop (DR_STEP (dr_b.dr))
1666 || DR_NUM_DIMENSIONS (dr_a.dr) != DR_NUM_DIMENSIONS (dr_b.dr))
1667 return false;
1669 poly_uint64 seg_len1, seg_len2;
1670 if (!poly_int_tree_p (dr_a.seg_len, &seg_len1)
1671 || !poly_int_tree_p (dr_b.seg_len, &seg_len2))
1672 return false;
1674 if (!tree_fits_shwi_p (DR_STEP (dr_a.dr)))
1675 return false;
1677 if (!operand_equal_p (DR_BASE_OBJECT (dr_a.dr), DR_BASE_OBJECT (dr_b.dr), 0))
1678 return false;
1680 if (!operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0))
1681 return false;
1683 gcc_assert (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST);
1685 bool neg_step = tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0;
1686 unsigned HOST_WIDE_INT abs_step = tree_to_shwi (DR_STEP (dr_a.dr));
1687 if (neg_step)
1689 abs_step = -abs_step;
1690 seg_len1 = -seg_len1;
1691 seg_len2 = -seg_len2;
1693 else
1695 /* Include the access size in the length, so that we only have one
1696 tree addition below. */
1697 seg_len1 += dr_a.access_size;
1698 seg_len2 += dr_b.access_size;
1701 /* Infer the number of iterations with which the memory segment is accessed
1702 by DR. In other words, alias is checked if memory segment accessed by
1703 DR_A in some iterations intersect with memory segment accessed by DR_B
1704 in the same amount iterations.
1705 Note segnment length is a linear function of number of iterations with
1706 DR_STEP as the coefficient. */
1707 poly_uint64 niter_len1, niter_len2;
1708 if (!can_div_trunc_p (seg_len1 + abs_step - 1, abs_step, &niter_len1)
1709 || !can_div_trunc_p (seg_len2 + abs_step - 1, abs_step, &niter_len2))
1710 return false;
1712 poly_uint64 niter_access1 = 0, niter_access2 = 0;
1713 if (neg_step)
1715 /* Divide each access size by the byte step, rounding up. */
1716 if (!can_div_trunc_p (dr_a.access_size - abs_step - 1,
1717 abs_step, &niter_access1)
1718 || !can_div_trunc_p (dr_b.access_size + abs_step - 1,
1719 abs_step, &niter_access2))
1720 return false;
1723 unsigned int i;
1724 for (i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
1726 tree access1 = DR_ACCESS_FN (dr_a.dr, i);
1727 tree access2 = DR_ACCESS_FN (dr_b.dr, i);
1728 /* Two indices must be the same if they are not scev, or not scev wrto
1729 current loop being vecorized. */
1730 if (TREE_CODE (access1) != POLYNOMIAL_CHREC
1731 || TREE_CODE (access2) != POLYNOMIAL_CHREC
1732 || CHREC_VARIABLE (access1) != (unsigned)loop->num
1733 || CHREC_VARIABLE (access2) != (unsigned)loop->num)
1735 if (operand_equal_p (access1, access2, 0))
1736 continue;
1738 return false;
1740 /* The two indices must have the same step. */
1741 if (!operand_equal_p (CHREC_RIGHT (access1), CHREC_RIGHT (access2), 0))
1742 return false;
1744 tree idx_step = CHREC_RIGHT (access1);
1745 /* Index must have const step, otherwise DR_STEP won't be constant. */
1746 gcc_assert (TREE_CODE (idx_step) == INTEGER_CST);
1747 /* Index must evaluate in the same direction as DR. */
1748 gcc_assert (!neg_step || tree_int_cst_sign_bit (idx_step) == 1);
1750 tree min1 = CHREC_LEFT (access1);
1751 tree min2 = CHREC_LEFT (access2);
1752 if (!types_compatible_p (TREE_TYPE (min1), TREE_TYPE (min2)))
1753 return false;
1755 /* Ideally, alias can be checked against loop's control IV, but we
1756 need to prove linear mapping between control IV and reference
1757 index. Although that should be true, we check against (array)
1758 index of data reference. Like segment length, index length is
1759 linear function of the number of iterations with index_step as
1760 the coefficient, i.e, niter_len * idx_step. */
1761 tree idx_len1 = fold_build2 (MULT_EXPR, TREE_TYPE (min1), idx_step,
1762 build_int_cst (TREE_TYPE (min1),
1763 niter_len1));
1764 tree idx_len2 = fold_build2 (MULT_EXPR, TREE_TYPE (min2), idx_step,
1765 build_int_cst (TREE_TYPE (min2),
1766 niter_len2));
1767 tree max1 = fold_build2 (PLUS_EXPR, TREE_TYPE (min1), min1, idx_len1);
1768 tree max2 = fold_build2 (PLUS_EXPR, TREE_TYPE (min2), min2, idx_len2);
1769 /* Adjust ranges for negative step. */
1770 if (neg_step)
1772 /* IDX_LEN1 and IDX_LEN2 are negative in this case. */
1773 std::swap (min1, max1);
1774 std::swap (min2, max2);
1776 /* As with the lengths just calculated, we've measured the access
1777 sizes in iterations, so multiply them by the index step. */
1778 tree idx_access1
1779 = fold_build2 (MULT_EXPR, TREE_TYPE (min1), idx_step,
1780 build_int_cst (TREE_TYPE (min1), niter_access1));
1781 tree idx_access2
1782 = fold_build2 (MULT_EXPR, TREE_TYPE (min2), idx_step,
1783 build_int_cst (TREE_TYPE (min2), niter_access2));
1785 /* MINUS_EXPR because the above values are negative. */
1786 max1 = fold_build2 (MINUS_EXPR, TREE_TYPE (max1), max1, idx_access1);
1787 max2 = fold_build2 (MINUS_EXPR, TREE_TYPE (max2), max2, idx_access2);
1789 tree part_cond_expr
1790 = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1791 fold_build2 (LE_EXPR, boolean_type_node, max1, min2),
1792 fold_build2 (LE_EXPR, boolean_type_node, max2, min1));
1793 if (*cond_expr)
1794 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1795 *cond_expr, part_cond_expr);
1796 else
1797 *cond_expr = part_cond_expr;
1799 return true;
1802 /* If ALIGN is nonzero, set up *SEQ_MIN_OUT and *SEQ_MAX_OUT so that for
1803 every address ADDR accessed by D:
1805 *SEQ_MIN_OUT <= ADDR (== ADDR & -ALIGN) <= *SEQ_MAX_OUT
1807 In this case, every element accessed by D is aligned to at least
1808 ALIGN bytes.
1810 If ALIGN is zero then instead set *SEG_MAX_OUT so that:
1812 *SEQ_MIN_OUT <= ADDR < *SEQ_MAX_OUT. */
1814 static void
1815 get_segment_min_max (const dr_with_seg_len &d, tree *seg_min_out,
1816 tree *seg_max_out, HOST_WIDE_INT align)
1818 /* Each access has the following pattern:
1820 <- |seg_len| ->
1821 <--- A: -ve step --->
1822 +-----+-------+-----+-------+-----+
1823 | n-1 | ,.... | 0 | ..... | n-1 |
1824 +-----+-------+-----+-------+-----+
1825 <--- B: +ve step --->
1826 <- |seg_len| ->
1828 base address
1830 where "n" is the number of scalar iterations covered by the segment.
1831 (This should be VF for a particular pair if we know that both steps
1832 are the same, otherwise it will be the full number of scalar loop
1833 iterations.)
1835 A is the range of bytes accessed when the step is negative,
1836 B is the range when the step is positive.
1838 If the access size is "access_size" bytes, the lowest addressed byte is:
1840 base + (step < 0 ? seg_len : 0) [LB]
1842 and the highest addressed byte is always below:
1844 base + (step < 0 ? 0 : seg_len) + access_size [UB]
1846 Thus:
1848 LB <= ADDR < UB
1850 If ALIGN is nonzero, all three values are aligned to at least ALIGN
1851 bytes, so:
1853 LB <= ADDR <= UB - ALIGN
1855 where "- ALIGN" folds naturally with the "+ access_size" and often
1856 cancels it out.
1858 We don't try to simplify LB and UB beyond this (e.g. by using
1859 MIN and MAX based on whether seg_len rather than the stride is
1860 negative) because it is possible for the absolute size of the
1861 segment to overflow the range of a ssize_t.
1863 Keeping the pointer_plus outside of the cond_expr should allow
1864 the cond_exprs to be shared with other alias checks. */
1865 tree indicator = dr_direction_indicator (d.dr);
1866 tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
1867 fold_convert (ssizetype, indicator),
1868 ssize_int (0));
1869 tree addr_base = fold_build_pointer_plus (DR_BASE_ADDRESS (d.dr),
1870 DR_OFFSET (d.dr));
1871 addr_base = fold_build_pointer_plus (addr_base, DR_INIT (d.dr));
1872 tree seg_len
1873 = fold_convert (sizetype, rewrite_to_non_trapping_overflow (d.seg_len));
1875 tree min_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
1876 seg_len, size_zero_node);
1877 tree max_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
1878 size_zero_node, seg_len);
1879 max_reach = fold_build2 (PLUS_EXPR, sizetype, max_reach,
1880 size_int (d.access_size - align));
1882 *seg_min_out = fold_build_pointer_plus (addr_base, min_reach);
1883 *seg_max_out = fold_build_pointer_plus (addr_base, max_reach);
1886 /* Given two data references and segment lengths described by DR_A and DR_B,
1887 create expression checking if the two addresses ranges intersect with
1888 each other:
1890 ((DR_A_addr_0 + DR_A_segment_length_0) <= DR_B_addr_0)
1891 || (DR_B_addr_0 + DER_B_segment_length_0) <= DR_A_addr_0)) */
1893 static void
1894 create_intersect_range_checks (class loop *loop, tree *cond_expr,
1895 const dr_with_seg_len& dr_a,
1896 const dr_with_seg_len& dr_b)
1898 *cond_expr = NULL_TREE;
1899 if (create_intersect_range_checks_index (loop, cond_expr, dr_a, dr_b))
1900 return;
1902 unsigned HOST_WIDE_INT min_align;
1903 tree_code cmp_code;
1904 if (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST
1905 && TREE_CODE (DR_STEP (dr_b.dr)) == INTEGER_CST)
1907 /* In this case adding access_size to seg_len is likely to give
1908 a simple X * step, where X is either the number of scalar
1909 iterations or the vectorization factor. We're better off
1910 keeping that, rather than subtracting an alignment from it.
1912 In this case the maximum values are exclusive and so there is
1913 no alias if the maximum of one segment equals the minimum
1914 of another. */
1915 min_align = 0;
1916 cmp_code = LE_EXPR;
1918 else
1920 /* Calculate the minimum alignment shared by all four pointers,
1921 then arrange for this alignment to be subtracted from the
1922 exclusive maximum values to get inclusive maximum values.
1923 This "- min_align" is cumulative with a "+ access_size"
1924 in the calculation of the maximum values. In the best
1925 (and common) case, the two cancel each other out, leaving
1926 us with an inclusive bound based only on seg_len. In the
1927 worst case we're simply adding a smaller number than before.
1929 Because the maximum values are inclusive, there is an alias
1930 if the maximum value of one segment is equal to the minimum
1931 value of the other. */
1932 min_align = MIN (dr_a.align, dr_b.align);
1933 cmp_code = LT_EXPR;
1936 tree seg_a_min, seg_a_max, seg_b_min, seg_b_max;
1937 get_segment_min_max (dr_a, &seg_a_min, &seg_a_max, min_align);
1938 get_segment_min_max (dr_b, &seg_b_min, &seg_b_max, min_align);
1940 *cond_expr
1941 = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1942 fold_build2 (cmp_code, boolean_type_node, seg_a_max, seg_b_min),
1943 fold_build2 (cmp_code, boolean_type_node, seg_b_max, seg_a_min));
1946 /* Create a conditional expression that represents the run-time checks for
1947 overlapping of address ranges represented by a list of data references
1948 pairs passed in ALIAS_PAIRS. Data references are in LOOP. The returned
1949 COND_EXPR is the conditional expression to be used in the if statement
1950 that controls which version of the loop gets executed at runtime. */
1952 void
1953 create_runtime_alias_checks (class loop *loop,
1954 vec<dr_with_seg_len_pair_t> *alias_pairs,
1955 tree * cond_expr)
1957 tree part_cond_expr;
1959 fold_defer_overflow_warnings ();
1960 for (size_t i = 0, s = alias_pairs->length (); i < s; ++i)
1962 const dr_with_seg_len& dr_a = (*alias_pairs)[i].first;
1963 const dr_with_seg_len& dr_b = (*alias_pairs)[i].second;
1965 if (dump_enabled_p ())
1966 dump_printf (MSG_NOTE,
1967 "create runtime check for data references %T and %T\n",
1968 DR_REF (dr_a.dr), DR_REF (dr_b.dr));
1970 /* Create condition expression for each pair data references. */
1971 create_intersect_range_checks (loop, &part_cond_expr, dr_a, dr_b);
1972 if (*cond_expr)
1973 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1974 *cond_expr, part_cond_expr);
1975 else
1976 *cond_expr = part_cond_expr;
1978 fold_undefer_and_ignore_overflow_warnings ();
1981 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
1982 expressions. */
1983 static bool
1984 dr_equal_offsets_p1 (tree offset1, tree offset2)
1986 bool res;
1988 STRIP_NOPS (offset1);
1989 STRIP_NOPS (offset2);
1991 if (offset1 == offset2)
1992 return true;
1994 if (TREE_CODE (offset1) != TREE_CODE (offset2)
1995 || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
1996 return false;
1998 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
1999 TREE_OPERAND (offset2, 0));
2001 if (!res || !BINARY_CLASS_P (offset1))
2002 return res;
2004 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
2005 TREE_OPERAND (offset2, 1));
2007 return res;
2010 /* Check if DRA and DRB have equal offsets. */
2011 bool
2012 dr_equal_offsets_p (struct data_reference *dra,
2013 struct data_reference *drb)
2015 tree offset1, offset2;
2017 offset1 = DR_OFFSET (dra);
2018 offset2 = DR_OFFSET (drb);
2020 return dr_equal_offsets_p1 (offset1, offset2);
2023 /* Returns true if FNA == FNB. */
2025 static bool
2026 affine_function_equal_p (affine_fn fna, affine_fn fnb)
2028 unsigned i, n = fna.length ();
2030 if (n != fnb.length ())
2031 return false;
2033 for (i = 0; i < n; i++)
2034 if (!operand_equal_p (fna[i], fnb[i], 0))
2035 return false;
2037 return true;
2040 /* If all the functions in CF are the same, returns one of them,
2041 otherwise returns NULL. */
2043 static affine_fn
2044 common_affine_function (conflict_function *cf)
2046 unsigned i;
2047 affine_fn comm;
2049 if (!CF_NONTRIVIAL_P (cf))
2050 return affine_fn ();
2052 comm = cf->fns[0];
2054 for (i = 1; i < cf->n; i++)
2055 if (!affine_function_equal_p (comm, cf->fns[i]))
2056 return affine_fn ();
2058 return comm;
2061 /* Returns the base of the affine function FN. */
2063 static tree
2064 affine_function_base (affine_fn fn)
2066 return fn[0];
2069 /* Returns true if FN is a constant. */
2071 static bool
2072 affine_function_constant_p (affine_fn fn)
2074 unsigned i;
2075 tree coef;
2077 for (i = 1; fn.iterate (i, &coef); i++)
2078 if (!integer_zerop (coef))
2079 return false;
2081 return true;
2084 /* Returns true if FN is the zero constant function. */
2086 static bool
2087 affine_function_zero_p (affine_fn fn)
2089 return (integer_zerop (affine_function_base (fn))
2090 && affine_function_constant_p (fn));
2093 /* Returns a signed integer type with the largest precision from TA
2094 and TB. */
2096 static tree
2097 signed_type_for_types (tree ta, tree tb)
2099 if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
2100 return signed_type_for (ta);
2101 else
2102 return signed_type_for (tb);
2105 /* Applies operation OP on affine functions FNA and FNB, and returns the
2106 result. */
2108 static affine_fn
2109 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
2111 unsigned i, n, m;
2112 affine_fn ret;
2113 tree coef;
2115 if (fnb.length () > fna.length ())
2117 n = fna.length ();
2118 m = fnb.length ();
2120 else
2122 n = fnb.length ();
2123 m = fna.length ();
2126 ret.create (m);
2127 for (i = 0; i < n; i++)
2129 tree type = signed_type_for_types (TREE_TYPE (fna[i]),
2130 TREE_TYPE (fnb[i]));
2131 ret.quick_push (fold_build2 (op, type, fna[i], fnb[i]));
2134 for (; fna.iterate (i, &coef); i++)
2135 ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2136 coef, integer_zero_node));
2137 for (; fnb.iterate (i, &coef); i++)
2138 ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2139 integer_zero_node, coef));
2141 return ret;
2144 /* Returns the sum of affine functions FNA and FNB. */
2146 static affine_fn
2147 affine_fn_plus (affine_fn fna, affine_fn fnb)
2149 return affine_fn_op (PLUS_EXPR, fna, fnb);
2152 /* Returns the difference of affine functions FNA and FNB. */
2154 static affine_fn
2155 affine_fn_minus (affine_fn fna, affine_fn fnb)
2157 return affine_fn_op (MINUS_EXPR, fna, fnb);
2160 /* Frees affine function FN. */
2162 static void
2163 affine_fn_free (affine_fn fn)
2165 fn.release ();
2168 /* Determine for each subscript in the data dependence relation DDR
2169 the distance. */
2171 static void
2172 compute_subscript_distance (struct data_dependence_relation *ddr)
2174 conflict_function *cf_a, *cf_b;
2175 affine_fn fn_a, fn_b, diff;
2177 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
2179 unsigned int i;
2181 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2183 struct subscript *subscript;
2185 subscript = DDR_SUBSCRIPT (ddr, i);
2186 cf_a = SUB_CONFLICTS_IN_A (subscript);
2187 cf_b = SUB_CONFLICTS_IN_B (subscript);
2189 fn_a = common_affine_function (cf_a);
2190 fn_b = common_affine_function (cf_b);
2191 if (!fn_a.exists () || !fn_b.exists ())
2193 SUB_DISTANCE (subscript) = chrec_dont_know;
2194 return;
2196 diff = affine_fn_minus (fn_a, fn_b);
2198 if (affine_function_constant_p (diff))
2199 SUB_DISTANCE (subscript) = affine_function_base (diff);
2200 else
2201 SUB_DISTANCE (subscript) = chrec_dont_know;
2203 affine_fn_free (diff);
2208 /* Returns the conflict function for "unknown". */
2210 static conflict_function *
2211 conflict_fn_not_known (void)
2213 conflict_function *fn = XCNEW (conflict_function);
2214 fn->n = NOT_KNOWN;
2216 return fn;
2219 /* Returns the conflict function for "independent". */
2221 static conflict_function *
2222 conflict_fn_no_dependence (void)
2224 conflict_function *fn = XCNEW (conflict_function);
2225 fn->n = NO_DEPENDENCE;
2227 return fn;
2230 /* Returns true if the address of OBJ is invariant in LOOP. */
2232 static bool
2233 object_address_invariant_in_loop_p (const class loop *loop, const_tree obj)
2235 while (handled_component_p (obj))
2237 if (TREE_CODE (obj) == ARRAY_REF)
2239 for (int i = 1; i < 4; ++i)
2240 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, i),
2241 loop->num))
2242 return false;
2244 else if (TREE_CODE (obj) == COMPONENT_REF)
2246 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
2247 loop->num))
2248 return false;
2250 obj = TREE_OPERAND (obj, 0);
2253 if (!INDIRECT_REF_P (obj)
2254 && TREE_CODE (obj) != MEM_REF)
2255 return true;
2257 return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
2258 loop->num);
2261 /* Returns false if we can prove that data references A and B do not alias,
2262 true otherwise. If LOOP_NEST is false no cross-iteration aliases are
2263 considered. */
2265 bool
2266 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
2267 class loop *loop_nest)
2269 tree addr_a = DR_BASE_OBJECT (a);
2270 tree addr_b = DR_BASE_OBJECT (b);
2272 /* If we are not processing a loop nest but scalar code we
2273 do not need to care about possible cross-iteration dependences
2274 and thus can process the full original reference. Do so,
2275 similar to how loop invariant motion applies extra offset-based
2276 disambiguation. */
2277 if (!loop_nest)
2279 aff_tree off1, off2;
2280 poly_widest_int size1, size2;
2281 get_inner_reference_aff (DR_REF (a), &off1, &size1);
2282 get_inner_reference_aff (DR_REF (b), &off2, &size2);
2283 aff_combination_scale (&off1, -1);
2284 aff_combination_add (&off2, &off1);
2285 if (aff_comb_cannot_overlap_p (&off2, size1, size2))
2286 return false;
2289 if ((TREE_CODE (addr_a) == MEM_REF || TREE_CODE (addr_a) == TARGET_MEM_REF)
2290 && (TREE_CODE (addr_b) == MEM_REF || TREE_CODE (addr_b) == TARGET_MEM_REF)
2291 /* For cross-iteration dependences the cliques must be valid for the
2292 whole loop, not just individual iterations. */
2293 && (!loop_nest
2294 || MR_DEPENDENCE_CLIQUE (addr_a) == 1
2295 || MR_DEPENDENCE_CLIQUE (addr_a) == loop_nest->owned_clique)
2296 && MR_DEPENDENCE_CLIQUE (addr_a) == MR_DEPENDENCE_CLIQUE (addr_b)
2297 && MR_DEPENDENCE_BASE (addr_a) != MR_DEPENDENCE_BASE (addr_b))
2298 return false;
2300 /* If we had an evolution in a pointer-based MEM_REF BASE_OBJECT we
2301 do not know the size of the base-object. So we cannot do any
2302 offset/overlap based analysis but have to rely on points-to
2303 information only. */
2304 if (TREE_CODE (addr_a) == MEM_REF
2305 && (DR_UNCONSTRAINED_BASE (a)
2306 || TREE_CODE (TREE_OPERAND (addr_a, 0)) == SSA_NAME))
2308 /* For true dependences we can apply TBAA. */
2309 if (flag_strict_aliasing
2310 && DR_IS_WRITE (a) && DR_IS_READ (b)
2311 && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
2312 get_alias_set (DR_REF (b))))
2313 return false;
2314 if (TREE_CODE (addr_b) == MEM_REF)
2315 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
2316 TREE_OPERAND (addr_b, 0));
2317 else
2318 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
2319 build_fold_addr_expr (addr_b));
2321 else if (TREE_CODE (addr_b) == MEM_REF
2322 && (DR_UNCONSTRAINED_BASE (b)
2323 || TREE_CODE (TREE_OPERAND (addr_b, 0)) == SSA_NAME))
2325 /* For true dependences we can apply TBAA. */
2326 if (flag_strict_aliasing
2327 && DR_IS_WRITE (a) && DR_IS_READ (b)
2328 && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
2329 get_alias_set (DR_REF (b))))
2330 return false;
2331 if (TREE_CODE (addr_a) == MEM_REF)
2332 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
2333 TREE_OPERAND (addr_b, 0));
2334 else
2335 return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
2336 TREE_OPERAND (addr_b, 0));
2339 /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
2340 that is being subsetted in the loop nest. */
2341 if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
2342 return refs_output_dependent_p (addr_a, addr_b);
2343 else if (DR_IS_READ (a) && DR_IS_WRITE (b))
2344 return refs_anti_dependent_p (addr_a, addr_b);
2345 return refs_may_alias_p (addr_a, addr_b);
2348 /* REF_A and REF_B both satisfy access_fn_component_p. Return true
2349 if it is meaningful to compare their associated access functions
2350 when checking for dependencies. */
2352 static bool
2353 access_fn_components_comparable_p (tree ref_a, tree ref_b)
2355 /* Allow pairs of component refs from the following sets:
2357 { REALPART_EXPR, IMAGPART_EXPR }
2358 { COMPONENT_REF }
2359 { ARRAY_REF }. */
2360 tree_code code_a = TREE_CODE (ref_a);
2361 tree_code code_b = TREE_CODE (ref_b);
2362 if (code_a == IMAGPART_EXPR)
2363 code_a = REALPART_EXPR;
2364 if (code_b == IMAGPART_EXPR)
2365 code_b = REALPART_EXPR;
2366 if (code_a != code_b)
2367 return false;
2369 if (TREE_CODE (ref_a) == COMPONENT_REF)
2370 /* ??? We cannot simply use the type of operand #0 of the refs here as
2371 the Fortran compiler smuggles type punning into COMPONENT_REFs.
2372 Use the DECL_CONTEXT of the FIELD_DECLs instead. */
2373 return (DECL_CONTEXT (TREE_OPERAND (ref_a, 1))
2374 == DECL_CONTEXT (TREE_OPERAND (ref_b, 1)));
2376 return types_compatible_p (TREE_TYPE (TREE_OPERAND (ref_a, 0)),
2377 TREE_TYPE (TREE_OPERAND (ref_b, 0)));
2380 /* Initialize a data dependence relation between data accesses A and
2381 B. NB_LOOPS is the number of loops surrounding the references: the
2382 size of the classic distance/direction vectors. */
2384 struct data_dependence_relation *
2385 initialize_data_dependence_relation (struct data_reference *a,
2386 struct data_reference *b,
2387 vec<loop_p> loop_nest)
2389 struct data_dependence_relation *res;
2390 unsigned int i;
2392 res = XCNEW (struct data_dependence_relation);
2393 DDR_A (res) = a;
2394 DDR_B (res) = b;
2395 DDR_LOOP_NEST (res).create (0);
2396 DDR_SUBSCRIPTS (res).create (0);
2397 DDR_DIR_VECTS (res).create (0);
2398 DDR_DIST_VECTS (res).create (0);
2400 if (a == NULL || b == NULL)
2402 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
2403 return res;
2406 /* If the data references do not alias, then they are independent. */
2407 if (!dr_may_alias_p (a, b, loop_nest.exists () ? loop_nest[0] : NULL))
2409 DDR_ARE_DEPENDENT (res) = chrec_known;
2410 return res;
2413 unsigned int num_dimensions_a = DR_NUM_DIMENSIONS (a);
2414 unsigned int num_dimensions_b = DR_NUM_DIMENSIONS (b);
2415 if (num_dimensions_a == 0 || num_dimensions_b == 0)
2417 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
2418 return res;
2421 /* For unconstrained bases, the root (highest-indexed) subscript
2422 describes a variation in the base of the original DR_REF rather
2423 than a component access. We have no type that accurately describes
2424 the new DR_BASE_OBJECT (whose TREE_TYPE describes the type *after*
2425 applying this subscript) so limit the search to the last real
2426 component access.
2428 E.g. for:
2430 void
2431 f (int a[][8], int b[][8])
2433 for (int i = 0; i < 8; ++i)
2434 a[i * 2][0] = b[i][0];
2437 the a and b accesses have a single ARRAY_REF component reference [0]
2438 but have two subscripts. */
2439 if (DR_UNCONSTRAINED_BASE (a))
2440 num_dimensions_a -= 1;
2441 if (DR_UNCONSTRAINED_BASE (b))
2442 num_dimensions_b -= 1;
2444 /* These structures describe sequences of component references in
2445 DR_REF (A) and DR_REF (B). Each component reference is tied to a
2446 specific access function. */
2447 struct {
2448 /* The sequence starts at DR_ACCESS_FN (A, START_A) of A and
2449 DR_ACCESS_FN (B, START_B) of B (inclusive) and extends to higher
2450 indices. In C notation, these are the indices of the rightmost
2451 component references; e.g. for a sequence .b.c.d, the start
2452 index is for .d. */
2453 unsigned int start_a;
2454 unsigned int start_b;
2456 /* The sequence contains LENGTH consecutive access functions from
2457 each DR. */
2458 unsigned int length;
2460 /* The enclosing objects for the A and B sequences respectively,
2461 i.e. the objects to which DR_ACCESS_FN (A, START_A + LENGTH - 1)
2462 and DR_ACCESS_FN (B, START_B + LENGTH - 1) are applied. */
2463 tree object_a;
2464 tree object_b;
2465 } full_seq = {}, struct_seq = {};
2467 /* Before each iteration of the loop:
2469 - REF_A is what you get after applying DR_ACCESS_FN (A, INDEX_A) and
2470 - REF_B is what you get after applying DR_ACCESS_FN (B, INDEX_B). */
2471 unsigned int index_a = 0;
2472 unsigned int index_b = 0;
2473 tree ref_a = DR_REF (a);
2474 tree ref_b = DR_REF (b);
2476 /* Now walk the component references from the final DR_REFs back up to
2477 the enclosing base objects. Each component reference corresponds
2478 to one access function in the DR, with access function 0 being for
2479 the final DR_REF and the highest-indexed access function being the
2480 one that is applied to the base of the DR.
2482 Look for a sequence of component references whose access functions
2483 are comparable (see access_fn_components_comparable_p). If more
2484 than one such sequence exists, pick the one nearest the base
2485 (which is the leftmost sequence in C notation). Store this sequence
2486 in FULL_SEQ.
2488 For example, if we have:
2490 struct foo { struct bar s; ... } (*a)[10], (*b)[10];
2492 A: a[0][i].s.c.d
2493 B: __real b[0][i].s.e[i].f
2495 (where d is the same type as the real component of f) then the access
2496 functions would be:
2498 0 1 2 3
2499 A: .d .c .s [i]
2501 0 1 2 3 4 5
2502 B: __real .f [i] .e .s [i]
2504 The A0/B2 column isn't comparable, since .d is a COMPONENT_REF
2505 and [i] is an ARRAY_REF. However, the A1/B3 column contains two
2506 COMPONENT_REF accesses for struct bar, so is comparable. Likewise
2507 the A2/B4 column contains two COMPONENT_REF accesses for struct foo,
2508 so is comparable. The A3/B5 column contains two ARRAY_REFs that
2509 index foo[10] arrays, so is again comparable. The sequence is
2510 therefore:
2512 A: [1, 3] (i.e. [i].s.c)
2513 B: [3, 5] (i.e. [i].s.e)
2515 Also look for sequences of component references whose access
2516 functions are comparable and whose enclosing objects have the same
2517 RECORD_TYPE. Store this sequence in STRUCT_SEQ. In the above
2518 example, STRUCT_SEQ would be:
2520 A: [1, 2] (i.e. s.c)
2521 B: [3, 4] (i.e. s.e) */
2522 while (index_a < num_dimensions_a && index_b < num_dimensions_b)
2524 /* REF_A and REF_B must be one of the component access types
2525 allowed by dr_analyze_indices. */
2526 gcc_checking_assert (access_fn_component_p (ref_a));
2527 gcc_checking_assert (access_fn_component_p (ref_b));
2529 /* Get the immediately-enclosing objects for REF_A and REF_B,
2530 i.e. the references *before* applying DR_ACCESS_FN (A, INDEX_A)
2531 and DR_ACCESS_FN (B, INDEX_B). */
2532 tree object_a = TREE_OPERAND (ref_a, 0);
2533 tree object_b = TREE_OPERAND (ref_b, 0);
2535 tree type_a = TREE_TYPE (object_a);
2536 tree type_b = TREE_TYPE (object_b);
2537 if (access_fn_components_comparable_p (ref_a, ref_b))
2539 /* This pair of component accesses is comparable for dependence
2540 analysis, so we can include DR_ACCESS_FN (A, INDEX_A) and
2541 DR_ACCESS_FN (B, INDEX_B) in the sequence. */
2542 if (full_seq.start_a + full_seq.length != index_a
2543 || full_seq.start_b + full_seq.length != index_b)
2545 /* The accesses don't extend the current sequence,
2546 so start a new one here. */
2547 full_seq.start_a = index_a;
2548 full_seq.start_b = index_b;
2549 full_seq.length = 0;
2552 /* Add this pair of references to the sequence. */
2553 full_seq.length += 1;
2554 full_seq.object_a = object_a;
2555 full_seq.object_b = object_b;
2557 /* If the enclosing objects are structures (and thus have the
2558 same RECORD_TYPE), record the new sequence in STRUCT_SEQ. */
2559 if (TREE_CODE (type_a) == RECORD_TYPE)
2560 struct_seq = full_seq;
2562 /* Move to the next containing reference for both A and B. */
2563 ref_a = object_a;
2564 ref_b = object_b;
2565 index_a += 1;
2566 index_b += 1;
2567 continue;
2570 /* Try to approach equal type sizes. */
2571 if (!COMPLETE_TYPE_P (type_a)
2572 || !COMPLETE_TYPE_P (type_b)
2573 || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_a))
2574 || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_b)))
2575 break;
2577 unsigned HOST_WIDE_INT size_a = tree_to_uhwi (TYPE_SIZE_UNIT (type_a));
2578 unsigned HOST_WIDE_INT size_b = tree_to_uhwi (TYPE_SIZE_UNIT (type_b));
2579 if (size_a <= size_b)
2581 index_a += 1;
2582 ref_a = object_a;
2584 if (size_b <= size_a)
2586 index_b += 1;
2587 ref_b = object_b;
2591 /* See whether FULL_SEQ ends at the base and whether the two bases
2592 are equal. We do not care about TBAA or alignment info so we can
2593 use OEP_ADDRESS_OF to avoid false negatives. */
2594 tree base_a = DR_BASE_OBJECT (a);
2595 tree base_b = DR_BASE_OBJECT (b);
2596 bool same_base_p = (full_seq.start_a + full_seq.length == num_dimensions_a
2597 && full_seq.start_b + full_seq.length == num_dimensions_b
2598 && DR_UNCONSTRAINED_BASE (a) == DR_UNCONSTRAINED_BASE (b)
2599 && operand_equal_p (base_a, base_b, OEP_ADDRESS_OF)
2600 && types_compatible_p (TREE_TYPE (base_a),
2601 TREE_TYPE (base_b))
2602 && (!loop_nest.exists ()
2603 || (object_address_invariant_in_loop_p
2604 (loop_nest[0], base_a))));
2606 /* If the bases are the same, we can include the base variation too.
2607 E.g. the b accesses in:
2609 for (int i = 0; i < n; ++i)
2610 b[i + 4][0] = b[i][0];
2612 have a definite dependence distance of 4, while for:
2614 for (int i = 0; i < n; ++i)
2615 a[i + 4][0] = b[i][0];
2617 the dependence distance depends on the gap between a and b.
2619 If the bases are different then we can only rely on the sequence
2620 rooted at a structure access, since arrays are allowed to overlap
2621 arbitrarily and change shape arbitrarily. E.g. we treat this as
2622 valid code:
2624 int a[256];
2626 ((int (*)[4][3]) &a[1])[i][0] += ((int (*)[4][3]) &a[2])[i][0];
2628 where two lvalues with the same int[4][3] type overlap, and where
2629 both lvalues are distinct from the object's declared type. */
2630 if (same_base_p)
2632 if (DR_UNCONSTRAINED_BASE (a))
2633 full_seq.length += 1;
2635 else
2636 full_seq = struct_seq;
2638 /* Punt if we didn't find a suitable sequence. */
2639 if (full_seq.length == 0)
2641 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
2642 return res;
2645 if (!same_base_p)
2647 /* Partial overlap is possible for different bases when strict aliasing
2648 is not in effect. It's also possible if either base involves a union
2649 access; e.g. for:
2651 struct s1 { int a[2]; };
2652 struct s2 { struct s1 b; int c; };
2653 struct s3 { int d; struct s1 e; };
2654 union u { struct s2 f; struct s3 g; } *p, *q;
2656 the s1 at "p->f.b" (base "p->f") partially overlaps the s1 at
2657 "p->g.e" (base "p->g") and might partially overlap the s1 at
2658 "q->g.e" (base "q->g"). */
2659 if (!flag_strict_aliasing
2660 || ref_contains_union_access_p (full_seq.object_a)
2661 || ref_contains_union_access_p (full_seq.object_b))
2663 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
2664 return res;
2667 DDR_COULD_BE_INDEPENDENT_P (res) = true;
2668 if (!loop_nest.exists ()
2669 || (object_address_invariant_in_loop_p (loop_nest[0],
2670 full_seq.object_a)
2671 && object_address_invariant_in_loop_p (loop_nest[0],
2672 full_seq.object_b)))
2674 DDR_OBJECT_A (res) = full_seq.object_a;
2675 DDR_OBJECT_B (res) = full_seq.object_b;
2679 DDR_AFFINE_P (res) = true;
2680 DDR_ARE_DEPENDENT (res) = NULL_TREE;
2681 DDR_SUBSCRIPTS (res).create (full_seq.length);
2682 DDR_LOOP_NEST (res) = loop_nest;
2683 DDR_SELF_REFERENCE (res) = false;
2685 for (i = 0; i < full_seq.length; ++i)
2687 struct subscript *subscript;
2689 subscript = XNEW (struct subscript);
2690 SUB_ACCESS_FN (subscript, 0) = DR_ACCESS_FN (a, full_seq.start_a + i);
2691 SUB_ACCESS_FN (subscript, 1) = DR_ACCESS_FN (b, full_seq.start_b + i);
2692 SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
2693 SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
2694 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
2695 SUB_DISTANCE (subscript) = chrec_dont_know;
2696 DDR_SUBSCRIPTS (res).safe_push (subscript);
2699 return res;
2702 /* Frees memory used by the conflict function F. */
2704 static void
2705 free_conflict_function (conflict_function *f)
2707 unsigned i;
2709 if (CF_NONTRIVIAL_P (f))
2711 for (i = 0; i < f->n; i++)
2712 affine_fn_free (f->fns[i]);
2714 free (f);
2717 /* Frees memory used by SUBSCRIPTS. */
2719 static void
2720 free_subscripts (vec<subscript_p> subscripts)
2722 unsigned i;
2723 subscript_p s;
2725 FOR_EACH_VEC_ELT (subscripts, i, s)
2727 free_conflict_function (s->conflicting_iterations_in_a);
2728 free_conflict_function (s->conflicting_iterations_in_b);
2729 free (s);
2731 subscripts.release ();
2734 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
2735 description. */
2737 static inline void
2738 finalize_ddr_dependent (struct data_dependence_relation *ddr,
2739 tree chrec)
2741 DDR_ARE_DEPENDENT (ddr) = chrec;
2742 free_subscripts (DDR_SUBSCRIPTS (ddr));
2743 DDR_SUBSCRIPTS (ddr).create (0);
2746 /* The dependence relation DDR cannot be represented by a distance
2747 vector. */
2749 static inline void
2750 non_affine_dependence_relation (struct data_dependence_relation *ddr)
2752 if (dump_file && (dump_flags & TDF_DETAILS))
2753 fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
2755 DDR_AFFINE_P (ddr) = false;
2760 /* This section contains the classic Banerjee tests. */
2762 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
2763 variables, i.e., if the ZIV (Zero Index Variable) test is true. */
2765 static inline bool
2766 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
2768 return (evolution_function_is_constant_p (chrec_a)
2769 && evolution_function_is_constant_p (chrec_b));
2772 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
2773 variable, i.e., if the SIV (Single Index Variable) test is true. */
2775 static bool
2776 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
2778 if ((evolution_function_is_constant_p (chrec_a)
2779 && evolution_function_is_univariate_p (chrec_b))
2780 || (evolution_function_is_constant_p (chrec_b)
2781 && evolution_function_is_univariate_p (chrec_a)))
2782 return true;
2784 if (evolution_function_is_univariate_p (chrec_a)
2785 && evolution_function_is_univariate_p (chrec_b))
2787 switch (TREE_CODE (chrec_a))
2789 case POLYNOMIAL_CHREC:
2790 switch (TREE_CODE (chrec_b))
2792 case POLYNOMIAL_CHREC:
2793 if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
2794 return false;
2795 /* FALLTHRU */
2797 default:
2798 return true;
2801 default:
2802 return true;
2806 return false;
2809 /* Creates a conflict function with N dimensions. The affine functions
2810 in each dimension follow. */
2812 static conflict_function *
2813 conflict_fn (unsigned n, ...)
2815 unsigned i;
2816 conflict_function *ret = XCNEW (conflict_function);
2817 va_list ap;
2819 gcc_assert (n > 0 && n <= MAX_DIM);
2820 va_start (ap, n);
2822 ret->n = n;
2823 for (i = 0; i < n; i++)
2824 ret->fns[i] = va_arg (ap, affine_fn);
2825 va_end (ap);
2827 return ret;
2830 /* Returns constant affine function with value CST. */
2832 static affine_fn
2833 affine_fn_cst (tree cst)
2835 affine_fn fn;
2836 fn.create (1);
2837 fn.quick_push (cst);
2838 return fn;
2841 /* Returns affine function with single variable, CST + COEF * x_DIM. */
2843 static affine_fn
2844 affine_fn_univar (tree cst, unsigned dim, tree coef)
2846 affine_fn fn;
2847 fn.create (dim + 1);
2848 unsigned i;
2850 gcc_assert (dim > 0);
2851 fn.quick_push (cst);
2852 for (i = 1; i < dim; i++)
2853 fn.quick_push (integer_zero_node);
2854 fn.quick_push (coef);
2855 return fn;
2858 /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
2859 *OVERLAPS_B are initialized to the functions that describe the
2860 relation between the elements accessed twice by CHREC_A and
2861 CHREC_B. For k >= 0, the following property is verified:
2863 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2865 static void
2866 analyze_ziv_subscript (tree chrec_a,
2867 tree chrec_b,
2868 conflict_function **overlaps_a,
2869 conflict_function **overlaps_b,
2870 tree *last_conflicts)
2872 tree type, difference;
2873 dependence_stats.num_ziv++;
2875 if (dump_file && (dump_flags & TDF_DETAILS))
2876 fprintf (dump_file, "(analyze_ziv_subscript \n");
2878 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
2879 chrec_a = chrec_convert (type, chrec_a, NULL);
2880 chrec_b = chrec_convert (type, chrec_b, NULL);
2881 difference = chrec_fold_minus (type, chrec_a, chrec_b);
2883 switch (TREE_CODE (difference))
2885 case INTEGER_CST:
2886 if (integer_zerop (difference))
2888 /* The difference is equal to zero: the accessed index
2889 overlaps for each iteration in the loop. */
2890 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2891 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2892 *last_conflicts = chrec_dont_know;
2893 dependence_stats.num_ziv_dependent++;
2895 else
2897 /* The accesses do not overlap. */
2898 *overlaps_a = conflict_fn_no_dependence ();
2899 *overlaps_b = conflict_fn_no_dependence ();
2900 *last_conflicts = integer_zero_node;
2901 dependence_stats.num_ziv_independent++;
2903 break;
2905 default:
2906 /* We're not sure whether the indexes overlap. For the moment,
2907 conservatively answer "don't know". */
2908 if (dump_file && (dump_flags & TDF_DETAILS))
2909 fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
2911 *overlaps_a = conflict_fn_not_known ();
2912 *overlaps_b = conflict_fn_not_known ();
2913 *last_conflicts = chrec_dont_know;
2914 dependence_stats.num_ziv_unimplemented++;
2915 break;
2918 if (dump_file && (dump_flags & TDF_DETAILS))
2919 fprintf (dump_file, ")\n");
2922 /* Similar to max_stmt_executions_int, but returns the bound as a tree,
2923 and only if it fits to the int type. If this is not the case, or the
2924 bound on the number of iterations of LOOP could not be derived, returns
2925 chrec_dont_know. */
2927 static tree
2928 max_stmt_executions_tree (class loop *loop)
2930 widest_int nit;
2932 if (!max_stmt_executions (loop, &nit))
2933 return chrec_dont_know;
2935 if (!wi::fits_to_tree_p (nit, unsigned_type_node))
2936 return chrec_dont_know;
2938 return wide_int_to_tree (unsigned_type_node, nit);
2941 /* Determine whether the CHREC is always positive/negative. If the expression
2942 cannot be statically analyzed, return false, otherwise set the answer into
2943 VALUE. */
2945 static bool
2946 chrec_is_positive (tree chrec, bool *value)
2948 bool value0, value1, value2;
2949 tree end_value, nb_iter;
2951 switch (TREE_CODE (chrec))
2953 case POLYNOMIAL_CHREC:
2954 if (!chrec_is_positive (CHREC_LEFT (chrec), &value0)
2955 || !chrec_is_positive (CHREC_RIGHT (chrec), &value1))
2956 return false;
2958 /* FIXME -- overflows. */
2959 if (value0 == value1)
2961 *value = value0;
2962 return true;
2965 /* Otherwise the chrec is under the form: "{-197, +, 2}_1",
2966 and the proof consists in showing that the sign never
2967 changes during the execution of the loop, from 0 to
2968 loop->nb_iterations. */
2969 if (!evolution_function_is_affine_p (chrec))
2970 return false;
2972 nb_iter = number_of_latch_executions (get_chrec_loop (chrec));
2973 if (chrec_contains_undetermined (nb_iter))
2974 return false;
2976 #if 0
2977 /* TODO -- If the test is after the exit, we may decrease the number of
2978 iterations by one. */
2979 if (after_exit)
2980 nb_iter = chrec_fold_minus (type, nb_iter, build_int_cst (type, 1));
2981 #endif
2983 end_value = chrec_apply (CHREC_VARIABLE (chrec), chrec, nb_iter);
2985 if (!chrec_is_positive (end_value, &value2))
2986 return false;
2988 *value = value0;
2989 return value0 == value1;
2991 case INTEGER_CST:
2992 switch (tree_int_cst_sgn (chrec))
2994 case -1:
2995 *value = false;
2996 break;
2997 case 1:
2998 *value = true;
2999 break;
3000 default:
3001 return false;
3003 return true;
3005 default:
3006 return false;
3011 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
3012 constant, and CHREC_B is an affine function. *OVERLAPS_A and
3013 *OVERLAPS_B are initialized to the functions that describe the
3014 relation between the elements accessed twice by CHREC_A and
3015 CHREC_B. For k >= 0, the following property is verified:
3017 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
3019 static void
3020 analyze_siv_subscript_cst_affine (tree chrec_a,
3021 tree chrec_b,
3022 conflict_function **overlaps_a,
3023 conflict_function **overlaps_b,
3024 tree *last_conflicts)
3026 bool value0, value1, value2;
3027 tree type, difference, tmp;
3029 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
3030 chrec_a = chrec_convert (type, chrec_a, NULL);
3031 chrec_b = chrec_convert (type, chrec_b, NULL);
3032 difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
3034 /* Special case overlap in the first iteration. */
3035 if (integer_zerop (difference))
3037 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3038 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3039 *last_conflicts = integer_one_node;
3040 return;
3043 if (!chrec_is_positive (initial_condition (difference), &value0))
3045 if (dump_file && (dump_flags & TDF_DETAILS))
3046 fprintf (dump_file, "siv test failed: chrec is not positive.\n");
3048 dependence_stats.num_siv_unimplemented++;
3049 *overlaps_a = conflict_fn_not_known ();
3050 *overlaps_b = conflict_fn_not_known ();
3051 *last_conflicts = chrec_dont_know;
3052 return;
3054 else
3056 if (value0 == false)
3058 if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
3059 || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
3061 if (dump_file && (dump_flags & TDF_DETAILS))
3062 fprintf (dump_file, "siv test failed: chrec not positive.\n");
3064 *overlaps_a = conflict_fn_not_known ();
3065 *overlaps_b = conflict_fn_not_known ();
3066 *last_conflicts = chrec_dont_know;
3067 dependence_stats.num_siv_unimplemented++;
3068 return;
3070 else
3072 if (value1 == true)
3074 /* Example:
3075 chrec_a = 12
3076 chrec_b = {10, +, 1}
3079 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
3081 HOST_WIDE_INT numiter;
3082 class loop *loop = get_chrec_loop (chrec_b);
3084 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3085 tmp = fold_build2 (EXACT_DIV_EXPR, type,
3086 fold_build1 (ABS_EXPR, type, difference),
3087 CHREC_RIGHT (chrec_b));
3088 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
3089 *last_conflicts = integer_one_node;
3092 /* Perform weak-zero siv test to see if overlap is
3093 outside the loop bounds. */
3094 numiter = max_stmt_executions_int (loop);
3096 if (numiter >= 0
3097 && compare_tree_int (tmp, numiter) > 0)
3099 free_conflict_function (*overlaps_a);
3100 free_conflict_function (*overlaps_b);
3101 *overlaps_a = conflict_fn_no_dependence ();
3102 *overlaps_b = conflict_fn_no_dependence ();
3103 *last_conflicts = integer_zero_node;
3104 dependence_stats.num_siv_independent++;
3105 return;
3107 dependence_stats.num_siv_dependent++;
3108 return;
3111 /* When the step does not divide the difference, there are
3112 no overlaps. */
3113 else
3115 *overlaps_a = conflict_fn_no_dependence ();
3116 *overlaps_b = conflict_fn_no_dependence ();
3117 *last_conflicts = integer_zero_node;
3118 dependence_stats.num_siv_independent++;
3119 return;
3123 else
3125 /* Example:
3126 chrec_a = 12
3127 chrec_b = {10, +, -1}
3129 In this case, chrec_a will not overlap with chrec_b. */
3130 *overlaps_a = conflict_fn_no_dependence ();
3131 *overlaps_b = conflict_fn_no_dependence ();
3132 *last_conflicts = integer_zero_node;
3133 dependence_stats.num_siv_independent++;
3134 return;
3138 else
3140 if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
3141 || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
3143 if (dump_file && (dump_flags & TDF_DETAILS))
3144 fprintf (dump_file, "siv test failed: chrec not positive.\n");
3146 *overlaps_a = conflict_fn_not_known ();
3147 *overlaps_b = conflict_fn_not_known ();
3148 *last_conflicts = chrec_dont_know;
3149 dependence_stats.num_siv_unimplemented++;
3150 return;
3152 else
3154 if (value2 == false)
3156 /* Example:
3157 chrec_a = 3
3158 chrec_b = {10, +, -1}
3160 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
3162 HOST_WIDE_INT numiter;
3163 class loop *loop = get_chrec_loop (chrec_b);
3165 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3166 tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
3167 CHREC_RIGHT (chrec_b));
3168 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
3169 *last_conflicts = integer_one_node;
3171 /* Perform weak-zero siv test to see if overlap is
3172 outside the loop bounds. */
3173 numiter = max_stmt_executions_int (loop);
3175 if (numiter >= 0
3176 && compare_tree_int (tmp, numiter) > 0)
3178 free_conflict_function (*overlaps_a);
3179 free_conflict_function (*overlaps_b);
3180 *overlaps_a = conflict_fn_no_dependence ();
3181 *overlaps_b = conflict_fn_no_dependence ();
3182 *last_conflicts = integer_zero_node;
3183 dependence_stats.num_siv_independent++;
3184 return;
3186 dependence_stats.num_siv_dependent++;
3187 return;
3190 /* When the step does not divide the difference, there
3191 are no overlaps. */
3192 else
3194 *overlaps_a = conflict_fn_no_dependence ();
3195 *overlaps_b = conflict_fn_no_dependence ();
3196 *last_conflicts = integer_zero_node;
3197 dependence_stats.num_siv_independent++;
3198 return;
3201 else
3203 /* Example:
3204 chrec_a = 3
3205 chrec_b = {4, +, 1}
3207 In this case, chrec_a will not overlap with chrec_b. */
3208 *overlaps_a = conflict_fn_no_dependence ();
3209 *overlaps_b = conflict_fn_no_dependence ();
3210 *last_conflicts = integer_zero_node;
3211 dependence_stats.num_siv_independent++;
3212 return;
/* Helper recursive function for initializing the matrix A.  Returns
   the initial value of CHREC.

   Walks down the chain of CHREC_LEFTs, storing each step (scaled by
   MULT, which is +1 for the first access function and -1 for the
   second) into column 0 of A, starting at row INDEX.  Returns
   chrec_dont_know when a step does not fit in a HOST_WIDE_INT.  */

static tree
initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
{
  gcc_assert (chrec);

  switch (TREE_CODE (chrec))
    {
    case POLYNOMIAL_CHREC:
      /* Steps must be representable as integers for the lambda
	 framework; bail out otherwise.  */
      if (!cst_and_fits_in_hwi (CHREC_RIGHT (chrec)))
	return chrec_dont_know;

      A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
      return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);

    case PLUS_EXPR:
    case MULT_EXPR:
    case MINUS_EXPR:
      {
	tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
	tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);

	return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
      }

    CASE_CONVERT:
      {
	tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
	return chrec_convert (chrec_type (chrec), op, NULL);
      }

    case BIT_NOT_EXPR:
      {
	/* Handle ~X as -1 - X.  */
	tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
	return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
			      build_int_cst (TREE_TYPE (chrec), -1), op);
      }

    case INTEGER_CST:
      return chrec;

    default:
      gcc_unreachable ();
      return NULL_TREE;
    }
}
/* Truncating division; for the nonnegative operands used below this
   equals floor division.  */
#define FLOOR_DIV(x,y) ((x) / (y))

/* Solves the special case of the Diophantine equation:
   | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)

   Computes the descriptions OVERLAPS_A and OVERLAPS_B.  NITER is the
   number of iterations that loops X and Y run.  The overlaps will be
   constructed as evolutions in dimension DIM.  */

static void
compute_overlap_steps_for_affine_univar (HOST_WIDE_INT niter,
					 HOST_WIDE_INT step_a,
					 HOST_WIDE_INT step_b,
					 affine_fn *overlaps_a,
					 affine_fn *overlaps_b,
					 tree *last_conflicts, int dim)
{
  /* Conflicts can only repeat when both steps move in the same
     direction; otherwise the only common value is the initial 0.  */
  if (((step_a > 0 && step_b > 0)
       || (step_a < 0 && step_b < 0)))
    {
      HOST_WIDE_INT step_overlaps_a, step_overlaps_b;
      HOST_WIDE_INT gcd_steps_a_b, last_conflict, tau2;

      /* Successive overlaps are lcm (step_a, step_b) apart, i.e.
	 every step_b / gcd iterations of A and step_a / gcd
	 iterations of B.  */
      gcd_steps_a_b = gcd (step_a, step_b);
      step_overlaps_a = step_b / gcd_steps_a_b;
      step_overlaps_b = step_a / gcd_steps_a_b;

      if (niter > 0)
	{
	  /* Last conflict is bounded by how many overlap steps fit
	     in NITER iterations of either loop.  */
	  tau2 = FLOOR_DIV (niter, step_overlaps_a);
	  tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
	  last_conflict = tau2;
	  *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
	}
      else
	*last_conflicts = chrec_dont_know;

      *overlaps_a = affine_fn_univar (integer_zero_node, dim,
				      build_int_cst (NULL_TREE,
						     step_overlaps_a));
      *overlaps_b = affine_fn_univar (integer_zero_node, dim,
				      build_int_cst (NULL_TREE,
						     step_overlaps_b));
    }

  else
    {
      /* Steps move apart: only the trivial (0, 0) solution, which we
	 report as "no conflict".  */
      *overlaps_a = affine_fn_cst (integer_zero_node);
      *overlaps_b = affine_fn_cst (integer_zero_node);
      *last_conflicts = integer_zero_node;
    }
}
/* Solves the special case of a Diophantine equation where CHREC_A is
   an affine bivariate function, and CHREC_B is an affine univariate
   function.  For example,

   | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z

   has the following overlapping functions:

   | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
   | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
   | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v

   FORNOW: This is a specialized implementation for a case occurring in
   a common benchmark.  Implement the general algorithm.  */

static void
compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
				      conflict_function **overlaps_a,
				      conflict_function **overlaps_b,
				      tree *last_conflicts)
{
  bool xz_p, yz_p, xyz_p;
  HOST_WIDE_INT step_x, step_y, step_z;
  HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
  affine_fn overlaps_a_xz, overlaps_b_xz;
  affine_fn overlaps_a_yz, overlaps_b_yz;
  affine_fn overlaps_a_xyz, overlaps_b_xyz;
  affine_fn ova1, ova2, ovb;
  tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;

  /* x is the outer dimension of the bivariate CHREC_A, y its inner
     dimension, and z the dimension of the univariate CHREC_B.  */
  step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
  step_y = int_cst_value (CHREC_RIGHT (chrec_a));
  step_z = int_cst_value (CHREC_RIGHT (chrec_b));

  niter_x = max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)));
  niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a));
  niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b));

  /* All three iteration counts must be known for this analysis.  */
  if (niter_x < 0 || niter_y < 0 || niter_z < 0)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");

      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      return;
    }

  /* Solve the three univariate sub-problems: x vs. z, y vs. z, and
     (x + y) vs. z.  */
  niter = MIN (niter_x, niter_z);
  compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
					   &overlaps_a_xz,
					   &overlaps_b_xz,
					   &last_conflicts_xz, 1);
  niter = MIN (niter_y, niter_z);
  compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
					   &overlaps_a_yz,
					   &overlaps_b_yz,
					   &last_conflicts_yz, 2);
  niter = MIN (niter_x, niter_z);
  niter = MIN (niter_y, niter);
  compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
					   &overlaps_a_xyz,
					   &overlaps_b_xyz,
					   &last_conflicts_xyz, 3);

  xz_p = !integer_zerop (last_conflicts_xz);
  yz_p = !integer_zerop (last_conflicts_yz);
  xyz_p = !integer_zerop (last_conflicts_xyz);

  if (xz_p || yz_p || xyz_p)
    {
      /* Sum the contributing sub-solutions; affine_fn_plus allocates
	 a fresh function, so free the previous values afterwards.  */
      ova1 = affine_fn_cst (integer_zero_node);
      ova2 = affine_fn_cst (integer_zero_node);
      ovb = affine_fn_cst (integer_zero_node);
      if (xz_p)
	{
	  affine_fn t0 = ova1;
	  affine_fn t2 = ovb;

	  ova1 = affine_fn_plus (ova1, overlaps_a_xz);
	  ovb = affine_fn_plus (ovb, overlaps_b_xz);
	  affine_fn_free (t0);
	  affine_fn_free (t2);
	  *last_conflicts = last_conflicts_xz;
	}
      if (yz_p)
	{
	  affine_fn t0 = ova2;
	  affine_fn t2 = ovb;

	  ova2 = affine_fn_plus (ova2, overlaps_a_yz);
	  ovb = affine_fn_plus (ovb, overlaps_b_yz);
	  affine_fn_free (t0);
	  affine_fn_free (t2);
	  *last_conflicts = last_conflicts_yz;
	}
      if (xyz_p)
	{
	  affine_fn t0 = ova1;
	  affine_fn t2 = ova2;
	  affine_fn t4 = ovb;

	  ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
	  ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
	  ovb = affine_fn_plus (ovb, overlaps_b_xyz);
	  affine_fn_free (t0);
	  affine_fn_free (t2);
	  affine_fn_free (t4);
	  *last_conflicts = last_conflicts_xyz;
	}
      *overlaps_a = conflict_fn (2, ova1, ova2);
      *overlaps_b = conflict_fn (1, ovb);
    }
  else
    {
      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = integer_zero_node;
    }

  /* The sub-solutions were copied by affine_fn_plus / conflict_fn;
     release the temporaries.  */
  affine_fn_free (overlaps_a_xz);
  affine_fn_free (overlaps_b_xz);
  affine_fn_free (overlaps_a_yz);
  affine_fn_free (overlaps_b_yz);
  affine_fn_free (overlaps_a_xyz);
  affine_fn_free (overlaps_b_xyz);
}
3450 /* Copy the elements of vector VEC1 with length SIZE to VEC2. */
3452 static void
3453 lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
3454 int size)
3456 memcpy (vec2, vec1, size * sizeof (*vec1));
3459 /* Copy the elements of M x N matrix MAT1 to MAT2. */
3461 static void
3462 lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
3463 int m, int n)
3465 int i;
3467 for (i = 0; i < m; i++)
3468 lambda_vector_copy (mat1[i], mat2[i], n);
3471 /* Store the N x N identity matrix in MAT. */
3473 static void
3474 lambda_matrix_id (lambda_matrix mat, int size)
3476 int i, j;
3478 for (i = 0; i < size; i++)
3479 for (j = 0; j < size; j++)
3480 mat[i][j] = (i == j) ? 1 : 0;
3483 /* Return the index of the first nonzero element of vector VEC1 between
3484 START and N. We must have START <= N.
3485 Returns N if VEC1 is the zero vector. */
3487 static int
3488 lambda_vector_first_nz (lambda_vector vec1, int n, int start)
3490 int j = start;
3491 while (j < n && vec1[j] == 0)
3492 j++;
3493 return j;
3496 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
3497 R2 = R2 + CONST1 * R1. */
3499 static void
3500 lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2,
3501 lambda_int const1)
3503 int i;
3505 if (const1 == 0)
3506 return;
3508 for (i = 0; i < n; i++)
3509 mat[r2][i] += const1 * mat[r1][i];
3512 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
3513 and store the result in VEC2. */
3515 static void
3516 lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
3517 int size, lambda_int const1)
3519 int i;
3521 if (const1 == 0)
3522 lambda_vector_clear (vec2, size);
3523 else
3524 for (i = 0; i < size; i++)
3525 vec2[i] = const1 * vec1[i];
3528 /* Negate vector VEC1 with length SIZE and store it in VEC2. */
3530 static void
3531 lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
3532 int size)
3534 lambda_vector_mult_const (vec1, vec2, size, -1);
3537 /* Negate row R1 of matrix MAT which has N columns. */
3539 static void
3540 lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
3542 lambda_vector_negate (mat[r1], mat[r1], n);
3545 /* Return true if two vectors are equal. */
3547 static bool
3548 lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
3550 int i;
3551 for (i = 0; i < size; i++)
3552 if (vec1[i] != vec2[i])
3553 return false;
3554 return true;
/* Given an M x N integer matrix A, this function determines an M x
   M unimodular matrix U, and an M x N echelon matrix S such that
   "U.A = S".  This decomposition is also known as "right Hermite".

   Ref: Algorithm 2.1 page 33 in "Loop Transformations for
   Restructuring Compilers" Utpal Banerjee.  */

static void
lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
			     lambda_matrix S, lambda_matrix U)
{
  int i, j, i0 = 0;

  /* Start from S = A and U = identity; every row operation below is
     applied to both, which maintains the invariant U.A = S.  */
  lambda_matrix_copy (A, S, m, n);
  lambda_matrix_id (U, m);

  for (j = 0; j < n; j++)
    {
      if (lambda_vector_first_nz (S[j], m, i0) < m)
	{
	  ++i0;
	  /* Eliminate entries below the pivot with a Euclidean-style
	     reduction: repeatedly subtract the quotient multiple of
	     the row above, then swap, until the entry becomes 0.  */
	  for (i = m - 1; i >= i0; i--)
	    {
	      while (S[i][j] != 0)
		{
		  lambda_int sigma, factor, a, b;

		  /* SIGMA carries the sign so that the division is
		     done on nonnegative values.  */
		  a = S[i-1][j];
		  b = S[i][j];
		  sigma = (a * b < 0) ? -1: 1;
		  a = abs_hwi (a);
		  b = abs_hwi (b);
		  factor = sigma * (a / b);

		  lambda_matrix_row_add (S, n, i, i-1, -factor);
		  std::swap (S[i], S[i-1]);

		  lambda_matrix_row_add (U, m, i, i-1, -factor);
		  std::swap (U[i], U[i-1]);
		}
	    }
	}
    }
}
/* Determines the overlapping elements due to accesses CHREC_A and
   CHREC_B, that are affine functions.  This function cannot handle
   symbolic evolution functions, ie. when initial conditions are
   parameters, because it uses lambda matrices of integers.  */

static void
analyze_subscript_affine_affine (tree chrec_a,
				 tree chrec_b,
				 conflict_function **overlaps_a,
				 conflict_function **overlaps_b,
				 tree *last_conflicts)
{
  unsigned nb_vars_a, nb_vars_b, dim;
  HOST_WIDE_INT gamma, gcd_alpha_beta;
  lambda_matrix A, U, S;
  struct obstack scratch_obstack;

  if (eq_evolutions_p (chrec_a, chrec_b))
    {
      /* The accessed index overlaps for each iteration in the
	 loop.  */
      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = chrec_dont_know;
      return;
    }
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "(analyze_subscript_affine_affine \n");

  /* For determining the initial intersection, we have to solve a
     Diophantine equation.  This is the most time consuming part.

     For answering to the question: "Is there a dependence?" we have
     to prove that there exists a solution to the Diophantine
     equation, and that the solution is in the iteration domain,
     i.e. the solution is positive or zero, and that the solution
     happens before the upper bound loop.nb_iterations.  Otherwise
     there is no dependence.  This function outputs a description of
     the iterations that hold the intersections.  */

  nb_vars_a = nb_vars_in_chrec (chrec_a);
  nb_vars_b = nb_vars_in_chrec (chrec_b);

  /* All lambda matrices live on this obstack; it is freed wholesale
     at end_analyze_subs_aa.  */
  gcc_obstack_init (&scratch_obstack);

  dim = nb_vars_a + nb_vars_b;
  U = lambda_matrix_new (dim, dim, &scratch_obstack);
  A = lambda_matrix_new (dim, 1, &scratch_obstack);
  S = lambda_matrix_new (dim, 1, &scratch_obstack);

  /* Fill A with the steps of both chrecs: +1 scaling for CHREC_A,
     -1 for CHREC_B, so that a row-reduced A solves the equation
     "CHREC_A (x) - CHREC_B (y) = gamma".  */
  tree init_a = initialize_matrix_A (A, chrec_a, 0, 1);
  tree init_b = initialize_matrix_A (A, chrec_b, nb_vars_a, -1);
  if (init_a == chrec_dont_know
      || init_b == chrec_dont_know)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "affine-affine test failed: "
		 "representation issue.\n");
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      goto end_analyze_subs_aa;
    }
  gamma = int_cst_value (init_b) - int_cst_value (init_a);

  /* Don't do all the hard work of solving the Diophantine equation
     when we already know the solution: for example,
     | {3, +, 1}_1
     | {3, +, 4}_2
     | gamma = 3 - 3 = 0.
     Then the first overlap occurs during the first iterations:
     | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
  */
  if (gamma == 0)
    {
      if (nb_vars_a == 1 && nb_vars_b == 1)
	{
	  HOST_WIDE_INT step_a, step_b;
	  HOST_WIDE_INT niter, niter_a, niter_b;
	  affine_fn ova, ovb;

	  niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a));
	  niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b));
	  niter = MIN (niter_a, niter_b);
	  step_a = int_cst_value (CHREC_RIGHT (chrec_a));
	  step_b = int_cst_value (CHREC_RIGHT (chrec_b));

	  compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
						   &ova, &ovb,
						   last_conflicts, 1);
	  *overlaps_a = conflict_fn (1, ova);
	  *overlaps_b = conflict_fn (1, ovb);
	}

      else if (nb_vars_a == 2 && nb_vars_b == 1)
	compute_overlap_steps_for_affine_1_2
	  (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);

      else if (nb_vars_a == 1 && nb_vars_b == 2)
	compute_overlap_steps_for_affine_1_2
	  (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);

      else
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "affine-affine test failed: too many variables.\n");
	  *overlaps_a = conflict_fn_not_known ();
	  *overlaps_b = conflict_fn_not_known ();
	  *last_conflicts = chrec_dont_know;
	}
      goto end_analyze_subs_aa;
    }

  /* U.A = S */
  lambda_matrix_right_hermite (A, dim, 1, S, U);

  /* Normalize so that the leading coefficient is positive; U is
     adjusted to keep U.A = S.  */
  if (S[0][0] < 0)
    {
      S[0][0] *= -1;
      lambda_matrix_row_negate (U, dim, 0);
    }
  gcd_alpha_beta = S[0][0];

  /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
     but that is a quite strange case.  Instead of ICEing, answer
     don't know.  */
  if (gcd_alpha_beta == 0)
    {
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      goto end_analyze_subs_aa;
    }

  /* The classic "gcd-test".  */
  if (!int_divides_p (gcd_alpha_beta, gamma))
    {
      /* The "gcd-test" has determined that there is no integer
	 solution, i.e. there is no dependence.  */
      *overlaps_a = conflict_fn_no_dependence ();
      *overlaps_b = conflict_fn_no_dependence ();
      *last_conflicts = integer_zero_node;
    }

  /* Both access functions are univariate.  This includes SIV and MIV cases.  */
  else if (nb_vars_a == 1 && nb_vars_b == 1)
    {
      /* Both functions should have the same evolution sign.  */
      if (((A[0][0] > 0 && -A[1][0] > 0)
	   || (A[0][0] < 0 && -A[1][0] < 0)))
	{
	  /* The solutions are given by:
	     |
	     | [GAMMA/GCD_ALPHA_BETA  t].[u11 u12]  = [x0]
	     |                           [u21 u22]    [y0]

	     For a given integer t.  Using the following variables,

	     | i0 = u11 * gamma / gcd_alpha_beta
	     | j0 = u12 * gamma / gcd_alpha_beta
	     | i1 = u21
	     | j1 = u22

	     the solutions are:

	     | x0 = i0 + i1 * t,
	     | y0 = j0 + j1 * t.  */
	  HOST_WIDE_INT i0, j0, i1, j1;

	  i0 = U[0][0] * gamma / gcd_alpha_beta;
	  j0 = U[0][1] * gamma / gcd_alpha_beta;
	  i1 = U[1][0];
	  j1 = U[1][1];

	  if ((i1 == 0 && i0 < 0)
	      || (j1 == 0 && j0 < 0))
	    {
	      /* There is no solution.
		 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
		 falls in here, but for the moment we don't look at the
		 upper bound of the iteration domain.  */
	      *overlaps_a = conflict_fn_no_dependence ();
	      *overlaps_b = conflict_fn_no_dependence ();
	      *last_conflicts = integer_zero_node;
	      goto end_analyze_subs_aa;
	    }

	  if (i1 > 0 && j1 > 0)
	    {
	      HOST_WIDE_INT niter_a
		= max_stmt_executions_int (get_chrec_loop (chrec_a));
	      HOST_WIDE_INT niter_b
		= max_stmt_executions_int (get_chrec_loop (chrec_b));
	      HOST_WIDE_INT niter = MIN (niter_a, niter_b);

	      /* (X0, Y0) is a solution of the Diophantine equation:
		 "chrec_a (X0) = chrec_b (Y0)".  */
	      HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
					CEIL (-j0, j1));
	      HOST_WIDE_INT x0 = i1 * tau1 + i0;
	      HOST_WIDE_INT y0 = j1 * tau1 + j0;

	      /* (X1, Y1) is the smallest positive solution of the eq
		 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
		 first conflict occurs.  */
	      HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
	      HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
	      HOST_WIDE_INT y1 = y0 - j1 * min_multiple;

	      if (niter > 0)
		{
		  /* If the overlap occurs outside of the bounds of the
		     loop, there is no dependence.  */
		  if (x1 >= niter_a || y1 >= niter_b)
		    {
		      *overlaps_a = conflict_fn_no_dependence ();
		      *overlaps_b = conflict_fn_no_dependence ();
		      *last_conflicts = integer_zero_node;
		      goto end_analyze_subs_aa;
		    }

		  /* max stmt executions can get quite large, avoid
		     overflows by using wide ints here.  */
		  widest_int tau2
		    = wi::smin (wi::sdiv_floor (wi::sub (niter_a, i0), i1),
				wi::sdiv_floor (wi::sub (niter_b, j0), j1));
		  widest_int last_conflict = wi::sub (tau2, (x1 - i0)/i1);
		  if (wi::min_precision (last_conflict, SIGNED)
		      <= TYPE_PRECISION (integer_type_node))
		    *last_conflicts
		       = build_int_cst (integer_type_node,
					last_conflict.to_shwi ());
		  else
		    *last_conflicts = chrec_dont_know;
		}
	      else
		*last_conflicts = chrec_dont_know;

	      *overlaps_a
		= conflict_fn (1,
			       affine_fn_univar (build_int_cst (NULL_TREE, x1),
						 1,
						 build_int_cst (NULL_TREE, i1)));
	      *overlaps_b
		= conflict_fn (1,
			       affine_fn_univar (build_int_cst (NULL_TREE, y1),
						 1,
						 build_int_cst (NULL_TREE, j1)));
	    }
	  else
	    {
	      /* FIXME: For the moment, the upper bound of the
		 iteration domain for i and j is not checked.  */
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
	      *overlaps_a = conflict_fn_not_known ();
	      *overlaps_b = conflict_fn_not_known ();
	      *last_conflicts = chrec_dont_know;
	    }
	}
      else
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
	  *overlaps_a = conflict_fn_not_known ();
	  *overlaps_b = conflict_fn_not_known ();
	  *last_conflicts = chrec_dont_know;
	}
    }
  else
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
    }

end_analyze_subs_aa:
  obstack_free (&scratch_obstack, NULL);
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "  (overlaps_a = ");
      dump_conflict_function (dump_file, *overlaps_a);
      fprintf (dump_file, ")\n  (overlaps_b = ");
      dump_conflict_function (dump_file, *overlaps_b);
      fprintf (dump_file, "))\n");
    }
}
/* Returns true when analyze_subscript_affine_affine can be used for
   determining the dependence relation between chrec_a and chrec_b,
   that contain symbols.  This function modifies chrec_a and chrec_b
   such that the analysis result is the same, and such that they don't
   contain symbols, and then can safely be passed to the analyzer.

   Example: The analysis of the following tuples of evolutions produce
   the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
   vs. {0, +, 1}_1

   {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
   {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
*/

static bool
can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
{
  tree diff, type, left_a, left_b, right_b;

  if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
      || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
    /* FIXME: For the moment not handled.  Might be refined later.  */
    return false;

  /* Only the difference of the symbolic initial conditions matters;
     if it is not a constant, we cannot eliminate the symbols.  */
  type = chrec_type (*chrec_a);
  left_a = CHREC_LEFT (*chrec_a);
  left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
  diff = chrec_fold_minus (type, left_a, left_b);

  if (!evolution_function_is_constant_p (diff))
    return false;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");

  /* Rewrite both chrecs relative to CHREC_B's initial condition:
     CHREC_A starts at DIFF, CHREC_B starts at zero.  */
  *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
				     diff, CHREC_RIGHT (*chrec_a));
  right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
  *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
				     build_int_cst (type, 0),
				     right_b);
  return true;
}
/* Analyze a SIV (Single Index Variable) subscript.  *OVERLAPS_A and
   *OVERLAPS_B are initialized to the functions that describe the
   relation between the elements accessed twice by CHREC_A and
   CHREC_B.  For k >= 0, the following property is verified:

   CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */

static void
analyze_siv_subscript (tree chrec_a,
		       tree chrec_b,
		       conflict_function **overlaps_a,
		       conflict_function **overlaps_b,
		       tree *last_conflicts,
		       int loop_nest_num)
{
  dependence_stats.num_siv++;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "(analyze_siv_subscript \n");

  /* Dispatch on the shapes of the two access functions: constant
     vs. affine (in either order), or affine vs. affine.  */
  if (evolution_function_is_constant_p (chrec_a)
      && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
    analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
				      overlaps_a, overlaps_b, last_conflicts);

  else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
	   && evolution_function_is_constant_p (chrec_b))
    analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
				      overlaps_b, overlaps_a, last_conflicts);

  else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
	   && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
    {
      if (!chrec_contains_symbols (chrec_a)
	  && !chrec_contains_symbols (chrec_b))
	{
	  analyze_subscript_affine_affine (chrec_a, chrec_b,
					   overlaps_a, overlaps_b,
					   last_conflicts);

	  if (CF_NOT_KNOWN_P (*overlaps_a)
	      || CF_NOT_KNOWN_P (*overlaps_b))
	    dependence_stats.num_siv_unimplemented++;
	  else if (CF_NO_DEPENDENCE_P (*overlaps_a)
		   || CF_NO_DEPENDENCE_P (*overlaps_b))
	    dependence_stats.num_siv_independent++;
	  else
	    dependence_stats.num_siv_dependent++;
	}
      else if (can_use_analyze_subscript_affine_affine (&chrec_a,
							&chrec_b))
	{
	  /* The symbols were eliminated from CHREC_A and CHREC_B by
	     rewriting them relative to their difference.  */
	  analyze_subscript_affine_affine (chrec_a, chrec_b,
					   overlaps_a, overlaps_b,
					   last_conflicts);

	  if (CF_NOT_KNOWN_P (*overlaps_a)
	      || CF_NOT_KNOWN_P (*overlaps_b))
	    dependence_stats.num_siv_unimplemented++;
	  else if (CF_NO_DEPENDENCE_P (*overlaps_a)
		   || CF_NO_DEPENDENCE_P (*overlaps_b))
	    dependence_stats.num_siv_independent++;
	  else
	    dependence_stats.num_siv_dependent++;
	}
      else
	goto siv_subscript_dontknow;
    }

  else
    {
    siv_subscript_dontknow:;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  siv test failed: unimplemented");
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      dependence_stats.num_siv_unimplemented++;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, ")\n");
}
4020 /* Returns false if we can prove that the greatest common divisor of the steps
4021 of CHREC does not divide CST, false otherwise. */
4023 static bool
4024 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
4026 HOST_WIDE_INT cd = 0, val;
4027 tree step;
4029 if (!tree_fits_shwi_p (cst))
4030 return true;
4031 val = tree_to_shwi (cst);
4033 while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
4035 step = CHREC_RIGHT (chrec);
4036 if (!tree_fits_shwi_p (step))
4037 return true;
4038 cd = gcd (cd, tree_to_shwi (step));
4039 chrec = CHREC_LEFT (chrec);
4042 return val % cd == 0;
/* Analyze a MIV (Multiple Index Variable) subscript with respect to
   LOOP_NEST.  *OVERLAPS_A and *OVERLAPS_B are initialized to the
   functions that describe the relation between the elements accessed
   twice by CHREC_A and CHREC_B.  For k >= 0, the following property
   is verified:

   CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */

static void
analyze_miv_subscript (tree chrec_a,
		       tree chrec_b,
		       conflict_function **overlaps_a,
		       conflict_function **overlaps_b,
		       tree *last_conflicts,
		       class loop *loop_nest)
{
  tree type, difference;

  dependence_stats.num_miv++;
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "(analyze_miv_subscript \n");

  /* Compare both chrecs in a common signed type; the difference of
     the access functions drives the GCD-based independence test.  */
  type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
  chrec_a = chrec_convert (type, chrec_a, NULL);
  chrec_b = chrec_convert (type, chrec_b, NULL);
  difference = chrec_fold_minus (type, chrec_a, chrec_b);

  if (eq_evolutions_p (chrec_a, chrec_b))
    {
      /* Access functions are the same: all the elements are accessed
	 in the same order.  */
      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
      dependence_stats.num_miv_dependent++;
    }

  else if (evolution_function_is_constant_p (difference)
	   && evolution_function_is_affine_multivariate_p (chrec_a,
							   loop_nest->num)
	   && !gcd_of_steps_may_divide_p (chrec_a, difference))
    {
      /* testsuite/.../ssa-chrec-33.c
	 {{21, +, 2}_1, +, -2}_2  vs.  {{20, +, 2}_1, +, -2}_2

	 The difference is 1, and all the evolution steps are multiples
	 of 2, consequently there are no overlapping elements.  */
      *overlaps_a = conflict_fn_no_dependence ();
      *overlaps_b = conflict_fn_no_dependence ();
      *last_conflicts = integer_zero_node;
      dependence_stats.num_miv_independent++;
    }

  else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest->num)
	   && !chrec_contains_symbols (chrec_a, loop_nest)
	   && evolution_function_is_affine_in_loop (chrec_b, loop_nest->num)
	   && !chrec_contains_symbols (chrec_b, loop_nest))
    {
      /* testsuite/.../ssa-chrec-35.c
	 {0, +, 1}_2  vs.  {0, +, 1}_3
	 the overlapping elements are respectively located at iterations:
	 {0, +, 1}_x and {0, +, 1}_x,
	 in other words, we have the equality:
	 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)

	 Other examples:
	 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
	 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)

	 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
	 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
      */
      analyze_subscript_affine_affine (chrec_a, chrec_b,
				       overlaps_a, overlaps_b, last_conflicts);

      if (CF_NOT_KNOWN_P (*overlaps_a)
	  || CF_NOT_KNOWN_P (*overlaps_b))
	dependence_stats.num_miv_unimplemented++;
      else if (CF_NO_DEPENDENCE_P (*overlaps_a)
	       || CF_NO_DEPENDENCE_P (*overlaps_b))
	dependence_stats.num_miv_independent++;
      else
	dependence_stats.num_miv_dependent++;
    }

  else
    {
      /* When the analysis is too difficult, answer "don't know".  */
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");

      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      dependence_stats.num_miv_unimplemented++;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, ")\n");
}
/* Determines the iterations for which CHREC_A is equal to CHREC_B
   with respect to LOOP_NEST.  OVERLAP_ITERATIONS_A and
   OVERLAP_ITERATIONS_B are initialized with two functions that
   describe the iterations that contain conflicting elements.

   Remark: For an integer k >= 0, the following equality is true:

   CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
*/

static void
analyze_overlapping_iterations (tree chrec_a,
				tree chrec_b,
				conflict_function **overlap_iterations_a,
				conflict_function **overlap_iterations_b,
				tree *last_conflicts, class loop *loop_nest)
{
  unsigned int lnn = loop_nest->num;

  dependence_stats.num_subscript_tests++;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "(analyze_overlapping_iterations \n");
      fprintf (dump_file, "  (chrec_a = ");
      print_generic_expr (dump_file, chrec_a);
      fprintf (dump_file, ")\n  (chrec_b = ");
      print_generic_expr (dump_file, chrec_b);
      fprintf (dump_file, ")\n");
    }

  if (chrec_a == NULL_TREE
      || chrec_b == NULL_TREE
      || chrec_contains_undetermined (chrec_a)
      || chrec_contains_undetermined (chrec_b))
    {
      dependence_stats.num_subscript_undetermined++;

      *overlap_iterations_a = conflict_fn_not_known ();
      *overlap_iterations_b = conflict_fn_not_known ();
    }

  /* If they are the same chrec, and are affine, they overlap
     on every iteration.  */
  else if (eq_evolutions_p (chrec_a, chrec_b)
	   && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
	       || operand_equal_p (chrec_a, chrec_b, 0)))
    {
      dependence_stats.num_same_subscript_function++;
      *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = chrec_dont_know;
    }

  /* If they aren't the same, and aren't affine, we can't do anything
     yet.  */
  else if ((chrec_contains_symbols (chrec_a)
	    || chrec_contains_symbols (chrec_b))
	   && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
	       || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
    {
      dependence_stats.num_subscript_undetermined++;
      *overlap_iterations_a = conflict_fn_not_known ();
      *overlap_iterations_b = conflict_fn_not_known ();
    }

  /* Dispatch to the ZIV, SIV or MIV analyzer based on how many
     distinct induction variables the two chrecs involve.  */
  else if (ziv_subscript_p (chrec_a, chrec_b))
    analyze_ziv_subscript (chrec_a, chrec_b,
			   overlap_iterations_a, overlap_iterations_b,
			   last_conflicts);

  else if (siv_subscript_p (chrec_a, chrec_b))
    analyze_siv_subscript (chrec_a, chrec_b,
			   overlap_iterations_a, overlap_iterations_b,
			   last_conflicts, lnn);

  else
    analyze_miv_subscript (chrec_a, chrec_b,
			   overlap_iterations_a, overlap_iterations_b,
			   last_conflicts, loop_nest);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "  (overlap_iterations_a = ");
      dump_conflict_function (dump_file, *overlap_iterations_a);
      fprintf (dump_file, ")\n  (overlap_iterations_b = ");
      dump_conflict_function (dump_file, *overlap_iterations_b);
      fprintf (dump_file, "))\n");
    }
}
4237 /* Helper function for uniquely inserting distance vectors. */
4239 static void
4240 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
4242 unsigned i;
4243 lambda_vector v;
4245 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, v)
4246 if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
4247 return;
4249 DDR_DIST_VECTS (ddr).safe_push (dist_v);
4252 /* Helper function for uniquely inserting direction vectors. */
4254 static void
4255 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
4257 unsigned i;
4258 lambda_vector v;
4260 FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), i, v)
4261 if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
4262 return;
4264 DDR_DIR_VECTS (ddr).safe_push (dir_v);
4267 /* Add a distance of 1 on all the loops outer than INDEX. If we
4268 haven't yet determined a distance for this outer loop, push a new
4269 distance vector composed of the previous distance, and a distance
4270 of 1 for this outer loop. Example:
4272 | loop_1
4273 | loop_2
4274 | A[10]
4275 | endloop_2
4276 | endloop_1
4278 Saved vectors are of the form (dist_in_1, dist_in_2). First, we
4279 save (0, 1), then we have to save (1, 0). */
4281 static void
4282 add_outer_distances (struct data_dependence_relation *ddr,
4283 lambda_vector dist_v, int index)
4285 /* For each outer loop where init_v is not set, the accesses are
4286 in dependence of distance 1 in the loop. */
4287 while (--index >= 0)
4289 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4290 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
4291 save_v[index] = 1;
4292 save_dist_v (ddr, save_v);
/* Return false when fail to represent the data dependence as a
   distance vector.  A_INDEX is the index of the first reference
   (0 for DDR_A, 1 for DDR_B) and B_INDEX is the index of the
   second reference.  INIT_B is set to true when a component has been
   added to the distance vector DIST_V.  INDEX_CARRY is then set to
   the index in DIST_V that carries the dependence.  */

static bool
build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
			     unsigned int a_index, unsigned int b_index,
			     lambda_vector dist_v, bool *init_b,
			     int *index_carry)
{
  unsigned i;
  /* INIT_V tracks which components of DIST_V have been written, so
     that the subscript coupling test below can detect conflicting
     distances for the same loop.  */
  lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
  class loop *loop = DDR_LOOP_NEST (ddr)[0];

  for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
    {
      tree access_fn_a, access_fn_b;
      struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);

      if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
	{
	  non_affine_dependence_relation (ddr);
	  return false;
	}

      access_fn_a = SUB_ACCESS_FN (subscript, a_index);
      access_fn_b = SUB_ACCESS_FN (subscript, b_index);

      if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
	  && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
	{
	  HOST_WIDE_INT dist;
	  int index;
	  int var_a = CHREC_VARIABLE (access_fn_a);
	  int var_b = CHREC_VARIABLE (access_fn_b);

	  /* Both evolutions must be in the same loop for the distance
	     to be meaningful.  */
	  if (var_a != var_b
	      || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
	    {
	      non_affine_dependence_relation (ddr);
	      return false;
	    }

	  /* When data references are collected in a loop while data
	     dependences are analyzed in loop nest nested in the loop, we
	     would have more number of access functions than number of
	     loops.  Skip access functions of loops not in the loop nest.

	     See PR89725 for more information.  */
	  if (flow_loop_nested_p (get_loop (cfun, var_a), loop))
	    continue;

	  dist = int_cst_value (SUB_DISTANCE (subscript));
	  index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
	  *index_carry = MIN (index, *index_carry);

	  /* This is the subscript coupling test.  If we have already
	     recorded a distance for this loop (a distance coming from
	     another subscript), it should be the same.  For example,
	     in the following code, there is no dependence:

	     | loop i = 0, N, 1
	     | T[i+1][i] = ...
	     | ... = T[i][i]
	     | endloop  */
	  if (init_v[index] != 0 && dist_v[index] != dist)
	    {
	      finalize_ddr_dependent (ddr, chrec_known);
	      return false;
	    }

	  dist_v[index] = dist;
	  init_v[index] = 1;
	  *init_b = true;
	}
      else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
	{
	  /* This can be for example an affine vs. constant dependence
	     (T[i] vs. T[3]) that is not an affine dependence and is
	     not representable as a distance vector.  */
	  non_affine_dependence_relation (ddr);
	  return false;
	}
    }

  return true;
}
4388 /* Return true when the DDR contains only constant access functions. */
4390 static bool
4391 constant_access_functions (const struct data_dependence_relation *ddr)
4393 unsigned i;
4394 subscript *sub;
4396 FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
4397 if (!evolution_function_is_constant_p (SUB_ACCESS_FN (sub, 0))
4398 || !evolution_function_is_constant_p (SUB_ACCESS_FN (sub, 1)))
4399 return false;
4401 return true;
/* Helper function for the case where DDR_A and DDR_B are the same
   multivariate access function with a constant step.  For an example
   see pr34635-1.c.  C_2 is a two-level polynomial chrec
   {{c_0, +, step_1}_x1, +, step_2}_x2.  */

static void
add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
{
  int x_1, x_2;
  tree c_1 = CHREC_LEFT (c_2);
  tree c_0 = CHREC_LEFT (c_1);
  lambda_vector dist_v;
  HOST_WIDE_INT v1, v2, cd;

  /* Polynomials with more than 2 variables are not handled yet.  When
     the evolution steps are parameters, it is not possible to
     represent the dependence using classical distance vectors.  */
  if (TREE_CODE (c_0) != INTEGER_CST
      || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
      || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
    {
      DDR_AFFINE_P (ddr) = false;
      return;
    }

  /* Positions of the two chrec variables in the loop nest.  */
  x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
  x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));

  /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2).  */
  dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
  v1 = int_cst_value (CHREC_RIGHT (c_1));
  v2 = int_cst_value (CHREC_RIGHT (c_2));
  /* Reduce the pair of steps by their gcd to get the smallest
     integral distance vector.  */
  cd = gcd (v1, v2);
  v1 /= cd;
  v2 /= cd;

  if (v2 < 0)
    {
      /* Negate both components so the vector stays lexicographically
	 positive.  */
      v2 = -v2;
      v1 = -v1;
    }

  dist_v[x_1] = v2;
  dist_v[x_2] = -v1;
  save_dist_v (ddr, dist_v);

  add_outer_distances (ddr, dist_v, x_1);
}
/* Helper function for the case where DDR_A and DDR_B are the same
   access functions.  Records the self-distance vectors implied by the
   loop nest.  */

static void
add_other_self_distances (struct data_dependence_relation *ddr)
{
  lambda_vector dist_v;
  unsigned i;
  /* Innermost loop index known to carry the dependence; starts past
     the end of the nest and is lowered as evolutions are seen.  */
  int index_carry = DDR_NB_LOOPS (ddr);
  subscript *sub;
  class loop *loop = DDR_LOOP_NEST (ddr)[0];

  FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
    {
      tree access_fun = SUB_ACCESS_FN (sub, 0);

      if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
	{
	  if (!evolution_function_is_univariate_p (access_fun, loop->num))
	    {
	      /* Multivariate evolutions are only handled for
		 single-subscript relations.  */
	      if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
		{
		  DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
		  return;
		}

	      access_fun = SUB_ACCESS_FN (DDR_SUBSCRIPT (ddr, 0), 0);

	      if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
		add_multivariate_self_dist (ddr, access_fun);
	      else
		/* The evolution step is not constant: it varies in
		   the outer loop, so this cannot be represented by a
		   distance vector.  For example in pr34635.c the
		   evolution is {0, +, {0, +, 4}_1}_2.  */
		DDR_AFFINE_P (ddr) = false;

	      return;
	    }

	  /* When data references are collected in a loop while data
	     dependences are analyzed in loop nest nested in the loop, we
	     would have more number of access functions than number of
	     loops.  Skip access functions of loops not in the loop nest.

	     See PR89725 for more information.  */
	  if (flow_loop_nested_p (get_loop (cfun, CHREC_VARIABLE (access_fun)),
				  loop))
	    continue;

	  index_carry = MIN (index_carry,
			     index_in_loop_nest (CHREC_VARIABLE (access_fun),
						 DDR_LOOP_NEST (ddr)));
	}
    }

  /* Record unit distances on all loops outer than INDEX_CARRY.  */
  dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
  add_outer_distances (ddr, dist_v, index_carry);
}
4512 static void
4513 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
4515 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4517 dist_v[0] = 1;
4518 save_dist_v (ddr, dist_v);
4521 /* Adds a unit distance vector to DDR when there is a 0 overlap. This
4522 is the case for example when access functions are the same and
4523 equal to a constant, as in:
4525 | loop_1
4526 | A[3] = ...
4527 | ... = A[3]
4528 | endloop_1
4530 in which case the distance vectors are (0) and (1). */
4532 static void
4533 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
4535 unsigned i, j;
4537 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
4539 subscript_p sub = DDR_SUBSCRIPT (ddr, i);
4540 conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
4541 conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
4543 for (j = 0; j < ca->n; j++)
4544 if (affine_function_zero_p (ca->fns[j]))
4546 insert_innermost_unit_dist_vector (ddr);
4547 return;
4550 for (j = 0; j < cb->n; j++)
4551 if (affine_function_zero_p (cb->fns[j]))
4553 insert_innermost_unit_dist_vector (ddr);
4554 return;
4559 /* Return true when the DDR contains two data references that have the
4560 same access functions. */
4562 static inline bool
4563 same_access_functions (const struct data_dependence_relation *ddr)
4565 unsigned i;
4566 subscript *sub;
4568 FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
4569 if (!eq_evolutions_p (SUB_ACCESS_FN (sub, 0),
4570 SUB_ACCESS_FN (sub, 1)))
4571 return false;
4573 return true;
/* Compute the classic per loop distance vector.  DDR is the data
   dependence relation to build a vector from.  Return false when fail
   to represent the data dependence as a distance vector.  */

static bool
build_classic_dist_vector (struct data_dependence_relation *ddr,
			   class loop *loop_nest)
{
  bool init_b = false;
  int index_carry = DDR_NB_LOOPS (ddr);
  lambda_vector dist_v;

  /* Nothing to do if the relation is already finalized (known
     independent or unknown).  */
  if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
    return false;

  if (same_access_functions (ddr))
    {
      /* Save the 0 vector.  */
      dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
      save_dist_v (ddr, dist_v);

      if (constant_access_functions (ddr))
	add_distance_for_zero_overlaps (ddr);

      if (DDR_NB_LOOPS (ddr) > 1)
	add_other_self_distances (ddr);

      return true;
    }

  dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
  if (!build_classic_dist_vector_1 (ddr, 0, 1, dist_v, &init_b, &index_carry))
    return false;

  /* Save the distance vector if we initialized one.  */
  if (init_b)
    {
      /* Verify a basic constraint: classic distance vectors should
	 always be lexicographically positive.

	 Data references are collected in the order of execution of
	 the program, thus for the following loop

	 | for (i = 1; i < 100; i++)
	 |   for (j = 1; j < 100; j++)
	 |     {
	 |       t = T[j+1][i-1];  // A
	 |       T[j][i] = t + 2;  // B
	 |     }

	 references are collected following the direction of the wind:
	 A then B.  The data dependence tests are performed also
	 following this order, such that we're looking at the distance
	 separating the elements accessed by A from the elements later
	 accessed by B.  But in this example, the distance returned by
	 test_dep (A, B) is lexicographically negative (-1, 1), that
	 means that the access A occurs later than B with respect to
	 the outer loop, ie. we're actually looking upwind.  In this
	 case we solve test_dep (B, A) looking downwind to the
	 lexicographically positive solution, that returns the
	 distance vector (1, -1).  */
      if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
	{
	  lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
	  /* Re-run the dependence test with the references swapped.  */
	  if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
	    return false;
	  compute_subscript_distance (ddr);
	  if (!build_classic_dist_vector_1 (ddr, 1, 0, save_v, &init_b,
					    &index_carry))
	    return false;
	  save_dist_v (ddr, save_v);
	  DDR_REVERSED_P (ddr) = true;

	  /* In this case there is a dependence forward for all the
	     outer loops:

	     | for (k = 1; k < 100; k++)
	     |  for (i = 1; i < 100; i++)
	     |   for (j = 1; j < 100; j++)
	     |     {
	     |       t = T[j+1][i-1];  // A
	     |       T[j][i] = t + 2;  // B
	     |     }

	     the vectors are:
	     (0,  1, -1)
	     (1,  1, -1)
	     (1, -1,  1)
	  */
	  if (DDR_NB_LOOPS (ddr) > 1)
	    {
	      add_outer_distances (ddr, save_v, index_carry);
	      add_outer_distances (ddr, dist_v, index_carry);
	    }
	}
      else
	{
	  lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
	  lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));

	  if (DDR_NB_LOOPS (ddr) > 1)
	    {
	      /* Also record the opposite-direction vector obtained by
		 testing (B, A).  */
	      lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));

	      if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
		return false;
	      compute_subscript_distance (ddr);
	      if (!build_classic_dist_vector_1 (ddr, 1, 0, opposite_v, &init_b,
						&index_carry))
		return false;

	      save_dist_v (ddr, save_v);
	      add_outer_distances (ddr, dist_v, index_carry);
	      add_outer_distances (ddr, opposite_v, index_carry);
	    }
	  else
	    save_dist_v (ddr, save_v);
	}
    }
  else
    {
      /* There is a distance of 1 on all the outer loops: Example:
	 there is a dependence of distance 1 on loop_1 for the array A.

	 | loop_1
	 |   A[5] = ...
	 | endloop
      */
      add_outer_distances (ddr, dist_v,
			   lambda_vector_first_nz (dist_v,
						   DDR_NB_LOOPS (ddr), 0));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      unsigned i;

      fprintf (dump_file, "(build_classic_dist_vector\n");
      for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
	{
	  fprintf (dump_file, "  dist_vector = (");
	  print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
			       DDR_NB_LOOPS (ddr));
	  fprintf (dump_file, "  )\n");
	}
      fprintf (dump_file, ")\n");
    }

  return true;
}
4727 /* Return the direction for a given distance.
4728 FIXME: Computing dir this way is suboptimal, since dir can catch
4729 cases that dist is unable to represent. */
4731 static inline enum data_dependence_direction
4732 dir_from_dist (int dist)
4734 if (dist > 0)
4735 return dir_positive;
4736 else if (dist < 0)
4737 return dir_negative;
4738 else
4739 return dir_equal;
4742 /* Compute the classic per loop direction vector. DDR is the data
4743 dependence relation to build a vector from. */
4745 static void
4746 build_classic_dir_vector (struct data_dependence_relation *ddr)
4748 unsigned i, j;
4749 lambda_vector dist_v;
4751 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
4753 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4755 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
4756 dir_v[j] = dir_from_dist (dist_v[j]);
4758 save_dir_v (ddr, dir_v);
/* Helper function.  Returns true when there is a dependence between the
   data references.  A_INDEX is the index of the first reference (0 for
   DDR_A, 1 for DDR_B) and B_INDEX is the index of the second reference.  */

static bool
subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
			       unsigned int a_index, unsigned int b_index,
			       class loop *loop_nest)
{
  unsigned int i;
  tree last_conflicts;
  struct subscript *subscript;
  /* NULL_TREE: dependence; chrec_known: independence; chrec_dont_know:
     analysis failed for some subscript.  */
  tree res = NULL_TREE;

  for (i = 0; DDR_SUBSCRIPTS (ddr).iterate (i, &subscript); i++)
    {
      conflict_function *overlaps_a, *overlaps_b;

      analyze_overlapping_iterations (SUB_ACCESS_FN (subscript, a_index),
				      SUB_ACCESS_FN (subscript, b_index),
				      &overlaps_a, &overlaps_b,
				      &last_conflicts, loop_nest);

      /* Release any conflict functions computed by a previous run
	 before overwriting them.  */
      if (SUB_CONFLICTS_IN_A (subscript))
	free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
      if (SUB_CONFLICTS_IN_B (subscript))
	free_conflict_function (SUB_CONFLICTS_IN_B (subscript));

      SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
      SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
      SUB_LAST_CONFLICT (subscript) = last_conflicts;

      /* If there is any undetermined conflict function we have to
	 give a conservative answer in case we cannot prove that
	 no dependence exists when analyzing another subscript.  */
      if (CF_NOT_KNOWN_P (overlaps_a)
	  || CF_NOT_KNOWN_P (overlaps_b))
	{
	  res = chrec_dont_know;
	  continue;
	}

      /* When there is a subscript with no dependence we can stop.  */
      else if (CF_NO_DEPENDENCE_P (overlaps_a)
	       || CF_NO_DEPENDENCE_P (overlaps_b))
	{
	  res = chrec_known;
	  break;
	}
    }

  if (res == NULL_TREE)
    return true;

  if (res == chrec_known)
    dependence_stats.num_dependence_independent++;
  else
    dependence_stats.num_dependence_undetermined++;
  finalize_ddr_dependent (ddr, res);
  return false;
}
/* Computes the conflicting iterations in LOOP_NEST, and initialize DDR.  */

static void
subscript_dependence_tester (struct data_dependence_relation *ddr,
			     class loop *loop_nest)
{
  /* A true result means a dependence between DDR_A and DDR_B was
     proven (or must be conservatively assumed).  */
  if (subscript_dependence_tester_1 (ddr, 0, 1, loop_nest))
    dependence_stats.num_dependence_dependent++;

  /* Summarize the per-subscript conflicts as classic distance and, if
     that succeeds, direction vectors.  */
  compute_subscript_distance (ddr);
  if (build_classic_dist_vector (ddr, loop_nest))
    build_classic_dir_vector (ddr);
}
4838 /* Returns true when all the access functions of A are affine or
4839 constant with respect to LOOP_NEST. */
4841 static bool
4842 access_functions_are_affine_or_constant_p (const struct data_reference *a,
4843 const class loop *loop_nest)
4845 unsigned int i;
4846 vec<tree> fns = DR_ACCESS_FNS (a);
4847 tree t;
4849 FOR_EACH_VEC_ELT (fns, i, t)
4850 if (!evolution_function_is_invariant_p (t, loop_nest->num)
4851 && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
4852 return false;
4854 return true;
/* This computes the affine dependence relation between A and B with
   respect to LOOP_NEST.  CHREC_KNOWN is used for representing the
   independence between two accesses, while CHREC_DONT_KNOW is used
   for representing the unknown relation.

   Note that it is possible to stop the computation of the dependence
   relation the first time we detect a CHREC_KNOWN element for a given
   subscript.  */

void
compute_affine_dependence (struct data_dependence_relation *ddr,
			   class loop *loop_nest)
{
  struct data_reference *dra = DDR_A (ddr);
  struct data_reference *drb = DDR_B (ddr);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "(compute_affine_dependence\n");
      fprintf (dump_file, "  stmt_a: ");
      print_gimple_stmt (dump_file, DR_STMT (dra), 0, TDF_SLIM);
      fprintf (dump_file, "  stmt_b: ");
      print_gimple_stmt (dump_file, DR_STMT (drb), 0, TDF_SLIM);
    }

  /* Analyze only when the dependence relation is not yet known.  */
  if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
    {
      dependence_stats.num_dependence_tests++;

      /* Only run the subscript tester when both references have
	 analyzable access functions.  */
      if (access_functions_are_affine_or_constant_p (dra, loop_nest)
	  && access_functions_are_affine_or_constant_p (drb, loop_nest))
	subscript_dependence_tester (ddr, loop_nest);

      /* As a last case, if the dependence cannot be determined, or if
	 the dependence is considered too difficult to determine, answer
	 "don't know".  */
      else
	{
	  dependence_stats.num_dependence_undetermined++;

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Data ref a:\n");
	      dump_data_reference (dump_file, dra);
	      fprintf (dump_file, "Data ref b:\n");
	      dump_data_reference (dump_file, drb);
	      fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
	    }
	  finalize_ddr_dependent (ddr, chrec_dont_know);
	}
    }

  /* Close the dump parenthesis opened above, annotated with the final
     verdict.  */
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
	fprintf (dump_file, ") -> no dependence\n");
      else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
	fprintf (dump_file, ") -> dependence analysis failed\n");
      else
	fprintf (dump_file, ")\n");
    }
}
/* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
   the data references in DATAREFS, in the LOOP_NEST.  When
   COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
   relations.  Return true when successful, i.e. data references number
   is small enough to be handled.  */

bool
compute_all_dependences (vec<data_reference_p> datarefs,
			 vec<ddr_p> *dependence_relations,
			 vec<loop_p> loop_nest,
			 bool compute_self_and_rr)
{
  struct data_dependence_relation *ddr;
  struct data_reference *a, *b;
  unsigned int i, j;

  /* Give up if the quadratic number of pairs would exceed the
     parameter-controlled limit.  */
  if ((int) datarefs.length ()
      > param_loop_max_datarefs_for_datadeps)
    {
      struct data_dependence_relation *ddr;

      /* Insert a single relation into dependence_relations:
	 chrec_dont_know.  */
      ddr = initialize_data_dependence_relation (NULL, NULL, loop_nest);
      dependence_relations->safe_push (ddr);
      return false;
    }

  /* All ordered pairs (a, b) with a before b; read-read pairs only
     when COMPUTE_SELF_AND_RR.  */
  FOR_EACH_VEC_ELT (datarefs, i, a)
    for (j = i + 1; datarefs.iterate (j, &b); j++)
      if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
	{
	  ddr = initialize_data_dependence_relation (a, b, loop_nest);
	  dependence_relations->safe_push (ddr);
	  if (loop_nest.exists ())
	    compute_affine_dependence (ddr, loop_nest[0]);
	}

  /* Self relations (a, a).  */
  if (compute_self_and_rr)
    FOR_EACH_VEC_ELT (datarefs, i, a)
      {
	ddr = initialize_data_dependence_relation (a, a, loop_nest);
	dependence_relations->safe_push (ddr);
	if (loop_nest.exists ())
	  compute_affine_dependence (ddr, loop_nest[0]);
      }

  return true;
}
/* Describes a location of a memory reference.  Used by
   get_references_in_stmt and its callers to carry per-reference
   information before full data_reference analysis.  */

struct data_ref_loc
{
  /* The memory reference.  */
  tree ref;

  /* True if the memory reference is read.  */
  bool is_read;

  /* True if the data reference is conditional within the containing
     statement, i.e. if it might not occur even when the statement
     is executed and runs to completion.  */
  bool is_conditional_in_stmt;
};
/* Stores the locations of memory references in STMT to REFERENCES.  Returns
   true if STMT clobbers memory, false otherwise.  */

static bool
get_references_in_stmt (gimple *stmt, vec<data_ref_loc, va_heap> *references)
{
  bool clobbers_memory = false;
  data_ref_loc ref;
  tree op0, op1;
  enum gimple_code stmt_code = gimple_code (stmt);

  /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
     As we cannot model data-references to not spelled out
     accesses give up if they may occur.  */
  if (stmt_code == GIMPLE_CALL
      && !(gimple_call_flags (stmt) & ECF_CONST))
    {
      /* Allow IFN_GOMP_SIMD_LANE in their own loops.  */
      if (gimple_call_internal_p (stmt))
	switch (gimple_call_internal_fn (stmt))
	  {
	  case IFN_GOMP_SIMD_LANE:
	    {
	      class loop *loop = gimple_bb (stmt)->loop_father;
	      tree uid = gimple_call_arg (stmt, 0);
	      gcc_assert (TREE_CODE (uid) == SSA_NAME);
	      /* Only harmless when the lane belongs to the loop the
		 statement is in.  */
	      if (loop == NULL
		  || loop->simduid != SSA_NAME_VAR (uid))
		clobbers_memory = true;
	      break;
	    }
	  case IFN_MASK_LOAD:
	  case IFN_MASK_STORE:
	    /* Handled as explicit references below.  */
	    break;
	  default:
	    clobbers_memory = true;
	    break;
	  }
      else
	clobbers_memory = true;
    }
  else if (stmt_code == GIMPLE_ASM
	   && (gimple_asm_volatile_p (as_a <gasm *> (stmt))
	       || gimple_vuse (stmt)))
    clobbers_memory = true;

  /* Statements without a virtual use cannot reference memory.  */
  if (!gimple_vuse (stmt))
    return clobbers_memory;

  if (stmt_code == GIMPLE_ASSIGN)
    {
      tree base;
      op0 = gimple_assign_lhs (stmt);
      op1 = gimple_assign_rhs1 (stmt);

      /* Record the RHS as a read when it is a decl or a reference with
	 an analyzable (non-SSA, non-invariant) base.  */
      if (DECL_P (op1)
	  || (REFERENCE_CLASS_P (op1)
	      && (base = get_base_address (op1))
	      && TREE_CODE (base) != SSA_NAME
	      && !is_gimple_min_invariant (base)))
	{
	  ref.ref = op1;
	  ref.is_read = true;
	  ref.is_conditional_in_stmt = false;
	  references->safe_push (ref);
	}
    }
  else if (stmt_code == GIMPLE_CALL)
    {
      unsigned i, n;
      tree ptr, type;
      unsigned int align;

      ref.is_read = false;
      if (gimple_call_internal_p (stmt))
	switch (gimple_call_internal_fn (stmt))
	  {
	  case IFN_MASK_LOAD:
	    if (gimple_call_lhs (stmt) == NULL_TREE)
	      break;
	    ref.is_read = true;
	    /* FALLTHRU */
	  case IFN_MASK_STORE:
	    /* Build a MEM_REF representing the masked access; its type
	       carries the alignment given by the second argument.  */
	    ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)), 0);
	    align = tree_to_shwi (gimple_call_arg (stmt, 1));
	    if (ref.is_read)
	      type = TREE_TYPE (gimple_call_lhs (stmt));
	    else
	      type = TREE_TYPE (gimple_call_arg (stmt, 3));
	    if (TYPE_ALIGN (type) != align)
	      type = build_aligned_type (type, align);
	    /* Masked accesses may not happen on every execution of the
	       statement.  */
	    ref.is_conditional_in_stmt = true;
	    ref.ref = fold_build2 (MEM_REF, type, gimple_call_arg (stmt, 0),
				   ptr);
	    references->safe_push (ref);
	    return false;
	  default:
	    break;
	  }

      /* Record call arguments that are themselves memory references.  */
      op0 = gimple_call_lhs (stmt);
      n = gimple_call_num_args (stmt);
      for (i = 0; i < n; i++)
	{
	  op1 = gimple_call_arg (stmt, i);

	  if (DECL_P (op1)
	      || (REFERENCE_CLASS_P (op1) && get_base_address (op1)))
	    {
	      ref.ref = op1;
	      ref.is_read = true;
	      ref.is_conditional_in_stmt = false;
	      references->safe_push (ref);
	    }
	}
    }
  else
    return clobbers_memory;

  /* Record the LHS (of the assignment or call) as a write.  */
  if (op0
      && (DECL_P (op0)
	  || (REFERENCE_CLASS_P (op0) && get_base_address (op0))))
    {
      ref.ref = op0;
      ref.is_read = false;
      ref.is_conditional_in_stmt = false;
      references->safe_push (ref);
    }
  return clobbers_memory;
}
5120 /* Returns true if the loop-nest has any data reference. */
5122 bool
5123 loop_nest_has_data_refs (loop_p loop)
5125 basic_block *bbs = get_loop_body (loop);
5126 auto_vec<data_ref_loc, 3> references;
5128 for (unsigned i = 0; i < loop->num_nodes; i++)
5130 basic_block bb = bbs[i];
5131 gimple_stmt_iterator bsi;
5133 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5135 gimple *stmt = gsi_stmt (bsi);
5136 get_references_in_stmt (stmt, &references);
5137 if (references.length ())
5139 free (bbs);
5140 return true;
5144 free (bbs);
5145 return false;
5148 /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
5149 reference, returns false, otherwise returns true. NEST is the outermost
5150 loop of the loop nest in which the references should be analyzed. */
5152 opt_result
5153 find_data_references_in_stmt (class loop *nest, gimple *stmt,
5154 vec<data_reference_p> *datarefs)
5156 unsigned i;
5157 auto_vec<data_ref_loc, 2> references;
5158 data_ref_loc *ref;
5159 data_reference_p dr;
5161 if (get_references_in_stmt (stmt, &references))
5162 return opt_result::failure_at (stmt, "statement clobbers memory: %G",
5163 stmt);
5165 FOR_EACH_VEC_ELT (references, i, ref)
5167 dr = create_data_ref (nest ? loop_preheader_edge (nest) : NULL,
5168 loop_containing_stmt (stmt), ref->ref,
5169 stmt, ref->is_read, ref->is_conditional_in_stmt);
5170 gcc_assert (dr != NULL);
5171 datarefs->safe_push (dr);
5174 return opt_result::success ();
5177 /* Stores the data references in STMT to DATAREFS. If there is an
5178 unanalyzable reference, returns false, otherwise returns true.
5179 NEST is the outermost loop of the loop nest in which the references
5180 should be instantiated, LOOP is the loop in which the references
5181 should be analyzed. */
5183 bool
5184 graphite_find_data_references_in_stmt (edge nest, loop_p loop, gimple *stmt,
5185 vec<data_reference_p> *datarefs)
5187 unsigned i;
5188 auto_vec<data_ref_loc, 2> references;
5189 data_ref_loc *ref;
5190 bool ret = true;
5191 data_reference_p dr;
5193 if (get_references_in_stmt (stmt, &references))
5194 return false;
5196 FOR_EACH_VEC_ELT (references, i, ref)
5198 dr = create_data_ref (nest, loop, ref->ref, stmt, ref->is_read,
5199 ref->is_conditional_in_stmt);
5200 gcc_assert (dr != NULL);
5201 datarefs->safe_push (dr);
5204 return ret;
5207 /* Search the data references in LOOP, and record the information into
5208 DATAREFS. Returns chrec_dont_know when failing to analyze a
5209 difficult case, returns NULL_TREE otherwise. */
5211 tree
5212 find_data_references_in_bb (class loop *loop, basic_block bb,
5213 vec<data_reference_p> *datarefs)
5215 gimple_stmt_iterator bsi;
5217 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5219 gimple *stmt = gsi_stmt (bsi);
5221 if (!find_data_references_in_stmt (loop, stmt, datarefs))
5223 struct data_reference *res;
5224 res = XCNEW (struct data_reference);
5225 datarefs->safe_push (res);
5227 return chrec_dont_know;
5231 return NULL_TREE;
5234 /* Search the data references in LOOP, and record the information into
5235 DATAREFS. Returns chrec_dont_know when failing to analyze a
5236 difficult case, returns NULL_TREE otherwise.
5238 TODO: This function should be made smarter so that it can handle address
5239 arithmetic as if they were array accesses, etc. */
5241 tree
5242 find_data_references_in_loop (class loop *loop,
5243 vec<data_reference_p> *datarefs)
5245 basic_block bb, *bbs;
5246 unsigned int i;
5248 bbs = get_loop_body_in_dom_order (loop);
5250 for (i = 0; i < loop->num_nodes; i++)
5252 bb = bbs[i];
5254 if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
5256 free (bbs);
5257 return chrec_dont_know;
5260 free (bbs);
5262 return NULL_TREE;
5265 /* Return the alignment in bytes that DRB is guaranteed to have at all
5266 times. */
5268 unsigned int
5269 dr_alignment (innermost_loop_behavior *drb)
5271 /* Get the alignment of BASE_ADDRESS + INIT. */
5272 unsigned int alignment = drb->base_alignment;
5273 unsigned int misalignment = (drb->base_misalignment
5274 + TREE_INT_CST_LOW (drb->init));
5275 if (misalignment != 0)
5276 alignment = MIN (alignment, misalignment & -misalignment);
5278 /* Cap it to the alignment of OFFSET. */
5279 if (!integer_zerop (drb->offset))
5280 alignment = MIN (alignment, drb->offset_alignment);
5282 /* Cap it to the alignment of STEP. */
5283 if (!integer_zerop (drb->step))
5284 alignment = MIN (alignment, drb->step_alignment);
5286 return alignment;
/* If BASE is a pointer-typed SSA name, try to find the object that it
   is based on.  Return this object X on success and store the alignment
   in bytes of BASE - &X in *ALIGNMENT_OUT.  */

static tree
get_base_for_alignment_1 (tree base, unsigned int *alignment_out)
{
  if (TREE_CODE (base) != SSA_NAME || !POINTER_TYPE_P (TREE_TYPE (base)))
    return NULL_TREE;

  gimple *def = SSA_NAME_DEF_STMT (base);
  base = analyze_scalar_evolution (loop_containing_stmt (def), base);

  /* Peel chrecs and record the minimum alignment preserved by
     all steps.  */
  unsigned int alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
  while (TREE_CODE (base) == POLYNOMIAL_CHREC)
    {
      unsigned int step_alignment = highest_pow2_factor (CHREC_RIGHT (base));
      alignment = MIN (alignment, step_alignment);
      base = CHREC_LEFT (base);
    }

  /* Punt if the expression is too complicated to handle.  */
  if (tree_contains_chrecs (base, NULL) || !POINTER_TYPE_P (TREE_TYPE (base)))
    return NULL_TREE;

  /* The only useful cases are those for which a dereference folds to something
     other than an INDIRECT_REF.  */
  tree ref_type = TREE_TYPE (TREE_TYPE (base));
  tree ref = fold_indirect_ref_1 (UNKNOWN_LOCATION, ref_type, base);
  if (!ref)
    return NULL_TREE;

  /* Analyze the base to which the steps we peeled were applied.  */
  poly_int64 bitsize, bitpos, bytepos;
  machine_mode mode;
  int unsignedp, reversep, volatilep;
  tree offset;
  base = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
			      &unsignedp, &reversep, &volatilep);
  /* A bit position that is not a whole number of bytes cannot be
     expressed as a byte alignment.  */
  if (!base || !multiple_p (bitpos, BITS_PER_UNIT, &bytepos))
    return NULL_TREE;

  /* Restrict the alignment to that guaranteed by the offsets.  */
  unsigned int bytepos_alignment = known_alignment (bytepos);
  if (bytepos_alignment != 0)
    alignment = MIN (alignment, bytepos_alignment);
  if (offset)
    {
      unsigned int offset_alignment = highest_pow2_factor (offset);
      alignment = MIN (alignment, offset_alignment);
    }

  *alignment_out = alignment;
  return base;
}
5347 /* Return the object whose alignment would need to be changed in order
5348 to increase the alignment of ADDR. Store the maximum achievable
5349 alignment in *MAX_ALIGNMENT. */
5351 tree
5352 get_base_for_alignment (tree addr, unsigned int *max_alignment)
5354 tree base = get_base_for_alignment_1 (addr, max_alignment);
5355 if (base)
5356 return base;
5358 if (TREE_CODE (addr) == ADDR_EXPR)
5359 addr = TREE_OPERAND (addr, 0);
5360 *max_alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
5361 return addr;
5364 /* Recursive helper function. */
5366 static bool
5367 find_loop_nest_1 (class loop *loop, vec<loop_p> *loop_nest)
5369 /* Inner loops of the nest should not contain siblings. Example:
5370 when there are two consecutive loops,
5372 | loop_0
5373 | loop_1
5374 | A[{0, +, 1}_1]
5375 | endloop_1
5376 | loop_2
5377 | A[{0, +, 1}_2]
5378 | endloop_2
5379 | endloop_0
5381 the dependence relation cannot be captured by the distance
5382 abstraction. */
5383 if (loop->next)
5384 return false;
5386 loop_nest->safe_push (loop);
5387 if (loop->inner)
5388 return find_loop_nest_1 (loop->inner, loop_nest);
5389 return true;
5392 /* Return false when the LOOP is not well nested. Otherwise return
5393 true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
5394 contain the loops from the outermost to the innermost, as they will
5395 appear in the classic distance vector. */
5397 bool
5398 find_loop_nest (class loop *loop, vec<loop_p> *loop_nest)
5400 loop_nest->safe_push (loop);
5401 if (loop->inner)
5402 return find_loop_nest_1 (loop->inner, loop_nest);
5403 return true;
5406 /* Returns true when the data dependences have been computed, false otherwise.
5407 Given a loop nest LOOP, the following vectors are returned:
5408 DATAREFS is initialized to all the array elements contained in this loop,
5409 DEPENDENCE_RELATIONS contains the relations between the data references.
5410 Compute read-read and self relations if
5411 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
5413 bool
5414 compute_data_dependences_for_loop (class loop *loop,
5415 bool compute_self_and_read_read_dependences,
5416 vec<loop_p> *loop_nest,
5417 vec<data_reference_p> *datarefs,
5418 vec<ddr_p> *dependence_relations)
5420 bool res = true;
5422 memset (&dependence_stats, 0, sizeof (dependence_stats));
5424 /* If the loop nest is not well formed, or one of the data references
5425 is not computable, give up without spending time to compute other
5426 dependences. */
5427 if (!loop
5428 || !find_loop_nest (loop, loop_nest)
5429 || find_data_references_in_loop (loop, datarefs) == chrec_dont_know
5430 || !compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
5431 compute_self_and_read_read_dependences))
5432 res = false;
5434 if (dump_file && (dump_flags & TDF_STATS))
5436 fprintf (dump_file, "Dependence tester statistics:\n");
5438 fprintf (dump_file, "Number of dependence tests: %d\n",
5439 dependence_stats.num_dependence_tests);
5440 fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
5441 dependence_stats.num_dependence_dependent);
5442 fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
5443 dependence_stats.num_dependence_independent);
5444 fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
5445 dependence_stats.num_dependence_undetermined);
5447 fprintf (dump_file, "Number of subscript tests: %d\n",
5448 dependence_stats.num_subscript_tests);
5449 fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
5450 dependence_stats.num_subscript_undetermined);
5451 fprintf (dump_file, "Number of same subscript function: %d\n",
5452 dependence_stats.num_same_subscript_function);
5454 fprintf (dump_file, "Number of ziv tests: %d\n",
5455 dependence_stats.num_ziv);
5456 fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
5457 dependence_stats.num_ziv_dependent);
5458 fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
5459 dependence_stats.num_ziv_independent);
5460 fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
5461 dependence_stats.num_ziv_unimplemented);
5463 fprintf (dump_file, "Number of siv tests: %d\n",
5464 dependence_stats.num_siv);
5465 fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
5466 dependence_stats.num_siv_dependent);
5467 fprintf (dump_file, "Number of siv tests returning independent: %d\n",
5468 dependence_stats.num_siv_independent);
5469 fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
5470 dependence_stats.num_siv_unimplemented);
5472 fprintf (dump_file, "Number of miv tests: %d\n",
5473 dependence_stats.num_miv);
5474 fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
5475 dependence_stats.num_miv_dependent);
5476 fprintf (dump_file, "Number of miv tests returning independent: %d\n",
5477 dependence_stats.num_miv_independent);
5478 fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
5479 dependence_stats.num_miv_unimplemented);
5482 return res;
5485 /* Free the memory used by a data dependence relation DDR. */
5487 void
5488 free_dependence_relation (struct data_dependence_relation *ddr)
5490 if (ddr == NULL)
5491 return;
5493 if (DDR_SUBSCRIPTS (ddr).exists ())
5494 free_subscripts (DDR_SUBSCRIPTS (ddr));
5495 DDR_DIST_VECTS (ddr).release ();
5496 DDR_DIR_VECTS (ddr).release ();
5498 free (ddr);
5501 /* Free the memory used by the data dependence relations from
5502 DEPENDENCE_RELATIONS. */
5504 void
5505 free_dependence_relations (vec<ddr_p> dependence_relations)
5507 unsigned int i;
5508 struct data_dependence_relation *ddr;
5510 FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
5511 if (ddr)
5512 free_dependence_relation (ddr);
5514 dependence_relations.release ();
5517 /* Free the memory used by the data references from DATAREFS. */
5519 void
5520 free_data_refs (vec<data_reference_p> datarefs)
5522 unsigned int i;
5523 struct data_reference *dr;
5525 FOR_EACH_VEC_ELT (datarefs, i, dr)
5526 free_data_ref (dr);
5527 datarefs.release ();
/* Common routine implementing both dr_direction_indicator and
   dr_zero_step_indicator.  Return USEFUL_MIN if the indicator is known
   to be >= USEFUL_MIN and -1 if the indicator is known to be negative.
   Return the step as the indicator otherwise.  */

static tree
dr_step_indicator (struct data_reference *dr, int useful_min)
{
  tree step = DR_STEP (dr);
  if (!step)
    return NULL_TREE;
  STRIP_NOPS (step);
  /* Look for cases where the step is scaled by a positive constant
     integer, which will often be the access size.  If the multiplication
     doesn't change the sign (due to overflow effects) then we can
     test the unscaled value instead.  */
  if (TREE_CODE (step) == MULT_EXPR
      && TREE_CODE (TREE_OPERAND (step, 1)) == INTEGER_CST
      && tree_int_cst_sgn (TREE_OPERAND (step, 1)) > 0)
    {
      tree factor = TREE_OPERAND (step, 1);
      step = TREE_OPERAND (step, 0);

      /* Strip widening and truncating conversions as well as nops.  */
      if (CONVERT_EXPR_P (step)
	  && INTEGRAL_TYPE_P (TREE_TYPE (TREE_OPERAND (step, 0))))
	step = TREE_OPERAND (step, 0);
      tree type = TREE_TYPE (step);

      /* Get the range of step values that would not cause overflow.
	 Any unscaled value in [ssizetype-min / FACTOR,
	 ssizetype-max / FACTOR] keeps its sign after scaling.  */
      widest_int minv = (wi::to_widest (TYPE_MIN_VALUE (ssizetype))
			 / wi::to_widest (factor));
      widest_int maxv = (wi::to_widest (TYPE_MAX_VALUE (ssizetype))
			 / wi::to_widest (factor));

      /* Get the range of values that the unconverted step actually has.
	 Fall back to the full range of its type when no range info is
	 recorded for the SSA name.  */
      wide_int step_min, step_max;
      if (TREE_CODE (step) != SSA_NAME
	  || get_range_info (step, &step_min, &step_max) != VR_RANGE)
	{
	  step_min = wi::to_wide (TYPE_MIN_VALUE (type));
	  step_max = wi::to_wide (TYPE_MAX_VALUE (type));
	}

      /* Check whether the unconverted step has an acceptable range.  */
      signop sgn = TYPE_SIGN (type);
      if (wi::les_p (minv, widest_int::from (step_min, sgn))
	  && wi::ges_p (maxv, widest_int::from (step_max, sgn)))
	{
	  if (wi::ge_p (step_min, useful_min, sgn))
	    return ssize_int (useful_min);
	  else if (wi::lt_p (step_max, 0, sgn))
	    return ssize_int (-1);
	  else
	    return fold_convert (ssizetype, step);
	}
    }
  /* No useful simplification; let the caller analyze the raw step.  */
  return DR_STEP (dr);
}
5590 /* Return a value that is negative iff DR has a negative step. */
5592 tree
5593 dr_direction_indicator (struct data_reference *dr)
5595 return dr_step_indicator (dr, 0);
5598 /* Return a value that is zero iff DR has a zero step. */
5600 tree
5601 dr_zero_step_indicator (struct data_reference *dr)
5603 return dr_step_indicator (dr, 1);
5606 /* Return true if DR is known to have a nonnegative (but possibly zero)
5607 step. */
5609 bool
5610 dr_known_forward_stride_p (struct data_reference *dr)
5612 tree indicator = dr_direction_indicator (dr);
5613 tree neg_step_val = fold_binary (LT_EXPR, boolean_type_node,
5614 fold_convert (ssizetype, indicator),
5615 ssize_int (0));
5616 return neg_step_val && integer_zerop (neg_step_val);