gcc/tree-data-ref.c

   1 /* Data references and dependences detectors.
   2    Copyright (C) 2003-2020 Free Software Foundation, Inc.
   3    Contributed by Sebastian Pop <pop@cri.ensmp.fr>
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 /* This pass walks a given loop structure searching for array
  22    references.  The information about the array accesses is recorded
  23    in DATA_REFERENCE structures.
  24
  25    The basic test for determining the dependences is:
  26    given two access functions chrec1 and chrec2 to a same array, and
  27    x and y two vectors from the iteration domain, the same element of
  28    the array is accessed twice at iterations x and y if and only if:
  29    |             chrec1 (x) == chrec2 (y).
  30
  31    The goals of this analysis are:
  32
  33    - to determine the independence: the relation between two
  34      independent accesses is qualified with the chrec_known (this
  35      information allows a loop parallelization),
  36
  37    - when two data references access the same data, to qualify the
  38      dependence relation with classic dependence representations:
  39
  40        - distance vectors
  41        - direction vectors
  42        - loop carried level dependence
  43        - polyhedron dependence
  44      or with the chains of recurrences based representation,
  45
  46    - to define a knowledge base for storing the data dependence
  47      information,
  48
  49    - to define an interface to access this data.
  50
  51
  52    Definitions:
  53
  54    - subscript: given two array accesses a subscript is the tuple
  55    composed of the access functions for a given dimension.  Example:
  56    Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
  57    (f1, g1), (f2, g2), (f3, g3).
  58
  59    - Diophantine equation: an equation whose coefficients and
  60    solutions are integer constants, for example the equation
  61    |   3*x + 2*y = 1
  62    has an integer solution x = 1 and y = -1.
  63
  64    References:
  65
  66    - "Advanced Compilation for High Performance Computing" by Randy
  67    Allen and Ken Kennedy.
  68    http://citeseer.ist.psu.edu/goff91practical.html
  69
  70    - "Loop Transformations for Restructuring Compilers - The Foundations"
  71    by Utpal Banerjee.
  72
  73
  74 */
  75
  76 #include "config.h"
  77 #include "system.h"
  78 #include "coretypes.h"
  79 #include "backend.h"
  80 #include "rtl.h"
  81 #include "tree.h"
  82 #include "gimple.h"
  83 #include "gimple-pretty-print.h"
  84 #include "alias.h"
  85 #include "fold-const.h"
  86 #include "expr.h"
  87 #include "gimple-iterator.h"
  88 #include "tree-ssa-loop-niter.h"
  89 #include "tree-ssa-loop.h"
  90 #include "tree-ssa.h"
  91 #include "cfgloop.h"
  92 #include "tree-data-ref.h"
  93 #include "tree-scalar-evolution.h"
  94 #include "dumpfile.h"
  95 #include "tree-affine.h"
  96 #include "builtins.h"
  97 #include "tree-eh.h"
  98 #include "ssa.h"
  99 #include "internal-fn.h"
 100
 101 static struct datadep_stats
 102 {
 103   int num_dependence_tests;
 104   int num_dependence_dependent;
 105   int num_dependence_independent;
 106   int num_dependence_undetermined;
 107
 108   int num_subscript_tests;
 109   int num_subscript_undetermined;
 110   int num_same_subscript_function;
 111
 112   int num_ziv;
 113   int num_ziv_independent;
 114   int num_ziv_dependent;
 115   int num_ziv_unimplemented;
 116
 117   int num_siv;
 118   int num_siv_independent;
 119   int num_siv_dependent;
 120   int num_siv_unimplemented;
 121
 122   int num_miv;
 123   int num_miv_independent;
 124   int num_miv_dependent;
 125   int num_miv_unimplemented;
 126 } dependence_stats;
 127
 128 static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
 129                                            unsigned int, unsigned int,
 130                                            class loop *);
 131 /* Returns true iff A divides B.  */
 132
 133 static inline bool
 134 tree_fold_divides_p (const_tree a, const_tree b)
 135 {
 136   gcc_assert (TREE_CODE (a) == INTEGER_CST);
 137   gcc_assert (TREE_CODE (b) == INTEGER_CST);
 138   return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
 139 }
 140
 141 /* Returns true iff A divides B.  */
 142
 143 static inline bool
 144 int_divides_p (int a, int b)
 145 {
 146   return ((b % a) == 0);
 147 }
 148
 149 /* Return true if reference REF contains a union access.  */
 150
 151 static bool
 152 ref_contains_union_access_p (tree ref)
 153 {
 154   while (handled_component_p (ref))
 155     {
 156       ref = TREE_OPERAND (ref, 0);
 157       if (TREE_CODE (TREE_TYPE (ref)) == UNION_TYPE
 158           || TREE_CODE (TREE_TYPE (ref)) == QUAL_UNION_TYPE)
 159         return true;
 160     }
 161   return false;
 162 }
 163
 164 \f
 165
 166 /* Dump into FILE all the data references from DATAREFS.  */
 167
 168 static void
 169 dump_data_references (FILE *file, vec<data_reference_p> datarefs)
 170 {
 171   unsigned int i;
 172   struct data_reference *dr;
 173
 174   FOR_EACH_VEC_ELT (datarefs, i, dr)
 175     dump_data_reference (file, dr);
 176 }
 177
 178 /* Unified dump into FILE all the data references from DATAREFS.  */
 179
 180 DEBUG_FUNCTION void
 181 debug (vec<data_reference_p> &ref)
 182 {
 183   dump_data_references (stderr, ref);
 184 }
 185
 186 DEBUG_FUNCTION void
 187 debug (vec<data_reference_p> *ptr)
 188 {
 189   if (ptr)
 190     debug (*ptr);
 191   else
 192     fprintf (stderr, "<nil>\n");
 193 }
 194
 195
 196 /* Dump into STDERR all the data references from DATAREFS.  */
 197
 198 DEBUG_FUNCTION void
 199 debug_data_references (vec<data_reference_p> datarefs)
 200 {
 201   dump_data_references (stderr, datarefs);
 202 }
 203
 204 /* Print to STDERR the data_reference DR.  */
 205
 206 DEBUG_FUNCTION void
 207 debug_data_reference (struct data_reference *dr)
 208 {
 209   dump_data_reference (stderr, dr);
 210 }
 211
 212 /* Dump function for a DATA_REFERENCE structure.  */
 213
 214 void
 215 dump_data_reference (FILE *outf,
 216                      struct data_reference *dr)
 217 {
 218   unsigned int i;
 219
 220   fprintf (outf, "#(Data Ref: \n");
 221   fprintf (outf, "#  bb: %d \n", gimple_bb (DR_STMT (dr))->index);
 222   fprintf (outf, "#  stmt: ");
 223   print_gimple_stmt (outf, DR_STMT (dr), 0);
 224   fprintf (outf, "#  ref: ");
 225   print_generic_stmt (outf, DR_REF (dr));
 226   fprintf (outf, "#  base_object: ");
 227   print_generic_stmt (outf, DR_BASE_OBJECT (dr));
 228
 229   for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
 230     {
 231       fprintf (outf, "#  Access function %d: ", i);
 232       print_generic_stmt (outf, DR_ACCESS_FN (dr, i));
 233     }
 234   fprintf (outf, "#)\n");
 235 }
 236
 237 /* Unified dump function for a DATA_REFERENCE structure.  */
 238
 239 DEBUG_FUNCTION void
 240 debug (data_reference &ref)
 241 {
 242   dump_data_reference (stderr, &ref);
 243 }
 244
 245 DEBUG_FUNCTION void
 246 debug (data_reference *ptr)
 247 {
 248   if (ptr)
 249     debug (*ptr);
 250   else
 251     fprintf (stderr, "<nil>\n");
 252 }
 253
 254
 255 /* Dumps the affine function described by FN to the file OUTF.  */
 256
 257 DEBUG_FUNCTION void
 258 dump_affine_function (FILE *outf, affine_fn fn)
 259 {
 260   unsigned i;
 261   tree coef;
 262
 263   print_generic_expr (outf, fn[0], TDF_SLIM);
 264   for (i = 1; fn.iterate (i, &coef); i++)
 265     {
 266       fprintf (outf, " + ");
 267       print_generic_expr (outf, coef, TDF_SLIM);
 268       fprintf (outf, " * x_%u", i);
 269     }
 270 }
 271
 272 /* Dumps the conflict function CF to the file OUTF.  */
 273
 274 DEBUG_FUNCTION void
 275 dump_conflict_function (FILE *outf, conflict_function *cf)
 276 {
 277   unsigned i;
 278
 279   if (cf->n == NO_DEPENDENCE)
 280     fprintf (outf, "no dependence");
 281   else if (cf->n == NOT_KNOWN)
 282     fprintf (outf, "not known");
 283   else
 284     {
 285       for (i = 0; i < cf->n; i++)
 286         {
 287           if (i != 0)
 288             fprintf (outf, " ");
 289           fprintf (outf, "[");
 290           dump_affine_function (outf, cf->fns[i]);
 291           fprintf (outf, "]");
 292         }
 293     }
 294 }
 295
 296 /* Dump function for a SUBSCRIPT structure.  */
 297
 298 DEBUG_FUNCTION void
 299 dump_subscript (FILE *outf, struct subscript *subscript)
 300 {
 301   conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
 302
 303   fprintf (outf, "\n (subscript \n");
 304   fprintf (outf, "  iterations_that_access_an_element_twice_in_A: ");
 305   dump_conflict_function (outf, cf);
 306   if (CF_NONTRIVIAL_P (cf))
 307     {
 308       tree last_iteration = SUB_LAST_CONFLICT (subscript);
 309       fprintf (outf, "\n  last_conflict: ");
 310       print_generic_expr (outf, last_iteration);
 311     }
 312
 313   cf = SUB_CONFLICTS_IN_B (subscript);
 314   fprintf (outf, "\n  iterations_that_access_an_element_twice_in_B: ");
 315   dump_conflict_function (outf, cf);
 316   if (CF_NONTRIVIAL_P (cf))
 317     {
 318       tree last_iteration = SUB_LAST_CONFLICT (subscript);
 319       fprintf (outf, "\n  last_conflict: ");
 320       print_generic_expr (outf, last_iteration);
 321     }
 322
 323   fprintf (outf, "\n  (Subscript distance: ");
 324   print_generic_expr (outf, SUB_DISTANCE (subscript));
 325   fprintf (outf, " ))\n");
 326 }
 327
 328 /* Print the classic direction vector DIRV to OUTF.  */
 329
 330 DEBUG_FUNCTION void
 331 print_direction_vector (FILE *outf,
 332                         lambda_vector dirv,
 333                         int length)
 334 {
 335   int eq;
 336
 337   for (eq = 0; eq < length; eq++)
 338     {
 339       enum data_dependence_direction dir = ((enum data_dependence_direction)
 340                                             dirv[eq]);
 341
 342       switch (dir)
 343         {
 344         case dir_positive:
 345           fprintf (outf, "    +");
 346           break;
 347         case dir_negative:
 348           fprintf (outf, "    -");
 349           break;
 350         case dir_equal:
 351           fprintf (outf, "    =");
 352           break;
 353         case dir_positive_or_equal:
 354           fprintf (outf, "   +=");
 355           break;
 356         case dir_positive_or_negative:
 357           fprintf (outf, "   +-");
 358           break;
 359         case dir_negative_or_equal:
 360           fprintf (outf, "   -=");
 361           break;
 362         case dir_star:
 363           fprintf (outf, "    *");
 364           break;
 365         default:
 366           fprintf (outf, "indep");
 367           break;
 368         }
 369     }
 370   fprintf (outf, "\n");
 371 }
 372
 373 /* Print a vector of direction vectors.  */
 374
 375 DEBUG_FUNCTION void
 376 print_dir_vectors (FILE *outf, vec<lambda_vector> dir_vects,
 377                    int length)
 378 {
 379   unsigned j;
 380   lambda_vector v;
 381
 382   FOR_EACH_VEC_ELT (dir_vects, j, v)
 383     print_direction_vector (outf, v, length);
 384 }
 385
 386 /* Print out a vector VEC of length N to OUTFILE.  */
 387
 388 DEBUG_FUNCTION void
 389 print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
 390 {
 391   int i;
 392
 393   for (i = 0; i < n; i++)
 394     fprintf (outfile, "%3d ", (int)vector[i]);
 395   fprintf (outfile, "\n");
 396 }
 397
 398 /* Print a vector of distance vectors.  */
 399
 400 DEBUG_FUNCTION void
 401 print_dist_vectors (FILE *outf, vec<lambda_vector> dist_vects,
 402                     int length)
 403 {
 404   unsigned j;
 405   lambda_vector v;
 406
 407   FOR_EACH_VEC_ELT (dist_vects, j, v)
 408     print_lambda_vector (outf, v, length);
 409 }
 410
 411 /* Dump function for a DATA_DEPENDENCE_RELATION structure.  */
 412
 413 DEBUG_FUNCTION void
 414 dump_data_dependence_relation (FILE *outf,
 415                                struct data_dependence_relation *ddr)
 416 {
 417   struct data_reference *dra, *drb;
 418
 419   fprintf (outf, "(Data Dep: \n");
 420
 421   if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
 422     {
 423       if (ddr)
 424         {
 425           dra = DDR_A (ddr);
 426           drb = DDR_B (ddr);
 427           if (dra)
 428             dump_data_reference (outf, dra);
 429           else
 430             fprintf (outf, "    (nil)\n");
 431           if (drb)
 432             dump_data_reference (outf, drb);
 433           else
 434             fprintf (outf, "    (nil)\n");
 435         }
 436       fprintf (outf, "    (don't know)\n)\n");
 437       return;
 438     }
 439
 440   dra = DDR_A (ddr);
 441   drb = DDR_B (ddr);
 442   dump_data_reference (outf, dra);
 443   dump_data_reference (outf, drb);
 444
 445   if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
 446     fprintf (outf, "    (no dependence)\n");
 447
 448   else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
 449     {
 450       unsigned int i;
 451       class loop *loopi;
 452
 453       subscript *sub;
 454       FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
 455         {
 456           fprintf (outf, "  access_fn_A: ");
 457           print_generic_stmt (outf, SUB_ACCESS_FN (sub, 0));
 458           fprintf (outf, "  access_fn_B: ");
 459           print_generic_stmt (outf, SUB_ACCESS_FN (sub, 1));
 460           dump_subscript (outf, sub);
 461         }
 462
 463       fprintf (outf, "  loop nest: (");
 464       FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr), i, loopi)
 465         fprintf (outf, "%d ", loopi->num);
 466       fprintf (outf, ")\n");
 467
 468       for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
 469         {
 470           fprintf (outf, "  distance_vector: ");
 471           print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
 472                                DDR_NB_LOOPS (ddr));
 473         }
 474
 475       for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
 476         {
 477           fprintf (outf, "  direction_vector: ");
 478           print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
 479                                   DDR_NB_LOOPS (ddr));
 480         }
 481     }
 482
 483   fprintf (outf, ")\n");
 484 }
 485
 486 /* Debug version.  */
 487
 488 DEBUG_FUNCTION void
 489 debug_data_dependence_relation (struct data_dependence_relation *ddr)
 490 {
 491   dump_data_dependence_relation (stderr, ddr);
 492 }
 493
 494 /* Dump into FILE all the dependence relations from DDRS.  */
 495
 496 DEBUG_FUNCTION void
 497 dump_data_dependence_relations (FILE *file,
 498                                 vec<ddr_p> ddrs)
 499 {
 500   unsigned int i;
 501   struct data_dependence_relation *ddr;
 502
 503   FOR_EACH_VEC_ELT (ddrs, i, ddr)
 504     dump_data_dependence_relation (file, ddr);
 505 }
 506
 507 DEBUG_FUNCTION void
 508 debug (vec<ddr_p> &ref)
 509 {
 510   dump_data_dependence_relations (stderr, ref);
 511 }
 512
 513 DEBUG_FUNCTION void
 514 debug (vec<ddr_p> *ptr)
 515 {
 516   if (ptr)
 517     debug (*ptr);
 518   else
 519     fprintf (stderr, "<nil>\n");
 520 }
 521
 522
 523 /* Dump to STDERR all the dependence relations from DDRS.  */
 524
 525 DEBUG_FUNCTION void
 526 debug_data_dependence_relations (vec<ddr_p> ddrs)
 527 {
 528   dump_data_dependence_relations (stderr, ddrs);
 529 }
 530
 531 /* Dumps the distance and direction vectors in FILE.  DDRS contains
 532    the dependence relations, and VECT_SIZE is the size of the
 533    dependence vectors, or in other words the number of loops in the
 534    considered nest.  */
 535
 536 DEBUG_FUNCTION void
 537 dump_dist_dir_vectors (FILE *file, vec<ddr_p> ddrs)
 538 {
 539   unsigned int i, j;
 540   struct data_dependence_relation *ddr;
 541   lambda_vector v;
 542
 543   FOR_EACH_VEC_ELT (ddrs, i, ddr)
 544     if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
 545       {
 546         FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), j, v)
 547           {
 548             fprintf (file, "DISTANCE_V (");
 549             print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
 550             fprintf (file, ")\n");
 551           }
 552
 553         FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), j, v)
 554           {
 555             fprintf (file, "DIRECTION_V (");
 556             print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
 557             fprintf (file, ")\n");
 558           }
 559       }
 560
 561   fprintf (file, "\n\n");
 562 }
 563
 564 /* Dumps the data dependence relations DDRS in FILE.  */
 565
 566 DEBUG_FUNCTION void
 567 dump_ddrs (FILE *file, vec<ddr_p> ddrs)
 568 {
 569   unsigned int i;
 570   struct data_dependence_relation *ddr;
 571
 572   FOR_EACH_VEC_ELT (ddrs, i, ddr)
 573     dump_data_dependence_relation (file, ddr);
 574
 575   fprintf (file, "\n\n");
 576 }
 577
 578 DEBUG_FUNCTION void
 579 debug_ddrs (vec<ddr_p> ddrs)
 580 {
 581   dump_ddrs (stderr, ddrs);
 582 }
 583
 584 static void
 585 split_constant_offset (tree exp, tree *var, tree *off,
 586                        hash_map<tree, std::pair<tree, tree> > &cache,
 587                        unsigned *limit);
 588
 589 /* Helper function for split_constant_offset.  Expresses OP0 CODE OP1
 590    (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
 591    constant of type ssizetype, and returns true.  If we cannot do this
 592    with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
 593    is returned.  */
 594
 595 static bool
 596 split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
 597                          tree *var, tree *off,
 598                          hash_map<tree, std::pair<tree, tree> > &cache,
 599                          unsigned *limit)
 600 {
 601   tree var0, var1;
 602   tree off0, off1;
 603   enum tree_code ocode = code;
 604
 605   *var = NULL_TREE;
 606   *off = NULL_TREE;
 607
 608   switch (code)
 609     {
 610     case INTEGER_CST:
 611       *var = build_int_cst (type, 0);
 612       *off = fold_convert (ssizetype, op0);
 613       return true;
 614
 615     case POINTER_PLUS_EXPR:
 616       ocode = PLUS_EXPR;
 617       /* FALLTHROUGH */
 618     case PLUS_EXPR:
 619     case MINUS_EXPR:
 620       if (TREE_CODE (op1) == INTEGER_CST)
 621         {
 622           split_constant_offset (op0, &var0, &off0, cache, limit);
 623           *var = var0;
 624           *off = size_binop (ocode, off0, fold_convert (ssizetype, op1));
 625           return true;
 626         }
 627       split_constant_offset (op0, &var0, &off0, cache, limit);
 628       split_constant_offset (op1, &var1, &off1, cache, limit);
 629       *var = fold_build2 (code, type, var0, var1);
 630       *off = size_binop (ocode, off0, off1);
 631       return true;
 632
 633     case MULT_EXPR:
 634       if (TREE_CODE (op1) != INTEGER_CST)
 635         return false;
 636
 637       split_constant_offset (op0, &var0, &off0, cache, limit);
 638       *var = fold_build2 (MULT_EXPR, type, var0, op1);
 639       *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
 640       return true;
 641
 642     case ADDR_EXPR:
 643       {
 644         tree base, poffset;
 645         poly_int64 pbitsize, pbitpos, pbytepos;
 646         machine_mode pmode;
 647         int punsignedp, preversep, pvolatilep;
 648
 649         op0 = TREE_OPERAND (op0, 0);
 650         base
 651           = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset, &pmode,
 652                                  &punsignedp, &preversep, &pvolatilep);
 653
 654         if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
 655           return false;
 656         base = build_fold_addr_expr (base);
 657         off0 = ssize_int (pbytepos);
 658
 659         if (poffset)
 660           {
 661             split_constant_offset (poffset, &poffset, &off1, cache, limit);
 662             off0 = size_binop (PLUS_EXPR, off0, off1);
 663             if (POINTER_TYPE_P (TREE_TYPE (base)))
 664               base = fold_build_pointer_plus (base, poffset);
 665             else
 666               base = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base,
 667                                   fold_convert (TREE_TYPE (base), poffset));
 668           }
 669
 670         var0 = fold_convert (type, base);
 671
 672         /* If variable length types are involved, punt, otherwise casts
 673            might be converted into ARRAY_REFs in gimplify_conversion.
 674            To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
 675            possibly no longer appears in current GIMPLE, might resurface.
 676            This perhaps could run
 677            if (CONVERT_EXPR_P (var0))
 678              {
 679                gimplify_conversion (&var0);
 680                // Attempt to fill in any within var0 found ARRAY_REF's
 681                // element size from corresponding op embedded ARRAY_REF,
 682                // if unsuccessful, just punt.
 683              }  */
 684         while (POINTER_TYPE_P (type))
 685           type = TREE_TYPE (type);
 686         if (int_size_in_bytes (type) < 0)
 687           return false;
 688
 689         *var = var0;
 690         *off = off0;
 691         return true;
 692       }
 693
 694     case SSA_NAME:
 695       {
 696         if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op0))
 697           return false;
 698
 699         gimple *def_stmt = SSA_NAME_DEF_STMT (op0);
 700         enum tree_code subcode;
 701
 702         if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
 703           return false;
 704
 705         subcode = gimple_assign_rhs_code (def_stmt);
 706
 707         /* We are using a cache to avoid un-CSEing large amounts of code.  */
 708         bool use_cache = false;
 709         if (!has_single_use (op0)
 710             && (subcode == POINTER_PLUS_EXPR
 711                 || subcode == PLUS_EXPR
 712                 || subcode == MINUS_EXPR
 713                 || subcode == MULT_EXPR
 714                 || subcode == ADDR_EXPR
 715                 || CONVERT_EXPR_CODE_P (subcode)))
 716           {
 717             use_cache = true;
 718             bool existed;
 719             std::pair<tree, tree> &e = cache.get_or_insert (op0, &existed);
 720             if (existed)
 721               {
 722                 if (integer_zerop (e.second))
 723                   return false;
 724                 *var = e.first;
 725                 *off = e.second;
 726                 return true;
 727               }
 728             e = std::make_pair (op0, ssize_int (0));
 729           }
 730
 731         if (*limit == 0)
 732           return false;
 733         --*limit;
 734
 735         var0 = gimple_assign_rhs1 (def_stmt);
 736         var1 = gimple_assign_rhs2 (def_stmt);
 737
 738         bool res = split_constant_offset_1 (type, var0, subcode, var1,
 739                                             var, off, cache, limit);
 740         if (res && use_cache)
 741           *cache.get (op0) = std::make_pair (*var, *off);
 742         return res;
 743       }
 744     CASE_CONVERT:
 745       {
 746         /* We must not introduce undefined overflow, and we must not change
 747            the value.  Hence we're okay if the inner type doesn't overflow
 748            to start with (pointer or signed), the outer type also is an
 749            integer or pointer and the outer precision is at least as large
 750            as the inner.  */
 751         tree itype = TREE_TYPE (op0);
 752         if ((POINTER_TYPE_P (itype)
 753              || (INTEGRAL_TYPE_P (itype) && !TYPE_OVERFLOW_TRAPS (itype)))
 754             && TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
 755             && (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
 756           {
 757             if (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_WRAPS (itype))
 758               {
 759                 /* Split the unconverted operand and try to prove that
 760                    wrapping isn't a problem.  */
 761                 tree tmp_var, tmp_off;
 762                 split_constant_offset (op0, &tmp_var, &tmp_off, cache, limit);
 763
 764                 /* See whether we have an SSA_NAME whose range is known
 765                    to be [A, B].  */
 766                 if (TREE_CODE (tmp_var) != SSA_NAME)
 767                   return false;
 768                 wide_int var_min, var_max;
 769                 value_range_kind vr_type = get_range_info (tmp_var, &var_min,
 770                                                            &var_max);
 771                 wide_int var_nonzero = get_nonzero_bits (tmp_var);
 772                 signop sgn = TYPE_SIGN (itype);
 773                 if (intersect_range_with_nonzero_bits (vr_type, &var_min,
 774                                                        &var_max, var_nonzero,
 775                                                        sgn) != VR_RANGE)
 776                   return false;
 777
 778                 /* See whether the range of OP0 (i.e. TMP_VAR + TMP_OFF)
 779                    is known to be [A + TMP_OFF, B + TMP_OFF], with all
 780                    operations done in ITYPE.  The addition must overflow
 781                    at both ends of the range or at neither.  */
 782                 wi::overflow_type overflow[2];
 783                 unsigned int prec = TYPE_PRECISION (itype);
 784                 wide_int woff = wi::to_wide (tmp_off, prec);
 785                 wide_int op0_min = wi::add (var_min, woff, sgn, &overflow[0]);
 786                 wi::add (var_max, woff, sgn, &overflow[1]);
 787                 if ((overflow[0] != wi::OVF_NONE) != (overflow[1] != wi::OVF_NONE))
 788                   return false;
 789
 790                 /* Calculate (ssizetype) OP0 - (ssizetype) TMP_VAR.  */
 791                 widest_int diff = (widest_int::from (op0_min, sgn)
 792                                    - widest_int::from (var_min, sgn));
 793                 var0 = tmp_var;
 794                 *off = wide_int_to_tree (ssizetype, diff);
 795               }
 796             else
 797               split_constant_offset (op0, &var0, off, cache, limit);
 798             *var = fold_convert (type, var0);
 799             return true;
 800           }
 801         return false;
 802       }
 803
 804     default:
 805       return false;
 806     }
 807 }
 808
 809 /* Expresses EXP as VAR + OFF, where off is a constant.  The type of OFF
 810    will be ssizetype.  */
 811
 812 static void
 813 split_constant_offset (tree exp, tree *var, tree *off,
 814                        hash_map<tree, std::pair<tree, tree> > &cache,
 815                        unsigned *limit)
 816 {
 817   tree type = TREE_TYPE (exp), op0, op1, e, o;
 818   enum tree_code code;
 819
 820   *var = exp;
 821   *off = ssize_int (0);
 822
 823   if (tree_is_chrec (exp)
 824       || get_gimple_rhs_class (TREE_CODE (exp)) == GIMPLE_TERNARY_RHS)
 825     return;
 826
 827   code = TREE_CODE (exp);
 828   extract_ops_from_tree (exp, &code, &op0, &op1);
 829   if (split_constant_offset_1 (type, op0, code, op1, &e, &o, cache, limit))
 830     {
 831       *var = e;
 832       *off = o;
 833     }
 834 }
 835
 836 void
 837 split_constant_offset (tree exp, tree *var, tree *off)
 838 {
 839   unsigned limit = param_ssa_name_def_chain_limit;
 840   static hash_map<tree, std::pair<tree, tree> > *cache;
 841   if (!cache)
 842     cache = new hash_map<tree, std::pair<tree, tree> > (37);
 843   split_constant_offset (exp, var, off, *cache, &limit);
 844   cache->empty ();
 845 }
 846
 847 /* Returns the address ADDR of an object in a canonical shape (without nop
 848    casts, and with type of pointer to the object).  */
 849
 850 static tree
 851 canonicalize_base_object_address (tree addr)
 852 {
 853   tree orig = addr;
 854
 855   STRIP_NOPS (addr);
 856
 857   /* The base address may be obtained by casting from integer, in that case
 858      keep the cast.  */
 859   if (!POINTER_TYPE_P (TREE_TYPE (addr)))
 860     return orig;
 861
 862   if (TREE_CODE (addr) != ADDR_EXPR)
 863     return addr;
 864
 865   return build_fold_addr_expr (TREE_OPERAND (addr, 0));
 866 }
 867
 868 /* Analyze the behavior of memory reference REF within STMT.
 869    There are two modes:
 870
 871    - BB analysis.  In this case we simply split the address into base,
 872      init and offset components, without reference to any containing loop.
 873      The resulting base and offset are general expressions and they can
 874      vary arbitrarily from one iteration of the containing loop to the next.
 875      The step is always zero.
 876
 877    - loop analysis.  In this case we analyze the reference both wrt LOOP
 878      and on the basis that the reference occurs (is "used") in LOOP;
 879      see the comment above analyze_scalar_evolution_in_loop for more
 880      information about this distinction.  The base, init, offset and
 881      step fields are all invariant in LOOP.
 882
 883    Perform BB analysis if LOOP is null, or if LOOP is the function's
 884    dummy outermost loop.  In other cases perform loop analysis.
 885
 886    Return true if the analysis succeeded and store the results in DRB if so.
 887    BB analysis can only fail for bitfield or reversed-storage accesses.  */
 888
 889 opt_result
 890 dr_analyze_innermost (innermost_loop_behavior *drb, tree ref,
 891                       class loop *loop, const gimple *stmt)
 892 {
 893   poly_int64 pbitsize, pbitpos;
 894   tree base, poffset;
 895   machine_mode pmode;
 896   int punsignedp, preversep, pvolatilep;
 897   affine_iv base_iv, offset_iv;
 898   tree init, dinit, step;
 899   bool in_loop = (loop && loop->num);
 900
 901   if (dump_file && (dump_flags & TDF_DETAILS))
 902     fprintf (dump_file, "analyze_innermost: ");
 903
 904   base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset, &pmode,
 905                               &punsignedp, &preversep, &pvolatilep);
 906   gcc_assert (base != NULL_TREE);
 907
 908   poly_int64 pbytepos;
 909   if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
 910     return opt_result::failure_at (stmt,
 911                                    "failed: bit offset alignment.\n");
 912
 913   if (preversep)
 914     return opt_result::failure_at (stmt,
 915                                    "failed: reverse storage order.\n");
 916
 917   /* Calculate the alignment and misalignment for the inner reference.  */
 918   unsigned int HOST_WIDE_INT bit_base_misalignment;
 919   unsigned int bit_base_alignment;
 920   get_object_alignment_1 (base, &bit_base_alignment, &bit_base_misalignment);
 921
 922   /* There are no bitfield references remaining in BASE, so the values
 923      we got back must be whole bytes.  */
 924   gcc_assert (bit_base_alignment % BITS_PER_UNIT == 0
 925               && bit_base_misalignment % BITS_PER_UNIT == 0);
 926   unsigned int base_alignment = bit_base_alignment / BITS_PER_UNIT;
 927   poly_int64 base_misalignment = bit_base_misalignment / BITS_PER_UNIT;
 928
 929   if (TREE_CODE (base) == MEM_REF)
 930     {
 931       if (!integer_zerop (TREE_OPERAND (base, 1)))
 932         {
 933           /* Subtract MOFF from the base and add it to POFFSET instead.
 934              Adjust the misalignment to reflect the amount we subtracted.  */
 935           poly_offset_int moff = mem_ref_offset (base);
 936           base_misalignment -= moff.force_shwi ();
 937           tree mofft = wide_int_to_tree (sizetype, moff);
 938           if (!poffset)
 939             poffset = mofft;
 940           else
 941             poffset = size_binop (PLUS_EXPR, poffset, mofft);
 942         }
 943       base = TREE_OPERAND (base, 0);
 944     }
 945   else
 946     base = build_fold_addr_expr (base);
 947
 948   if (in_loop)
 949     {
 950       if (!simple_iv (loop, loop, base, &base_iv, true))
 951         return opt_result::failure_at
 952           (stmt, "failed: evolution of base is not affine.\n");
 953     }
 954   else
 955     {
 956       base_iv.base = base;
 957       base_iv.step = ssize_int (0);
 958       base_iv.no_overflow = true;
 959     }
 960
 961   if (!poffset)
 962     {
 963       offset_iv.base = ssize_int (0);
 964       offset_iv.step = ssize_int (0);
 965     }
 966   else
 967     {
 968       if (!in_loop)
 969         {
 970           offset_iv.base = poffset;
 971           offset_iv.step = ssize_int (0);
 972         }
 973       else if (!simple_iv (loop, loop, poffset, &offset_iv, true))
 974         return opt_result::failure_at
 975           (stmt, "failed: evolution of offset is not affine.\n");
 976     }
 977
 978   init = ssize_int (pbytepos);
 979
 980   /* Subtract any constant component from the base and add it to INIT instead.
 981      Adjust the misalignment to reflect the amount we subtracted.  */
 982   split_constant_offset (base_iv.base, &base_iv.base, &dinit);
 983   init = size_binop (PLUS_EXPR, init, dinit);
 984   base_misalignment -= TREE_INT_CST_LOW (dinit);
 985
 986   split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
 987   init = size_binop (PLUS_EXPR, init, dinit);
 988
 989   step = size_binop (PLUS_EXPR,
 990                      fold_convert (ssizetype, base_iv.step),
 991                      fold_convert (ssizetype, offset_iv.step));
 992
 993   base = canonicalize_base_object_address (base_iv.base);
 994
 995   /* See if get_pointer_alignment can guarantee a higher alignment than
 996      the one we calculated above.  */
 997   unsigned int HOST_WIDE_INT alt_misalignment;
 998   unsigned int alt_alignment;
 999   get_pointer_alignment_1 (base, &alt_alignment, &alt_misalignment);
1000
1001   /* As above, these values must be whole bytes.  */
1002   gcc_assert (alt_alignment % BITS_PER_UNIT == 0
1003               && alt_misalignment % BITS_PER_UNIT == 0);
1004   alt_alignment /= BITS_PER_UNIT;
1005   alt_misalignment /= BITS_PER_UNIT;
1006
1007   if (base_alignment < alt_alignment)
1008     {
1009       base_alignment = alt_alignment;
1010       base_misalignment = alt_misalignment;
1011     }
1012
1013   drb->base_address = base;
1014   drb->offset = fold_convert (ssizetype, offset_iv.base);
1015   drb->init = init;
1016   drb->step = step;
1017   if (known_misalignment (base_misalignment, base_alignment,
1018                           &drb->base_misalignment))
1019     drb->base_alignment = base_alignment;
1020   else
1021     {
1022       drb->base_alignment = known_alignment (base_misalignment);
1023       drb->base_misalignment = 0;
1024     }
1025   drb->offset_alignment = highest_pow2_factor (offset_iv.base);
1026   drb->step_alignment = highest_pow2_factor (step);
1027
1028   if (dump_file && (dump_flags & TDF_DETAILS))
1029     fprintf (dump_file, "success.\n");
1030
1031   return opt_result::success ();
1032 }
1033
1034 /* Return true if OP is a valid component reference for a DR access
1035    function.  This accepts a subset of what handled_component_p accepts.  */
1036
1037 static bool
1038 access_fn_component_p (tree op)
1039 {
1040   switch (TREE_CODE (op))
1041     {
1042     case REALPART_EXPR:
1043     case IMAGPART_EXPR:
1044     case ARRAY_REF:
1045       return true;
1046
1047     case COMPONENT_REF:
1048       return TREE_CODE (TREE_TYPE (TREE_OPERAND (op, 0))) == RECORD_TYPE;
1049
1050     default:
1051       return false;
1052     }
1053 }
1054
1055 /* Determines the base object and the list of indices of memory reference
1056    DR, analyzed in LOOP and instantiated before NEST.  */
1057
1058 static void
1059 dr_analyze_indices (struct data_reference *dr, edge nest, loop_p loop)
1060 {
1061   vec<tree> access_fns = vNULL;
1062   tree ref, op;
1063   tree base, off, access_fn;
1064
1065   /* If analyzing a basic-block there are no indices to analyze
1066      and thus no access functions.  */
1067   if (!nest)
1068     {
1069       DR_BASE_OBJECT (dr) = DR_REF (dr);
1070       DR_ACCESS_FNS (dr).create (0);
1071       return;
1072     }
1073
1074   ref = DR_REF (dr);
1075
1076   /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
1077      into a two element array with a constant index.  The base is
1078      then just the immediate underlying object.  */
1079   if (TREE_CODE (ref) == REALPART_EXPR)
1080     {
1081       ref = TREE_OPERAND (ref, 0);
1082       access_fns.safe_push (integer_zero_node);
1083     }
1084   else if (TREE_CODE (ref) == IMAGPART_EXPR)
1085     {
1086       ref = TREE_OPERAND (ref, 0);
1087       access_fns.safe_push (integer_one_node);
1088     }
1089
1090   /* Analyze access functions of dimensions we know to be independent.
1091      The list of component references handled here should be kept in
1092      sync with access_fn_component_p.  */
1093   while (handled_component_p (ref))
1094     {
1095       if (TREE_CODE (ref) == ARRAY_REF)
1096         {
1097           op = TREE_OPERAND (ref, 1);
1098           access_fn = analyze_scalar_evolution (loop, op);
1099           access_fn = instantiate_scev (nest, loop, access_fn);
1100           access_fns.safe_push (access_fn);
1101         }
1102       else if (TREE_CODE (ref) == COMPONENT_REF
1103                && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) == RECORD_TYPE)
1104         {
1105           /* For COMPONENT_REFs of records (but not unions!) use the
1106              FIELD_DECL offset as constant access function so we can
1107              disambiguate a[i].f1 and a[i].f2.  */
1108           tree off = component_ref_field_offset (ref);
1109           off = size_binop (PLUS_EXPR,
1110                             size_binop (MULT_EXPR,
1111                                         fold_convert (bitsizetype, off),
1112                                         bitsize_int (BITS_PER_UNIT)),
1113                             DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref, 1)));
1114           access_fns.safe_push (off);
1115         }
1116       else
1117         /* If we have an unhandled component we could not translate
1118            to an access function stop analyzing.  We have determined
1119            our base object in this case.  */
1120         break;
1121
1122       ref = TREE_OPERAND (ref, 0);
1123     }
1124
1125   /* If the address operand of a MEM_REF base has an evolution in the
1126      analyzed nest, add it as an additional independent access-function.  */
1127   if (TREE_CODE (ref) == MEM_REF)
1128     {
1129       op = TREE_OPERAND (ref, 0);
1130       access_fn = analyze_scalar_evolution (loop, op);
1131       access_fn = instantiate_scev (nest, loop, access_fn);
1132       if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
1133         {
1134           tree orig_type;
1135           tree memoff = TREE_OPERAND (ref, 1);
1136           base = initial_condition (access_fn);
1137           orig_type = TREE_TYPE (base);
1138           STRIP_USELESS_TYPE_CONVERSION (base);
1139           split_constant_offset (base, &base, &off);
1140           STRIP_USELESS_TYPE_CONVERSION (base);
1141           /* Fold the MEM_REF offset into the evolutions initial
1142              value to make more bases comparable.  */
1143           if (!integer_zerop (memoff))
1144             {
1145               off = size_binop (PLUS_EXPR, off,
1146                                 fold_convert (ssizetype, memoff));
1147               memoff = build_int_cst (TREE_TYPE (memoff), 0);
1148             }
1149           /* Adjust the offset so it is a multiple of the access type
1150              size and thus we separate bases that can possibly be used
1151              to produce partial overlaps (which the access_fn machinery
1152              cannot handle).  */
1153           wide_int rem;
1154           if (TYPE_SIZE_UNIT (TREE_TYPE (ref))
1155               && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (ref))) == INTEGER_CST
1156               && !integer_zerop (TYPE_SIZE_UNIT (TREE_TYPE (ref))))
1157             rem = wi::mod_trunc
1158               (wi::to_wide (off),
1159                wi::to_wide (TYPE_SIZE_UNIT (TREE_TYPE (ref))),
1160                SIGNED);
1161           else
1162             /* If we can't compute the remainder simply force the initial
1163                condition to zero.  */
1164             rem = wi::to_wide (off);
1165           off = wide_int_to_tree (ssizetype, wi::to_wide (off) - rem);
1166           memoff = wide_int_to_tree (TREE_TYPE (memoff), rem);
1167           /* And finally replace the initial condition.  */
1168           access_fn = chrec_replace_initial_condition
1169               (access_fn, fold_convert (orig_type, off));
1170           /* ???  This is still not a suitable base object for
1171              dr_may_alias_p - the base object needs to be an
1172              access that covers the object as whole.  With
1173              an evolution in the pointer this cannot be
1174              guaranteed.
1175              As a band-aid, mark the access so we can special-case
1176              it in dr_may_alias_p.  */
1177           tree old = ref;
1178           ref = fold_build2_loc (EXPR_LOCATION (ref),
1179                                  MEM_REF, TREE_TYPE (ref),
1180                                  base, memoff);
1181           MR_DEPENDENCE_CLIQUE (ref) = MR_DEPENDENCE_CLIQUE (old);
1182           MR_DEPENDENCE_BASE (ref) = MR_DEPENDENCE_BASE (old);
1183           DR_UNCONSTRAINED_BASE (dr) = true;
1184           access_fns.safe_push (access_fn);
1185         }
1186     }
1187   else if (DECL_P (ref))
1188     {
1189       /* Canonicalize DR_BASE_OBJECT to MEM_REF form.  */
1190       ref = build2 (MEM_REF, TREE_TYPE (ref),
1191                     build_fold_addr_expr (ref),
1192                     build_int_cst (reference_alias_ptr_type (ref), 0));
1193     }
1194
1195   DR_BASE_OBJECT (dr) = ref;
1196   DR_ACCESS_FNS (dr) = access_fns;
1197 }
1198
1199 /* Extracts the alias analysis information from the memory reference DR.  */
1200
1201 static void
1202 dr_analyze_alias (struct data_reference *dr)
1203 {
1204   tree ref = DR_REF (dr);
1205   tree base = get_base_address (ref), addr;
1206
1207   if (INDIRECT_REF_P (base)
1208       || TREE_CODE (base) == MEM_REF)
1209     {
1210       addr = TREE_OPERAND (base, 0);
1211       if (TREE_CODE (addr) == SSA_NAME)
1212         DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
1213     }
1214 }
1215
1216 /* Frees data reference DR.  */
1217
1218 void
1219 free_data_ref (data_reference_p dr)
1220 {
1221   DR_ACCESS_FNS (dr).release ();
1222   free (dr);
1223 }
1224
1225 /* Analyze memory reference MEMREF, which is accessed in STMT.
1226    The reference is a read if IS_READ is true, otherwise it is a write.
1227    IS_CONDITIONAL_IN_STMT indicates that the reference is conditional
1228    within STMT, i.e. that it might not occur even if STMT is executed
1229    and runs to completion.
1230
1231    Return the data_reference description of MEMREF.  NEST is the outermost
1232    loop in which the reference should be instantiated, LOOP is the loop
1233    in which the data reference should be analyzed.  */
1234
1235 struct data_reference *
1236 create_data_ref (edge nest, loop_p loop, tree memref, gimple *stmt,
1237                  bool is_read, bool is_conditional_in_stmt)
1238 {
1239   struct data_reference *dr;
1240
1241   if (dump_file && (dump_flags & TDF_DETAILS))
1242     {
1243       fprintf (dump_file, "Creating dr for ");
1244       print_generic_expr (dump_file, memref, TDF_SLIM);
1245       fprintf (dump_file, "\n");
1246     }
1247
1248   dr = XCNEW (struct data_reference);
1249   DR_STMT (dr) = stmt;
1250   DR_REF (dr) = memref;
1251   DR_IS_READ (dr) = is_read;
1252   DR_IS_CONDITIONAL_IN_STMT (dr) = is_conditional_in_stmt;
1253
1254   dr_analyze_innermost (&DR_INNERMOST (dr), memref,
1255                         nest != NULL ? loop : NULL, stmt);
1256   dr_analyze_indices (dr, nest, loop);
1257   dr_analyze_alias (dr);
1258
1259   if (dump_file && (dump_flags & TDF_DETAILS))
1260     {
1261       unsigned i;
1262       fprintf (dump_file, "\tbase_address: ");
1263       print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
1264       fprintf (dump_file, "\n\toffset from base address: ");
1265       print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
1266       fprintf (dump_file, "\n\tconstant offset from base address: ");
1267       print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
1268       fprintf (dump_file, "\n\tstep: ");
1269       print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
1270       fprintf (dump_file, "\n\tbase alignment: %d", DR_BASE_ALIGNMENT (dr));
1271       fprintf (dump_file, "\n\tbase misalignment: %d",
1272                DR_BASE_MISALIGNMENT (dr));
1273       fprintf (dump_file, "\n\toffset alignment: %d",
1274                DR_OFFSET_ALIGNMENT (dr));
1275       fprintf (dump_file, "\n\tstep alignment: %d", DR_STEP_ALIGNMENT (dr));
1276       fprintf (dump_file, "\n\tbase_object: ");
1277       print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
1278       fprintf (dump_file, "\n");
1279       for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
1280         {
1281           fprintf (dump_file, "\tAccess function %d: ", i);
1282           print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
1283         }
1284     }
1285
1286   return dr;
1287 }
1288
1289 /*  A helper function computes order between two tree expressions T1 and T2.
1290     This is used in comparator functions sorting objects based on the order
1291     of tree expressions.  The function returns -1, 0, or 1.  */
1292
1293 int
1294 data_ref_compare_tree (tree t1, tree t2)
1295 {
1296   int i, cmp;
1297   enum tree_code code;
1298   char tclass;
1299
1300   if (t1 == t2)
1301     return 0;
1302   if (t1 == NULL)
1303     return -1;
1304   if (t2 == NULL)
1305     return 1;
1306
1307   STRIP_USELESS_TYPE_CONVERSION (t1);
1308   STRIP_USELESS_TYPE_CONVERSION (t2);
1309   if (t1 == t2)
1310     return 0;
1311
1312   if (TREE_CODE (t1) != TREE_CODE (t2)
1313       && ! (CONVERT_EXPR_P (t1) && CONVERT_EXPR_P (t2)))
1314     return TREE_CODE (t1) < TREE_CODE (t2) ? -1 : 1;
1315
1316   code = TREE_CODE (t1);
1317   switch (code)
1318     {
1319     case INTEGER_CST:
1320       return tree_int_cst_compare (t1, t2);
1321
1322     case STRING_CST:
1323       if (TREE_STRING_LENGTH (t1) != TREE_STRING_LENGTH (t2))
1324         return TREE_STRING_LENGTH (t1) < TREE_STRING_LENGTH (t2) ? -1 : 1;
1325       return memcmp (TREE_STRING_POINTER (t1), TREE_STRING_POINTER (t2),
1326                      TREE_STRING_LENGTH (t1));
1327
1328     case SSA_NAME:
1329       if (SSA_NAME_VERSION (t1) != SSA_NAME_VERSION (t2))
1330         return SSA_NAME_VERSION (t1) < SSA_NAME_VERSION (t2) ? -1 : 1;
1331       break;
1332
1333     default:
1334       if (POLY_INT_CST_P (t1))
1335         return compare_sizes_for_sort (wi::to_poly_widest (t1),
1336                                        wi::to_poly_widest (t2));
1337
1338       tclass = TREE_CODE_CLASS (code);
1339
1340       /* For decls, compare their UIDs.  */
1341       if (tclass == tcc_declaration)
1342         {
1343           if (DECL_UID (t1) != DECL_UID (t2))
1344             return DECL_UID (t1) < DECL_UID (t2) ? -1 : 1;
1345           break;
1346         }
1347       /* For expressions, compare their operands recursively.  */
1348       else if (IS_EXPR_CODE_CLASS (tclass))
1349         {
1350           for (i = TREE_OPERAND_LENGTH (t1) - 1; i >= 0; --i)
1351             {
1352               cmp = data_ref_compare_tree (TREE_OPERAND (t1, i),
1353                                            TREE_OPERAND (t2, i));
1354               if (cmp != 0)
1355                 return cmp;
1356             }
1357         }
1358       else
1359         gcc_unreachable ();
1360     }
1361
1362   return 0;
1363 }
1364
1365 /* Return TRUE it's possible to resolve data dependence DDR by runtime alias
1366    check.  */
1367
1368 opt_result
1369 runtime_alias_check_p (ddr_p ddr, class loop *loop, bool speed_p)
1370 {
1371   if (dump_enabled_p ())
1372     dump_printf (MSG_NOTE,
1373                  "consider run-time aliasing test between %T and %T\n",
1374                  DR_REF (DDR_A (ddr)), DR_REF (DDR_B (ddr)));
1375
1376   if (!speed_p)
1377     return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
1378                                    "runtime alias check not supported when"
1379                                    " optimizing for size.\n");
1380
1381   /* FORNOW: We don't support versioning with outer-loop in either
1382      vectorization or loop distribution.  */
1383   if (loop != NULL && loop->inner != NULL)
1384     return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
1385                                    "runtime alias check not supported for"
1386                                    " outer loop.\n");
1387
1388   return opt_result::success ();
1389 }
1390
1391 /* Operator == between two dr_with_seg_len objects.
1392
1393    This equality operator is used to make sure two data refs
1394    are the same one so that we will consider to combine the
1395    aliasing checks of those two pairs of data dependent data
1396    refs.  */
1397
1398 static bool
1399 operator == (const dr_with_seg_len& d1,
1400              const dr_with_seg_len& d2)
1401 {
1402   return (operand_equal_p (DR_BASE_ADDRESS (d1.dr),
1403                            DR_BASE_ADDRESS (d2.dr), 0)
1404           && data_ref_compare_tree (DR_OFFSET (d1.dr), DR_OFFSET (d2.dr)) == 0
1405           && data_ref_compare_tree (DR_INIT (d1.dr), DR_INIT (d2.dr)) == 0
1406           && data_ref_compare_tree (d1.seg_len, d2.seg_len) == 0
1407           && known_eq (d1.access_size, d2.access_size)
1408           && d1.align == d2.align);
1409 }
1410
1411 /* Comparison function for sorting objects of dr_with_seg_len_pair_t
1412    so that we can combine aliasing checks in one scan.  */
1413
1414 static int
1415 comp_dr_with_seg_len_pair (const void *pa_, const void *pb_)
1416 {
1417   const dr_with_seg_len_pair_t* pa = (const dr_with_seg_len_pair_t *) pa_;
1418   const dr_with_seg_len_pair_t* pb = (const dr_with_seg_len_pair_t *) pb_;
1419   const dr_with_seg_len &a1 = pa->first, &a2 = pa->second;
1420   const dr_with_seg_len &b1 = pb->first, &b2 = pb->second;
1421
1422   /* For DR pairs (a, b) and (c, d), we only consider to merge the alias checks
1423      if a and c have the same basic address snd step, and b and d have the same
1424      address and step.  Therefore, if any a&c or b&d don't have the same address
1425      and step, we don't care the order of those two pairs after sorting.  */
1426   int comp_res;
1427
1428   if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a1.dr),
1429                                          DR_BASE_ADDRESS (b1.dr))) != 0)
1430     return comp_res;
1431   if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a2.dr),
1432                                          DR_BASE_ADDRESS (b2.dr))) != 0)
1433     return comp_res;
1434   if ((comp_res = data_ref_compare_tree (DR_STEP (a1.dr),
1435                                          DR_STEP (b1.dr))) != 0)
1436     return comp_res;
1437   if ((comp_res = data_ref_compare_tree (DR_STEP (a2.dr),
1438                                          DR_STEP (b2.dr))) != 0)
1439     return comp_res;
1440   if ((comp_res = data_ref_compare_tree (DR_OFFSET (a1.dr),
1441                                          DR_OFFSET (b1.dr))) != 0)
1442     return comp_res;
1443   if ((comp_res = data_ref_compare_tree (DR_INIT (a1.dr),
1444                                          DR_INIT (b1.dr))) != 0)
1445     return comp_res;
1446   if ((comp_res = data_ref_compare_tree (DR_OFFSET (a2.dr),
1447                                          DR_OFFSET (b2.dr))) != 0)
1448     return comp_res;
1449   if ((comp_res = data_ref_compare_tree (DR_INIT (a2.dr),
1450                                          DR_INIT (b2.dr))) != 0)
1451     return comp_res;
1452
1453   return 0;
1454 }
1455
1456 /* Dump information about ALIAS_PAIR, indenting each line by INDENT.  */
1457
1458 static void
1459 dump_alias_pair (dr_with_seg_len_pair_t *alias_pair, const char *indent)
1460 {
1461   dump_printf (MSG_NOTE, "%sreference:      %T vs. %T\n", indent,
1462                DR_REF (alias_pair->first.dr),
1463                DR_REF (alias_pair->second.dr));
1464
1465   dump_printf (MSG_NOTE, "%ssegment length: %T", indent,
1466                alias_pair->first.seg_len);
1467   if (!operand_equal_p (alias_pair->first.seg_len,
1468                         alias_pair->second.seg_len, 0))
1469     dump_printf (MSG_NOTE, " vs. %T", alias_pair->second.seg_len);
1470
1471   dump_printf (MSG_NOTE, "\n%saccess size:    ", indent);
1472   dump_dec (MSG_NOTE, alias_pair->first.access_size);
1473   if (maybe_ne (alias_pair->first.access_size, alias_pair->second.access_size))
1474     {
1475       dump_printf (MSG_NOTE, " vs. ");
1476       dump_dec (MSG_NOTE, alias_pair->second.access_size);
1477     }
1478
1479   dump_printf (MSG_NOTE, "\n%salignment:      %d", indent,
1480                alias_pair->first.align);
1481   if (alias_pair->first.align != alias_pair->second.align)
1482     dump_printf (MSG_NOTE, " vs. %d", alias_pair->second.align);
1483
1484   dump_printf (MSG_NOTE, "\n%sflags:         ", indent);
1485   if (alias_pair->flags & DR_ALIAS_RAW)
1486     dump_printf (MSG_NOTE, " RAW");
1487   if (alias_pair->flags & DR_ALIAS_WAR)
1488     dump_printf (MSG_NOTE, " WAR");
1489   if (alias_pair->flags & DR_ALIAS_WAW)
1490     dump_printf (MSG_NOTE, " WAW");
1491   if (alias_pair->flags & DR_ALIAS_ARBITRARY)
1492     dump_printf (MSG_NOTE, " ARBITRARY");
1493   if (alias_pair->flags & DR_ALIAS_SWAPPED)
1494     dump_printf (MSG_NOTE, " SWAPPED");
1495   if (alias_pair->flags & DR_ALIAS_UNSWAPPED)
1496     dump_printf (MSG_NOTE, " UNSWAPPED");
1497   if (alias_pair->flags & DR_ALIAS_MIXED_STEPS)
1498     dump_printf (MSG_NOTE, " MIXED_STEPS");
1499   if (alias_pair->flags == 0)
1500     dump_printf (MSG_NOTE, " <none>");
1501   dump_printf (MSG_NOTE, "\n");
1502 }
1503
1504 /* Merge alias checks recorded in ALIAS_PAIRS and remove redundant ones.
1505    FACTOR is number of iterations that each data reference is accessed.
1506
1507    Basically, for each pair of dependent data refs store_ptr_0 & load_ptr_0,
1508    we create an expression:
1509
1510    ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
1511    || (load_ptr_0 + load_segment_length_0) <= store_ptr_0))
1512
1513    for aliasing checks.  However, in some cases we can decrease the number
1514    of checks by combining two checks into one.  For example, suppose we have
1515    another pair of data refs store_ptr_0 & load_ptr_1, and if the following
1516    condition is satisfied:
1517
1518    load_ptr_0 < load_ptr_1  &&
1519    load_ptr_1 - load_ptr_0 - load_segment_length_0 < store_segment_length_0
1520
1521    (this condition means, in each iteration of vectorized loop, the accessed
1522    memory of store_ptr_0 cannot be between the memory of load_ptr_0 and
1523    load_ptr_1.)
1524
1525    we then can use only the following expression to finish the alising checks
1526    between store_ptr_0 & load_ptr_0 and store_ptr_0 & load_ptr_1:
1527
1528    ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
1529    || (load_ptr_1 + load_segment_length_1 <= store_ptr_0))
1530
1531    Note that we only consider that load_ptr_0 and load_ptr_1 have the same
1532    basic address.  */
1533
1534 void
1535 prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs,
1536                                poly_uint64)
1537 {
1538   if (alias_pairs->is_empty ())
1539     return;
1540
1541   /* Canonicalize each pair so that the base components are ordered wrt
1542      data_ref_compare_tree.  This allows the loop below to merge more
1543      cases.  */
1544   unsigned int i;
1545   dr_with_seg_len_pair_t *alias_pair;
1546   FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
1547     {
1548       data_reference_p dr_a = alias_pair->first.dr;
1549       data_reference_p dr_b = alias_pair->second.dr;
1550       int comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (dr_a),
1551                                             DR_BASE_ADDRESS (dr_b));
1552       if (comp_res == 0)
1553         comp_res = data_ref_compare_tree (DR_OFFSET (dr_a), DR_OFFSET (dr_b));
1554       if (comp_res == 0)
1555         comp_res = data_ref_compare_tree (DR_INIT (dr_a), DR_INIT (dr_b));
1556       if (comp_res > 0)
1557         {
1558           std::swap (alias_pair->first, alias_pair->second);
1559           alias_pair->flags |= DR_ALIAS_SWAPPED;
1560         }
1561       else
1562         alias_pair->flags |= DR_ALIAS_UNSWAPPED;
1563     }
1564
1565   /* Sort the collected data ref pairs so that we can scan them once to
1566      combine all possible aliasing checks.  */
1567   alias_pairs->qsort (comp_dr_with_seg_len_pair);
1568
1569   /* Scan the sorted dr pairs and check if we can combine alias checks
1570      of two neighboring dr pairs.  */
1571   unsigned int last = 0;
1572   for (i = 1; i < alias_pairs->length (); ++i)
1573     {
1574       /* Deal with two ddrs (dr_a1, dr_b1) and (dr_a2, dr_b2).  */
1575       dr_with_seg_len_pair_t *alias_pair1 = &(*alias_pairs)[last];
1576       dr_with_seg_len_pair_t *alias_pair2 = &(*alias_pairs)[i];
1577
1578       dr_with_seg_len *dr_a1 = &alias_pair1->first;
1579       dr_with_seg_len *dr_b1 = &alias_pair1->second;
1580       dr_with_seg_len *dr_a2 = &alias_pair2->first;
1581       dr_with_seg_len *dr_b2 = &alias_pair2->second;
1582
1583       /* Remove duplicate data ref pairs.  */
1584       if (*dr_a1 == *dr_a2 && *dr_b1 == *dr_b2)
1585         {
1586           if (dump_enabled_p ())
1587             dump_printf (MSG_NOTE, "found equal ranges %T, %T and %T, %T\n",
1588                          DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
1589                          DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
1590           alias_pair1->flags |= alias_pair2->flags;
1591           continue;
1592         }
1593
1594       /* Assume that we won't be able to merge the pairs, then correct
1595          if we do.  */
1596       last += 1;
1597       if (last != i)
1598         (*alias_pairs)[last] = (*alias_pairs)[i];
1599
1600       if (*dr_a1 == *dr_a2 || *dr_b1 == *dr_b2)
1601         {
1602           /* We consider the case that DR_B1 and DR_B2 are same memrefs,
1603              and DR_A1 and DR_A2 are two consecutive memrefs.  */
1604           if (*dr_a1 == *dr_a2)
1605             {
1606               std::swap (dr_a1, dr_b1);
1607               std::swap (dr_a2, dr_b2);
1608             }
1609
1610           poly_int64 init_a1, init_a2;
1611           /* Only consider cases in which the distance between the initial
1612              DR_A1 and the initial DR_A2 is known at compile time.  */
1613           if (!operand_equal_p (DR_BASE_ADDRESS (dr_a1->dr),
1614                                 DR_BASE_ADDRESS (dr_a2->dr), 0)
1615               || !operand_equal_p (DR_OFFSET (dr_a1->dr),
1616                                    DR_OFFSET (dr_a2->dr), 0)
1617               || !poly_int_tree_p (DR_INIT (dr_a1->dr), &init_a1)
1618               || !poly_int_tree_p (DR_INIT (dr_a2->dr), &init_a2))
1619             continue;
1620
1621           /* Don't combine if we can't tell which one comes first.  */
1622           if (!ordered_p (init_a1, init_a2))
1623             continue;
1624
1625           /* Work out what the segment length would be if we did combine
1626              DR_A1 and DR_A2:
1627
1628              - If DR_A1 and DR_A2 have equal lengths, that length is
1629                also the combined length.
1630
1631              - If DR_A1 and DR_A2 both have negative "lengths", the combined
1632                length is the lower bound on those lengths.
1633
1634              - If DR_A1 and DR_A2 both have positive lengths, the combined
1635                length is the upper bound on those lengths.
1636
1637              Other cases are unlikely to give a useful combination.
1638
1639              The lengths both have sizetype, so the sign is taken from
1640              the step instead.  */
1641           poly_uint64 new_seg_len = 0;
1642           bool new_seg_len_p = !operand_equal_p (dr_a1->seg_len,
1643                                                  dr_a2->seg_len, 0);
1644           if (new_seg_len_p)
1645             {
1646               poly_uint64 seg_len_a1, seg_len_a2;
1647               if (!poly_int_tree_p (dr_a1->seg_len, &seg_len_a1)
1648                   || !poly_int_tree_p (dr_a2->seg_len, &seg_len_a2))
1649                 continue;
1650
1651               tree indicator_a = dr_direction_indicator (dr_a1->dr);
1652               if (TREE_CODE (indicator_a) != INTEGER_CST)
1653                 continue;
1654
1655               tree indicator_b = dr_direction_indicator (dr_a2->dr);
1656               if (TREE_CODE (indicator_b) != INTEGER_CST)
1657                 continue;
1658
1659               int sign_a = tree_int_cst_sgn (indicator_a);
1660               int sign_b = tree_int_cst_sgn (indicator_b);
1661
1662               if (sign_a <= 0 && sign_b <= 0)
1663                 new_seg_len = lower_bound (seg_len_a1, seg_len_a2);
1664               else if (sign_a >= 0 && sign_b >= 0)
1665                 new_seg_len = upper_bound (seg_len_a1, seg_len_a2);
1666               else
1667                 continue;
1668             }
1669           /* At this point we're committed to merging the refs.  */
1670
1671           /* Make sure dr_a1 starts left of dr_a2.  */
1672           if (maybe_gt (init_a1, init_a2))
1673             {
1674               std::swap (*dr_a1, *dr_a2);
1675               std::swap (init_a1, init_a2);
1676             }
1677
1678           /* The DR_Bs are equal, so only the DR_As can introduce
1679              mixed steps.  */
1680           if (!operand_equal_p (DR_STEP (dr_a1->dr), DR_STEP (dr_a2->dr), 0))
1681             alias_pair1->flags |= DR_ALIAS_MIXED_STEPS;
1682
1683           if (new_seg_len_p)
1684             {
1685               dr_a1->seg_len = build_int_cst (TREE_TYPE (dr_a1->seg_len),
1686                                               new_seg_len);
1687               dr_a1->align = MIN (dr_a1->align, known_alignment (new_seg_len));
1688             }
1689
1690           /* This is always positive due to the swap above.  */
1691           poly_uint64 diff = init_a2 - init_a1;
1692
1693           /* The new check will start at DR_A1.  Make sure that its access
1694              size encompasses the initial DR_A2.  */
1695           if (maybe_lt (dr_a1->access_size, diff + dr_a2->access_size))
1696             {
1697               dr_a1->access_size = upper_bound (dr_a1->access_size,
1698                                                 diff + dr_a2->access_size);
1699               unsigned int new_align = known_alignment (dr_a1->access_size);
1700               dr_a1->align = MIN (dr_a1->align, new_align);
1701             }
1702           if (dump_enabled_p ())
1703             dump_printf (MSG_NOTE, "merging ranges for %T, %T and %T, %T\n",
1704                          DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
1705                          DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
1706           alias_pair1->flags |= alias_pair2->flags;
1707           last -= 1;
1708         }
1709     }
1710   alias_pairs->truncate (last + 1);
1711
1712   /* Try to restore the original dr_with_seg_len order within each
1713      dr_with_seg_len_pair_t.  If we ended up combining swapped and
1714      unswapped pairs into the same check, we have to invalidate any
1715      RAW, WAR and WAW information for it.  */
1716   if (dump_enabled_p ())
1717     dump_printf (MSG_NOTE, "merged alias checks:\n");
1718   FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
1719     {
1720       unsigned int swap_mask = (DR_ALIAS_SWAPPED | DR_ALIAS_UNSWAPPED);
1721       unsigned int swapped = (alias_pair->flags & swap_mask);
1722       if (swapped == DR_ALIAS_SWAPPED)
1723         std::swap (alias_pair->first, alias_pair->second);
1724       else if (swapped != DR_ALIAS_UNSWAPPED)
1725         alias_pair->flags |= DR_ALIAS_ARBITRARY;
1726       alias_pair->flags &= ~swap_mask;
1727       if (dump_enabled_p ())
1728         dump_alias_pair (alias_pair, "  ");
1729     }
1730 }
1731
1732 /* A subroutine of create_intersect_range_checks, with a subset of the
1733    same arguments.  Try to use IFN_CHECK_RAW_PTRS and IFN_CHECK_WAR_PTRS
1734    to optimize cases in which the references form a simple RAW, WAR or
1735    WAR dependence.  */
1736
1737 static bool
1738 create_ifn_alias_checks (tree *cond_expr,
1739                          const dr_with_seg_len_pair_t &alias_pair)
1740 {
1741   const dr_with_seg_len& dr_a = alias_pair.first;
1742   const dr_with_seg_len& dr_b = alias_pair.second;
1743
1744   /* Check for cases in which:
1745
1746      (a) we have a known RAW, WAR or WAR dependence
1747      (b) the accesses are well-ordered in both the original and new code
1748          (see the comment above the DR_ALIAS_* flags for details); and
1749      (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR.  */
1750   if (alias_pair.flags & ~(DR_ALIAS_RAW | DR_ALIAS_WAR | DR_ALIAS_WAW))
1751     return false;
1752
1753   /* Make sure that both DRs access the same pattern of bytes,
1754      with a constant length and step.  */
1755   poly_uint64 seg_len;
1756   if (!operand_equal_p (dr_a.seg_len, dr_b.seg_len, 0)
1757       || !poly_int_tree_p (dr_a.seg_len, &seg_len)
1758       || maybe_ne (dr_a.access_size, dr_b.access_size)
1759       || !operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0)
1760       || !tree_fits_uhwi_p (DR_STEP (dr_a.dr)))
1761     return false;
1762
1763   unsigned HOST_WIDE_INT bytes = tree_to_uhwi (DR_STEP (dr_a.dr));
1764   tree addr_a = DR_BASE_ADDRESS (dr_a.dr);
1765   tree addr_b = DR_BASE_ADDRESS (dr_b.dr);
1766
1767   /* See whether the target suports what we want to do.  WAW checks are
1768      equivalent to WAR checks here.  */
1769   internal_fn ifn = (alias_pair.flags & DR_ALIAS_RAW
1770                      ? IFN_CHECK_RAW_PTRS
1771                      : IFN_CHECK_WAR_PTRS);
1772   unsigned int align = MIN (dr_a.align, dr_b.align);
1773   poly_uint64 full_length = seg_len + bytes;
1774   if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a),
1775                                            full_length, align))
1776     {
1777       full_length = seg_len + dr_a.access_size;
1778       if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a),
1779                                                full_length, align))
1780         return false;
1781     }
1782
1783   /* Commit to using this form of test.  */
1784   addr_a = fold_build_pointer_plus (addr_a, DR_OFFSET (dr_a.dr));
1785   addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr));
1786
1787   addr_b = fold_build_pointer_plus (addr_b, DR_OFFSET (dr_b.dr));
1788   addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr));
1789
1790   *cond_expr = build_call_expr_internal_loc (UNKNOWN_LOCATION,
1791                                              ifn, boolean_type_node,
1792                                              4, addr_a, addr_b,
1793                                              size_int (full_length),
1794                                              size_int (align));
1795
1796   if (dump_enabled_p ())
1797     {
1798       if (ifn == IFN_CHECK_RAW_PTRS)
1799         dump_printf (MSG_NOTE, "using an IFN_CHECK_RAW_PTRS test\n");
1800       else
1801         dump_printf (MSG_NOTE, "using an IFN_CHECK_WAR_PTRS test\n");
1802     }
1803   return true;
1804 }
1805
1806 /* Try to generate a runtime condition that is true if ALIAS_PAIR is
1807    free of aliases, using a condition based on index values instead
1808    of a condition based on addresses.  Return true on success,
1809    storing the condition in *COND_EXPR.
1810
1811    This can only be done if the two data references in ALIAS_PAIR access
1812    the same array object and the index is the only difference.  For example,
1813    if the two data references are DR_A and DR_B:
1814
1815                        DR_A                           DR_B
1816       data-ref         arr[i]                         arr[j]
1817       base_object      arr                            arr
1818       index            {i_0, +, 1}_loop               {j_0, +, 1}_loop
1819
1820    The addresses and their index are like:
1821
1822         |<- ADDR_A    ->|          |<- ADDR_B    ->|
1823      ------------------------------------------------------->
1824         |   |   |   |   |          |   |   |   |   |
1825      ------------------------------------------------------->
1826         i_0 ...         i_0+4      j_0 ...         j_0+4
1827
1828    We can create expression based on index rather than address:
1829
1830      (unsigned) (i_0 - j_0 + 3) <= 6
1831
1832    i.e. the indices are less than 4 apart.
1833
1834    Note evolution step of index needs to be considered in comparison.  */
1835
1836 static bool
1837 create_intersect_range_checks_index (class loop *loop, tree *cond_expr,
1838                                      const dr_with_seg_len_pair_t &alias_pair)
1839 {
1840   const dr_with_seg_len &dr_a = alias_pair.first;
1841   const dr_with_seg_len &dr_b = alias_pair.second;
1842   if ((alias_pair.flags & DR_ALIAS_MIXED_STEPS)
1843       || integer_zerop (DR_STEP (dr_a.dr))
1844       || integer_zerop (DR_STEP (dr_b.dr))
1845       || DR_NUM_DIMENSIONS (dr_a.dr) != DR_NUM_DIMENSIONS (dr_b.dr))
1846     return false;
1847
1848   poly_uint64 seg_len1, seg_len2;
1849   if (!poly_int_tree_p (dr_a.seg_len, &seg_len1)
1850       || !poly_int_tree_p (dr_b.seg_len, &seg_len2))
1851     return false;
1852
1853   if (!tree_fits_shwi_p (DR_STEP (dr_a.dr)))
1854     return false;
1855
1856   if (!operand_equal_p (DR_BASE_OBJECT (dr_a.dr), DR_BASE_OBJECT (dr_b.dr), 0))
1857     return false;
1858
1859   if (!operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0))
1860     return false;
1861
1862   gcc_assert (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST);
1863
1864   bool neg_step = tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0;
1865   unsigned HOST_WIDE_INT abs_step = tree_to_shwi (DR_STEP (dr_a.dr));
1866   if (neg_step)
1867     {
1868       abs_step = -abs_step;
1869       seg_len1 = (-wi::to_poly_wide (dr_a.seg_len)).force_uhwi ();
1870       seg_len2 = (-wi::to_poly_wide (dr_b.seg_len)).force_uhwi ();
1871     }
1872
1873   /* Infer the number of iterations with which the memory segment is accessed
1874      by DR.  In other words, alias is checked if memory segment accessed by
1875      DR_A in some iterations intersect with memory segment accessed by DR_B
1876      in the same amount iterations.
1877      Note segnment length is a linear function of number of iterations with
1878      DR_STEP as the coefficient.  */
1879   poly_uint64 niter_len1, niter_len2;
1880   if (!can_div_trunc_p (seg_len1 + abs_step - 1, abs_step, &niter_len1)
1881       || !can_div_trunc_p (seg_len2 + abs_step - 1, abs_step, &niter_len2))
1882     return false;
1883
1884   /* Divide each access size by the byte step, rounding up.  */
1885   poly_uint64 niter_access1, niter_access2;
1886   if (!can_div_trunc_p (dr_a.access_size + abs_step - 1,
1887                         abs_step, &niter_access1)
1888       || !can_div_trunc_p (dr_b.access_size + abs_step - 1,
1889                            abs_step, &niter_access2))
1890     return false;
1891
1892   bool waw_or_war_p = (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW)) == 0;
1893
1894   unsigned int i;
1895   for (i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
1896     {
1897       tree access1 = DR_ACCESS_FN (dr_a.dr, i);
1898       tree access2 = DR_ACCESS_FN (dr_b.dr, i);
1899       /* Two indices must be the same if they are not scev, or not scev wrto
1900          current loop being vecorized.  */
1901       if (TREE_CODE (access1) != POLYNOMIAL_CHREC
1902           || TREE_CODE (access2) != POLYNOMIAL_CHREC
1903           || CHREC_VARIABLE (access1) != (unsigned)loop->num
1904           || CHREC_VARIABLE (access2) != (unsigned)loop->num)
1905         {
1906           if (operand_equal_p (access1, access2, 0))
1907             continue;
1908
1909           return false;
1910         }
1911       /* The two indices must have the same step.  */
1912       if (!operand_equal_p (CHREC_RIGHT (access1), CHREC_RIGHT (access2), 0))
1913         return false;
1914
1915       tree idx_step = CHREC_RIGHT (access1);
1916       /* Index must have const step, otherwise DR_STEP won't be constant.  */
1917       gcc_assert (TREE_CODE (idx_step) == INTEGER_CST);
1918       /* Index must evaluate in the same direction as DR.  */
1919       gcc_assert (!neg_step || tree_int_cst_sign_bit (idx_step) == 1);
1920
1921       tree min1 = CHREC_LEFT (access1);
1922       tree min2 = CHREC_LEFT (access2);
1923       if (!types_compatible_p (TREE_TYPE (min1), TREE_TYPE (min2)))
1924         return false;
1925
1926       /* Ideally, alias can be checked against loop's control IV, but we
1927          need to prove linear mapping between control IV and reference
1928          index.  Although that should be true, we check against (array)
1929          index of data reference.  Like segment length, index length is
1930          linear function of the number of iterations with index_step as
1931          the coefficient, i.e, niter_len * idx_step.  */
1932       offset_int abs_idx_step = offset_int::from (wi::to_wide (idx_step),
1933                                                   SIGNED);
1934       if (neg_step)
1935         abs_idx_step = -abs_idx_step;
1936       poly_offset_int idx_len1 = abs_idx_step * niter_len1;
1937       poly_offset_int idx_len2 = abs_idx_step * niter_len2;
1938       poly_offset_int idx_access1 = abs_idx_step * niter_access1;
1939       poly_offset_int idx_access2 = abs_idx_step * niter_access2;
1940
1941       gcc_assert (known_ge (idx_len1, 0)
1942                   && known_ge (idx_len2, 0)
1943                   && known_ge (idx_access1, 0)
1944                   && known_ge (idx_access2, 0));
1945
1946       /* Each access has the following pattern, with lengths measured
1947          in units of INDEX:
1948
1949               <-- idx_len -->
1950               <--- A: -ve step --->
1951               +-----+-------+-----+-------+-----+
1952               | n-1 | ..... |  0  | ..... | n-1 |
1953               +-----+-------+-----+-------+-----+
1954                             <--- B: +ve step --->
1955                             <-- idx_len -->
1956                             |
1957                            min
1958
1959          where "n" is the number of scalar iterations covered by the segment
1960          and where each access spans idx_access units.
1961
1962          A is the range of bytes accessed when the step is negative,
1963          B is the range when the step is positive.
1964
1965          When checking for general overlap, we need to test whether
1966          the range:
1967
1968            [min1 + low_offset1, min2 + high_offset1 + idx_access1 - 1]
1969
1970          overlaps:
1971
1972            [min2 + low_offset2, min2 + high_offset2 + idx_access2 - 1]
1973
1974          where:
1975
1976             low_offsetN = +ve step ? 0 : -idx_lenN;
1977            high_offsetN = +ve step ? idx_lenN : 0;
1978
1979          This is equivalent to testing whether:
1980
1981            min1 + low_offset1 <= min2 + high_offset2 + idx_access2 - 1
1982            && min2 + low_offset2 <= min1 + high_offset1 + idx_access1 - 1
1983
1984          Converting this into a single test, there is an overlap if:
1985
1986            0 <= min2 - min1 + bias <= limit
1987
1988          where  bias = high_offset2 + idx_access2 - 1 - low_offset1
1989                limit = (high_offset1 - low_offset1 + idx_access1 - 1)
1990                      + (high_offset2 - low_offset2 + idx_access2 - 1)
1991           i.e. limit = idx_len1 + idx_access1 - 1 + idx_len2 + idx_access2 - 1
1992
1993          Combining the tests requires limit to be computable in an unsigned
1994          form of the index type; if it isn't, we fall back to the usual
1995          pointer-based checks.
1996
1997          We can do better if DR_B is a write and if DR_A and DR_B are
1998          well-ordered in both the original and the new code (see the
1999          comment above the DR_ALIAS_* flags for details).  In this case
2000          we know that for each i in [0, n-1], the write performed by
2001          access i of DR_B occurs after access numbers j<=i of DR_A in
2002          both the original and the new code.  Any write or anti
2003          dependencies wrt those DR_A accesses are therefore maintained.
2004
2005          We just need to make sure that each individual write in DR_B does not
2006          overlap any higher-indexed access in DR_A; such DR_A accesses happen
2007          after the DR_B access in the original code but happen before it in
2008          the new code.
2009
2010          We know the steps for both accesses are equal, so by induction, we
2011          just need to test whether the first write of DR_B overlaps a later
2012          access of DR_A.  In other words, we need to move min1 along by
2013          one iteration:
2014
2015            min1' = min1 + idx_step
2016
2017          and use the ranges:
2018
2019            [min1' + low_offset1', min1' + high_offset1' + idx_access1 - 1]
2020
2021          and:
2022
2023            [min2, min2 + idx_access2 - 1]
2024
2025          where:
2026
2027             low_offset1' = +ve step ? 0 : -(idx_len1 - |idx_step|)
2028            high_offset1' = +ve_step ? idx_len1 - |idx_step| : 0.  */
2029       if (waw_or_war_p)
2030         idx_len1 -= abs_idx_step;
2031
2032       poly_offset_int limit = idx_len1 + idx_access1 - 1 + idx_access2 - 1;
2033       if (!waw_or_war_p)
2034         limit += idx_len2;
2035
2036       tree utype = unsigned_type_for (TREE_TYPE (min1));
2037       if (!wi::fits_to_tree_p (limit, utype))
2038         return false;
2039
2040       poly_offset_int low_offset1 = neg_step ? -idx_len1 : 0;
2041       poly_offset_int high_offset2 = neg_step || waw_or_war_p ? 0 : idx_len2;
2042       poly_offset_int bias = high_offset2 + idx_access2 - 1 - low_offset1;
2043       /* Equivalent to adding IDX_STEP to MIN1.  */
2044       if (waw_or_war_p)
2045         bias -= wi::to_offset (idx_step);
2046
2047       tree subject = fold_build2 (MINUS_EXPR, utype,
2048                                   fold_convert (utype, min2),
2049                                   fold_convert (utype, min1));
2050       subject = fold_build2 (PLUS_EXPR, utype, subject,
2051                              wide_int_to_tree (utype, bias));
2052       tree part_cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject,
2053                                          wide_int_to_tree (utype, limit));
2054       if (*cond_expr)
2055         *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2056                                   *cond_expr, part_cond_expr);
2057       else
2058         *cond_expr = part_cond_expr;
2059     }
2060   if (dump_enabled_p ())
2061     {
2062       if (waw_or_war_p)
2063         dump_printf (MSG_NOTE, "using an index-based WAR/WAW test\n");
2064       else
2065         dump_printf (MSG_NOTE, "using an index-based overlap test\n");
2066     }
2067   return true;
2068 }
2069
2070 /* A subroutine of create_intersect_range_checks, with a subset of the
2071    same arguments.  Try to optimize cases in which the second access
2072    is a write and in which some overlap is valid.  */
2073
2074 static bool
2075 create_waw_or_war_checks (tree *cond_expr,
2076                           const dr_with_seg_len_pair_t &alias_pair)
2077 {
2078   const dr_with_seg_len& dr_a = alias_pair.first;
2079   const dr_with_seg_len& dr_b = alias_pair.second;
2080
2081   /* Check for cases in which:
2082
2083      (a) DR_B is always a write;
2084      (b) the accesses are well-ordered in both the original and new code
2085          (see the comment above the DR_ALIAS_* flags for details); and
2086      (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR.  */
2087   if (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW))
2088     return false;
2089
2090   /* Check for equal (but possibly variable) steps.  */
2091   tree step = DR_STEP (dr_a.dr);
2092   if (!operand_equal_p (step, DR_STEP (dr_b.dr)))
2093     return false;
2094
2095   /* Make sure that we can operate on sizetype without loss of precision.  */
2096   tree addr_type = TREE_TYPE (DR_BASE_ADDRESS (dr_a.dr));
2097   if (TYPE_PRECISION (addr_type) != TYPE_PRECISION (sizetype))
2098     return false;
2099
2100   /* All addresses involved are known to have a common alignment ALIGN.
2101      We can therefore subtract ALIGN from an exclusive endpoint to get
2102      an inclusive endpoint.  In the best (and common) case, ALIGN is the
2103      same as the access sizes of both DRs, and so subtracting ALIGN
2104      cancels out the addition of an access size.  */
2105   unsigned int align = MIN (dr_a.align, dr_b.align);
2106   poly_uint64 last_chunk_a = dr_a.access_size - align;
2107   poly_uint64 last_chunk_b = dr_b.access_size - align;
2108
2109   /* Get a boolean expression that is true when the step is negative.  */
2110   tree indicator = dr_direction_indicator (dr_a.dr);
2111   tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
2112                                fold_convert (ssizetype, indicator),
2113                                ssize_int (0));
2114
2115   /* Get lengths in sizetype.  */
2116   tree seg_len_a
2117     = fold_convert (sizetype, rewrite_to_non_trapping_overflow (dr_a.seg_len));
2118   step = fold_convert (sizetype, rewrite_to_non_trapping_overflow (step));
2119
2120   /* Each access has the following pattern:
2121
2122           <- |seg_len| ->
2123           <--- A: -ve step --->
2124           +-----+-------+-----+-------+-----+
2125           | n-1 | ..... |  0  | ..... | n-1 |
2126           +-----+-------+-----+-------+-----+
2127                         <--- B: +ve step --->
2128                         <- |seg_len| ->
2129                         |
2130                    base address
2131
2132      where "n" is the number of scalar iterations covered by the segment.
2133
2134      A is the range of bytes accessed when the step is negative,
2135      B is the range when the step is positive.
2136
2137      We know that DR_B is a write.  We also know (from checking that
2138      DR_A and DR_B are well-ordered) that for each i in [0, n-1],
2139      the write performed by access i of DR_B occurs after access numbers
2140      j<=i of DR_A in both the original and the new code.  Any write or
2141      anti dependencies wrt those DR_A accesses are therefore maintained.
2142
2143      We just need to make sure that each individual write in DR_B does not
2144      overlap any higher-indexed access in DR_A; such DR_A accesses happen
2145      after the DR_B access in the original code but happen before it in
2146      the new code.
2147
2148      We know the steps for both accesses are equal, so by induction, we
2149      just need to test whether the first write of DR_B overlaps a later
2150      access of DR_A.  In other words, we need to move addr_a along by
2151      one iteration:
2152
2153        addr_a' = addr_a + step
2154
2155      and check whether:
2156
2157        [addr_b, addr_b + last_chunk_b]
2158
2159      overlaps:
2160
2161        [addr_a' + low_offset_a, addr_a' + high_offset_a + last_chunk_a]
2162
2163      where [low_offset_a, high_offset_a] spans accesses [1, n-1].  I.e.:
2164
2165         low_offset_a = +ve step ? 0 : seg_len_a - step
2166        high_offset_a = +ve step ? seg_len_a - step : 0
2167
2168      This is equivalent to testing whether:
2169
2170        addr_a' + low_offset_a <= addr_b + last_chunk_b
2171        && addr_b <= addr_a' + high_offset_a + last_chunk_a
2172
2173      Converting this into a single test, there is an overlap if:
2174
2175        0 <= addr_b + last_chunk_b - addr_a' - low_offset_a <= limit
2176
2177      where limit = high_offset_a - low_offset_a + last_chunk_a + last_chunk_b
2178
2179      If DR_A is performed, limit + |step| - last_chunk_b is known to be
2180      less than the size of the object underlying DR_A.  We also know
2181      that last_chunk_b <= |step|; this is checked elsewhere if it isn't
2182      guaranteed at compile time.  There can therefore be no overflow if
2183      "limit" is calculated in an unsigned type with pointer precision.  */
2184   tree addr_a = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_a.dr),
2185                                          DR_OFFSET (dr_a.dr));
2186   addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr));
2187
2188   tree addr_b = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_b.dr),
2189                                          DR_OFFSET (dr_b.dr));
2190   addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr));
2191
2192   /* Advance ADDR_A by one iteration and adjust the length to compensate.  */
2193   addr_a = fold_build_pointer_plus (addr_a, step);
2194   tree seg_len_a_minus_step = fold_build2 (MINUS_EXPR, sizetype,
2195                                            seg_len_a, step);
2196   if (!CONSTANT_CLASS_P (seg_len_a_minus_step))
2197     seg_len_a_minus_step = build1 (SAVE_EXPR, sizetype, seg_len_a_minus_step);
2198
2199   tree low_offset_a = fold_build3 (COND_EXPR, sizetype, neg_step,
2200                                    seg_len_a_minus_step, size_zero_node);
2201   if (!CONSTANT_CLASS_P (low_offset_a))
2202     low_offset_a = build1 (SAVE_EXPR, sizetype, low_offset_a);
2203
2204   /* We could use COND_EXPR <neg_step, size_zero_node, seg_len_a_minus_step>,
2205      but it's usually more efficient to reuse the LOW_OFFSET_A result.  */
2206   tree high_offset_a = fold_build2 (MINUS_EXPR, sizetype, seg_len_a_minus_step,
2207                                     low_offset_a);
2208
2209   /* The amount added to addr_b - addr_a'.  */
2210   tree bias = fold_build2 (MINUS_EXPR, sizetype,
2211                            size_int (last_chunk_b), low_offset_a);
2212
2213   tree limit = fold_build2 (MINUS_EXPR, sizetype, high_offset_a, low_offset_a);
2214   limit = fold_build2 (PLUS_EXPR, sizetype, limit,
2215                        size_int (last_chunk_a + last_chunk_b));
2216
2217   tree subject = fold_build2 (POINTER_DIFF_EXPR, ssizetype, addr_b, addr_a);
2218   subject = fold_build2 (PLUS_EXPR, sizetype,
2219                          fold_convert (sizetype, subject), bias);
2220
2221   *cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject, limit);
2222   if (dump_enabled_p ())
2223     dump_printf (MSG_NOTE, "using an address-based WAR/WAW test\n");
2224   return true;
2225 }
2226
2227 /* If ALIGN is nonzero, set up *SEQ_MIN_OUT and *SEQ_MAX_OUT so that for
2228    every address ADDR accessed by D:
2229
2230      *SEQ_MIN_OUT <= ADDR (== ADDR & -ALIGN) <= *SEQ_MAX_OUT
2231
2232    In this case, every element accessed by D is aligned to at least
2233    ALIGN bytes.
2234
2235    If ALIGN is zero then instead set *SEG_MAX_OUT so that:
2236
2237      *SEQ_MIN_OUT <= ADDR < *SEQ_MAX_OUT.  */
2238
2239 static void
2240 get_segment_min_max (const dr_with_seg_len &d, tree *seg_min_out,
2241                      tree *seg_max_out, HOST_WIDE_INT align)
2242 {
2243   /* Each access has the following pattern:
2244
2245           <- |seg_len| ->
2246           <--- A: -ve step --->
2247           +-----+-------+-----+-------+-----+
2248           | n-1 | ,.... |  0  | ..... | n-1 |
2249           +-----+-------+-----+-------+-----+
2250                         <--- B: +ve step --->
2251                         <- |seg_len| ->
2252                         |
2253                    base address
2254
2255      where "n" is the number of scalar iterations covered by the segment.
2256      (This should be VF for a particular pair if we know that both steps
2257      are the same, otherwise it will be the full number of scalar loop
2258      iterations.)
2259
2260      A is the range of bytes accessed when the step is negative,
2261      B is the range when the step is positive.
2262
2263      If the access size is "access_size" bytes, the lowest addressed byte is:
2264
2265          base + (step < 0 ? seg_len : 0)   [LB]
2266
2267      and the highest addressed byte is always below:
2268
2269          base + (step < 0 ? 0 : seg_len) + access_size   [UB]
2270
2271      Thus:
2272
2273          LB <= ADDR < UB
2274
2275      If ALIGN is nonzero, all three values are aligned to at least ALIGN
2276      bytes, so:
2277
2278          LB <= ADDR <= UB - ALIGN
2279
2280      where "- ALIGN" folds naturally with the "+ access_size" and often
2281      cancels it out.
2282
2283      We don't try to simplify LB and UB beyond this (e.g. by using
2284      MIN and MAX based on whether seg_len rather than the stride is
2285      negative) because it is possible for the absolute size of the
2286      segment to overflow the range of a ssize_t.
2287
2288      Keeping the pointer_plus outside of the cond_expr should allow
2289      the cond_exprs to be shared with other alias checks.  */
2290   tree indicator = dr_direction_indicator (d.dr);
2291   tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
2292                                fold_convert (ssizetype, indicator),
2293                                ssize_int (0));
2294   tree addr_base = fold_build_pointer_plus (DR_BASE_ADDRESS (d.dr),
2295                                             DR_OFFSET (d.dr));
2296   addr_base = fold_build_pointer_plus (addr_base, DR_INIT (d.dr));
2297   tree seg_len
2298     = fold_convert (sizetype, rewrite_to_non_trapping_overflow (d.seg_len));
2299
2300   tree min_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
2301                                 seg_len, size_zero_node);
2302   tree max_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
2303                                 size_zero_node, seg_len);
2304   max_reach = fold_build2 (PLUS_EXPR, sizetype, max_reach,
2305                            size_int (d.access_size - align));
2306
2307   *seg_min_out = fold_build_pointer_plus (addr_base, min_reach);
2308   *seg_max_out = fold_build_pointer_plus (addr_base, max_reach);
2309 }
2310
2311 /* Generate a runtime condition that is true if ALIAS_PAIR is free of aliases,
2312    storing the condition in *COND_EXPR.  The fallback is to generate a
2313    a test that the two accesses do not overlap:
2314
2315      end_a <= start_b || end_b <= start_a.  */
2316
2317 static void
2318 create_intersect_range_checks (class loop *loop, tree *cond_expr,
2319                                const dr_with_seg_len_pair_t &alias_pair)
2320 {
2321   const dr_with_seg_len& dr_a = alias_pair.first;
2322   const dr_with_seg_len& dr_b = alias_pair.second;
2323   *cond_expr = NULL_TREE;
2324   if (create_intersect_range_checks_index (loop, cond_expr, alias_pair))
2325     return;
2326
2327   if (create_ifn_alias_checks (cond_expr, alias_pair))
2328     return;
2329
2330   if (create_waw_or_war_checks (cond_expr, alias_pair))
2331     return;
2332
2333   unsigned HOST_WIDE_INT min_align;
2334   tree_code cmp_code;
2335   /* We don't have to check DR_ALIAS_MIXED_STEPS here, since both versions
2336      are equivalent.  This is just an optimization heuristic.  */
2337   if (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST
2338       && TREE_CODE (DR_STEP (dr_b.dr)) == INTEGER_CST)
2339     {
2340       /* In this case adding access_size to seg_len is likely to give
2341          a simple X * step, where X is either the number of scalar
2342          iterations or the vectorization factor.  We're better off
2343          keeping that, rather than subtracting an alignment from it.
2344
2345          In this case the maximum values are exclusive and so there is
2346          no alias if the maximum of one segment equals the minimum
2347          of another.  */
2348       min_align = 0;
2349       cmp_code = LE_EXPR;
2350     }
2351   else
2352     {
2353       /* Calculate the minimum alignment shared by all four pointers,
2354          then arrange for this alignment to be subtracted from the
2355          exclusive maximum values to get inclusive maximum values.
2356          This "- min_align" is cumulative with a "+ access_size"
2357          in the calculation of the maximum values.  In the best
2358          (and common) case, the two cancel each other out, leaving
2359          us with an inclusive bound based only on seg_len.  In the
2360          worst case we're simply adding a smaller number than before.
2361
2362          Because the maximum values are inclusive, there is an alias
2363          if the maximum value of one segment is equal to the minimum
2364          value of the other.  */
2365       min_align = MIN (dr_a.align, dr_b.align);
2366       cmp_code = LT_EXPR;
2367     }
2368
2369   tree seg_a_min, seg_a_max, seg_b_min, seg_b_max;
2370   get_segment_min_max (dr_a, &seg_a_min, &seg_a_max, min_align);
2371   get_segment_min_max (dr_b, &seg_b_min, &seg_b_max, min_align);
2372
2373   *cond_expr
2374     = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
2375         fold_build2 (cmp_code, boolean_type_node, seg_a_max, seg_b_min),
2376         fold_build2 (cmp_code, boolean_type_node, seg_b_max, seg_a_min));
2377   if (dump_enabled_p ())
2378     dump_printf (MSG_NOTE, "using an address-based overlap test\n");
2379 }
2380
2381 /* Create a conditional expression that represents the run-time checks for
2382    overlapping of address ranges represented by a list of data references
2383    pairs passed in ALIAS_PAIRS.  Data references are in LOOP.  The returned
2384    COND_EXPR is the conditional expression to be used in the if statement
2385    that controls which version of the loop gets executed at runtime.  */
2386
2387 void
2388 create_runtime_alias_checks (class loop *loop,
2389                              vec<dr_with_seg_len_pair_t> *alias_pairs,
2390                              tree * cond_expr)
2391 {
2392   tree part_cond_expr;
2393
2394   fold_defer_overflow_warnings ();
2395   dr_with_seg_len_pair_t *alias_pair;
2396   unsigned int i;
2397   FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
2398     {
2399       gcc_assert (alias_pair->flags);
2400       if (dump_enabled_p ())
2401         dump_printf (MSG_NOTE,
2402                      "create runtime check for data references %T and %T\n",
2403                      DR_REF (alias_pair->first.dr),
2404                      DR_REF (alias_pair->second.dr));
2405
2406       /* Create condition expression for each pair data references.  */
2407       create_intersect_range_checks (loop, &part_cond_expr, *alias_pair);
2408       if (*cond_expr)
2409         *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2410                                   *cond_expr, part_cond_expr);
2411       else
2412         *cond_expr = part_cond_expr;
2413     }
2414   fold_undefer_and_ignore_overflow_warnings ();
2415 }
2416
2417 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
2418    expressions.  */
2419 static bool
2420 dr_equal_offsets_p1 (tree offset1, tree offset2)
2421 {
2422   bool res;
2423
2424   STRIP_NOPS (offset1);
2425   STRIP_NOPS (offset2);
2426
2427   if (offset1 == offset2)
2428     return true;
2429
2430   if (TREE_CODE (offset1) != TREE_CODE (offset2)
2431       || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
2432     return false;
2433
2434   res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
2435                              TREE_OPERAND (offset2, 0));
2436
2437   if (!res || !BINARY_CLASS_P (offset1))
2438     return res;
2439
2440   res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
2441                              TREE_OPERAND (offset2, 1));
2442
2443   return res;
2444 }
2445
2446 /* Check if DRA and DRB have equal offsets.  */
2447 bool
2448 dr_equal_offsets_p (struct data_reference *dra,
2449                     struct data_reference *drb)
2450 {
2451   tree offset1, offset2;
2452
2453   offset1 = DR_OFFSET (dra);
2454   offset2 = DR_OFFSET (drb);
2455
2456   return dr_equal_offsets_p1 (offset1, offset2);
2457 }
2458
2459 /* Returns true if FNA == FNB.  */
2460
2461 static bool
2462 affine_function_equal_p (affine_fn fna, affine_fn fnb)
2463 {
2464   unsigned i, n = fna.length ();
2465
2466   if (n != fnb.length ())
2467     return false;
2468
2469   for (i = 0; i < n; i++)
2470     if (!operand_equal_p (fna[i], fnb[i], 0))
2471       return false;
2472
2473   return true;
2474 }
2475
2476 /* If all the functions in CF are the same, returns one of them,
2477    otherwise returns NULL.  */
2478
2479 static affine_fn
2480 common_affine_function (conflict_function *cf)
2481 {
2482   unsigned i;
2483   affine_fn comm;
2484
2485   if (!CF_NONTRIVIAL_P (cf))
2486     return affine_fn ();
2487
2488   comm = cf->fns[0];
2489
2490   for (i = 1; i < cf->n; i++)
2491     if (!affine_function_equal_p (comm, cf->fns[i]))
2492       return affine_fn ();
2493
2494   return comm;
2495 }
2496
2497 /* Returns the base of the affine function FN.  */
2498
2499 static tree
2500 affine_function_base (affine_fn fn)
2501 {
2502   return fn[0];
2503 }
2504
2505 /* Returns true if FN is a constant.  */
2506
2507 static bool
2508 affine_function_constant_p (affine_fn fn)
2509 {
2510   unsigned i;
2511   tree coef;
2512
2513   for (i = 1; fn.iterate (i, &coef); i++)
2514     if (!integer_zerop (coef))
2515       return false;
2516
2517   return true;
2518 }
2519
2520 /* Returns true if FN is the zero constant function.  */
2521
2522 static bool
2523 affine_function_zero_p (affine_fn fn)
2524 {
2525   return (integer_zerop (affine_function_base (fn))
2526           && affine_function_constant_p (fn));
2527 }
2528
2529 /* Returns a signed integer type with the largest precision from TA
2530    and TB.  */
2531
2532 static tree
2533 signed_type_for_types (tree ta, tree tb)
2534 {
2535   if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
2536     return signed_type_for (ta);
2537   else
2538     return signed_type_for (tb);
2539 }
2540
2541 /* Applies operation OP on affine functions FNA and FNB, and returns the
2542    result.  */
2543
2544 static affine_fn
2545 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
2546 {
2547   unsigned i, n, m;
2548   affine_fn ret;
2549   tree coef;
2550
2551   if (fnb.length () > fna.length ())
2552     {
2553       n = fna.length ();
2554       m = fnb.length ();
2555     }
2556   else
2557     {
2558       n = fnb.length ();
2559       m = fna.length ();
2560     }
2561
2562   ret.create (m);
2563   for (i = 0; i < n; i++)
2564     {
2565       tree type = signed_type_for_types (TREE_TYPE (fna[i]),
2566                                          TREE_TYPE (fnb[i]));
2567       ret.quick_push (fold_build2 (op, type, fna[i], fnb[i]));
2568     }
2569
2570   for (; fna.iterate (i, &coef); i++)
2571     ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2572                                  coef, integer_zero_node));
2573   for (; fnb.iterate (i, &coef); i++)
2574     ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2575                                  integer_zero_node, coef));
2576
2577   return ret;
2578 }
2579
2580 /* Returns the sum of affine functions FNA and FNB.  */
2581
2582 static affine_fn
2583 affine_fn_plus (affine_fn fna, affine_fn fnb)
2584 {
2585   return affine_fn_op (PLUS_EXPR, fna, fnb);
2586 }
2587
2588 /* Returns the difference of affine functions FNA and FNB.  */
2589
2590 static affine_fn
2591 affine_fn_minus (affine_fn fna, affine_fn fnb)
2592 {
2593   return affine_fn_op (MINUS_EXPR, fna, fnb);
2594 }
2595
2596 /* Frees affine function FN.  */
2597
2598 static void
2599 affine_fn_free (affine_fn fn)
2600 {
2601   fn.release ();
2602 }
2603
2604 /* Determine for each subscript in the data dependence relation DDR
2605    the distance.  */
2606
2607 static void
2608 compute_subscript_distance (struct data_dependence_relation *ddr)
2609 {
2610   conflict_function *cf_a, *cf_b;
2611   affine_fn fn_a, fn_b, diff;
2612
2613   if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
2614     {
2615       unsigned int i;
2616
2617       for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2618         {
2619           struct subscript *subscript;
2620
2621           subscript = DDR_SUBSCRIPT (ddr, i);
2622           cf_a = SUB_CONFLICTS_IN_A (subscript);
2623           cf_b = SUB_CONFLICTS_IN_B (subscript);
2624
2625           fn_a = common_affine_function (cf_a);
2626           fn_b = common_affine_function (cf_b);
2627           if (!fn_a.exists () || !fn_b.exists ())
2628             {
2629               SUB_DISTANCE (subscript) = chrec_dont_know;
2630               return;
2631             }
2632           diff = affine_fn_minus (fn_a, fn_b);
2633
2634           if (affine_function_constant_p (diff))
2635             SUB_DISTANCE (subscript) = affine_function_base (diff);
2636           else
2637             SUB_DISTANCE (subscript) = chrec_dont_know;
2638
2639           affine_fn_free (diff);
2640         }
2641     }
2642 }
2643
2644 /* Returns the conflict function for "unknown".  */
2645
2646 static conflict_function *
2647 conflict_fn_not_known (void)
2648 {
2649   conflict_function *fn = XCNEW (conflict_function);
2650   fn->n = NOT_KNOWN;
2651
2652   return fn;
2653 }
2654
2655 /* Returns the conflict function for "independent".  */
2656
2657 static conflict_function *
2658 conflict_fn_no_dependence (void)
2659 {
2660   conflict_function *fn = XCNEW (conflict_function);
2661   fn->n = NO_DEPENDENCE;
2662
2663   return fn;
2664 }
2665
2666 /* Returns true if the address of OBJ is invariant in LOOP.  */
2667
2668 static bool
2669 object_address_invariant_in_loop_p (const class loop *loop, const_tree obj)
2670 {
2671   while (handled_component_p (obj))
2672     {
2673       if (TREE_CODE (obj) == ARRAY_REF)
2674         {
2675           for (int i = 1; i < 4; ++i)
2676             if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, i),
2677                                                         loop->num))
2678               return false;
2679         }
2680       else if (TREE_CODE (obj) == COMPONENT_REF)
2681         {
2682           if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
2683                                                       loop->num))
2684             return false;
2685         }
2686       obj = TREE_OPERAND (obj, 0);
2687     }
2688
2689   if (!INDIRECT_REF_P (obj)
2690       && TREE_CODE (obj) != MEM_REF)
2691     return true;
2692
2693   return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
2694                                                   loop->num);
2695 }
2696
2697 /* Returns false if we can prove that data references A and B do not alias,
2698    true otherwise.  If LOOP_NEST is false no cross-iteration aliases are
2699    considered.  */
2700
2701 bool
2702 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
2703                 class loop *loop_nest)
2704 {
2705   tree addr_a = DR_BASE_OBJECT (a);
2706   tree addr_b = DR_BASE_OBJECT (b);
2707
2708   /* If we are not processing a loop nest but scalar code we
2709      do not need to care about possible cross-iteration dependences
2710      and thus can process the full original reference.  Do so,
2711      similar to how loop invariant motion applies extra offset-based
2712      disambiguation.  */
2713   if (!loop_nest)
2714     {
2715       aff_tree off1, off2;
2716       poly_widest_int size1, size2;
2717       get_inner_reference_aff (DR_REF (a), &off1, &size1);
2718       get_inner_reference_aff (DR_REF (b), &off2, &size2);
2719       aff_combination_scale (&off1, -1);
2720       aff_combination_add (&off2, &off1);
2721       if (aff_comb_cannot_overlap_p (&off2, size1, size2))
2722         return false;
2723     }
2724
2725   if ((TREE_CODE (addr_a) == MEM_REF || TREE_CODE (addr_a) == TARGET_MEM_REF)
2726       && (TREE_CODE (addr_b) == MEM_REF || TREE_CODE (addr_b) == TARGET_MEM_REF)
2727       /* For cross-iteration dependences the cliques must be valid for the
2728          whole loop, not just individual iterations.  */
2729       && (!loop_nest
2730           || MR_DEPENDENCE_CLIQUE (addr_a) == 1
2731           || MR_DEPENDENCE_CLIQUE (addr_a) == loop_nest->owned_clique)
2732       && MR_DEPENDENCE_CLIQUE (addr_a) == MR_DEPENDENCE_CLIQUE (addr_b)
2733       && MR_DEPENDENCE_BASE (addr_a) != MR_DEPENDENCE_BASE (addr_b))
2734     return false;
2735
2736   /* If we had an evolution in a pointer-based MEM_REF BASE_OBJECT we
2737      do not know the size of the base-object.  So we cannot do any
2738      offset/overlap based analysis but have to rely on points-to
2739      information only.  */
2740   if (TREE_CODE (addr_a) == MEM_REF
2741       && (DR_UNCONSTRAINED_BASE (a)
2742           || TREE_CODE (TREE_OPERAND (addr_a, 0)) == SSA_NAME))
2743     {
2744       /* For true dependences we can apply TBAA.  */
2745       if (flag_strict_aliasing
2746           && DR_IS_WRITE (a) && DR_IS_READ (b)
2747           && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
2748                                      get_alias_set (DR_REF (b))))
2749         return false;
2750       if (TREE_CODE (addr_b) == MEM_REF)
2751         return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
2752                                        TREE_OPERAND (addr_b, 0));
2753       else
2754         return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
2755                                        build_fold_addr_expr (addr_b));
2756     }
2757   else if (TREE_CODE (addr_b) == MEM_REF
2758            && (DR_UNCONSTRAINED_BASE (b)
2759                || TREE_CODE (TREE_OPERAND (addr_b, 0)) == SSA_NAME))
2760     {
2761       /* For true dependences we can apply TBAA.  */
2762       if (flag_strict_aliasing
2763           && DR_IS_WRITE (a) && DR_IS_READ (b)
2764           && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
2765                                      get_alias_set (DR_REF (b))))
2766         return false;
2767       if (TREE_CODE (addr_a) == MEM_REF)
2768         return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
2769                                        TREE_OPERAND (addr_b, 0));
2770       else
2771         return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
2772                                        TREE_OPERAND (addr_b, 0));
2773     }
2774
2775   /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
2776      that is being subsetted in the loop nest.  */
2777   if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
2778     return refs_output_dependent_p (addr_a, addr_b);
2779   else if (DR_IS_READ (a) && DR_IS_WRITE (b))
2780     return refs_anti_dependent_p (addr_a, addr_b);
2781   return refs_may_alias_p (addr_a, addr_b);
2782 }
2783
2784 /* REF_A and REF_B both satisfy access_fn_component_p.  Return true
2785    if it is meaningful to compare their associated access functions
2786    when checking for dependencies.  */
2787
2788 static bool
2789 access_fn_components_comparable_p (tree ref_a, tree ref_b)
2790 {
2791   /* Allow pairs of component refs from the following sets:
2792
2793        { REALPART_EXPR, IMAGPART_EXPR }
2794        { COMPONENT_REF }
2795        { ARRAY_REF }.  */
2796   tree_code code_a = TREE_CODE (ref_a);
2797   tree_code code_b = TREE_CODE (ref_b);
2798   if (code_a == IMAGPART_EXPR)
2799     code_a = REALPART_EXPR;
2800   if (code_b == IMAGPART_EXPR)
2801     code_b = REALPART_EXPR;
2802   if (code_a != code_b)
2803     return false;
2804
2805   if (TREE_CODE (ref_a) == COMPONENT_REF)
2806     /* ??? We cannot simply use the type of operand #0 of the refs here as
2807        the Fortran compiler smuggles type punning into COMPONENT_REFs.
2808        Use the DECL_CONTEXT of the FIELD_DECLs instead.  */
2809     return (DECL_CONTEXT (TREE_OPERAND (ref_a, 1))
2810             == DECL_CONTEXT (TREE_OPERAND (ref_b, 1)));
2811
2812   return types_compatible_p (TREE_TYPE (TREE_OPERAND (ref_a, 0)),
2813                              TREE_TYPE (TREE_OPERAND (ref_b, 0)));
2814 }
2815
2816 /* Initialize a data dependence relation between data accesses A and
2817    B.  NB_LOOPS is the number of loops surrounding the references: the
2818    size of the classic distance/direction vectors.  */
2819
2820 struct data_dependence_relation *
2821 initialize_data_dependence_relation (struct data_reference *a,
2822                                      struct data_reference *b,
2823                                      vec<loop_p> loop_nest)
2824 {
2825   struct data_dependence_relation *res;
2826   unsigned int i;
2827
2828   res = XCNEW (struct data_dependence_relation);
2829   DDR_A (res) = a;
2830   DDR_B (res) = b;
2831   DDR_LOOP_NEST (res).create (0);
2832   DDR_SUBSCRIPTS (res).create (0);
2833   DDR_DIR_VECTS (res).create (0);
2834   DDR_DIST_VECTS (res).create (0);
2835
2836   if (a == NULL || b == NULL)
2837     {
2838       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
2839       return res;
2840     }
2841
2842   /* If the data references do not alias, then they are independent.  */
2843   if (!dr_may_alias_p (a, b, loop_nest.exists () ? loop_nest[0] : NULL))
2844     {
2845       DDR_ARE_DEPENDENT (res) = chrec_known;
2846       return res;
2847     }
2848
2849   unsigned int num_dimensions_a = DR_NUM_DIMENSIONS (a);
2850   unsigned int num_dimensions_b = DR_NUM_DIMENSIONS (b);
2851   if (num_dimensions_a == 0 || num_dimensions_b == 0)
2852     {
2853       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
2854       return res;
2855     }
2856
2857   /* For unconstrained bases, the root (highest-indexed) subscript
2858      describes a variation in the base of the original DR_REF rather
2859      than a component access.  We have no type that accurately describes
2860      the new DR_BASE_OBJECT (whose TREE_TYPE describes the type *after*
2861      applying this subscript) so limit the search to the last real
2862      component access.
2863
2864      E.g. for:
2865
2866         void
2867         f (int a[][8], int b[][8])
2868         {
2869           for (int i = 0; i < 8; ++i)
2870             a[i * 2][0] = b[i][0];
2871         }
2872
2873      the a and b accesses have a single ARRAY_REF component reference [0]
2874      but have two subscripts.  */
2875   if (DR_UNCONSTRAINED_BASE (a))
2876     num_dimensions_a -= 1;
2877   if (DR_UNCONSTRAINED_BASE (b))
2878     num_dimensions_b -= 1;
2879
2880   /* These structures describe sequences of component references in
2881      DR_REF (A) and DR_REF (B).  Each component reference is tied to a
2882      specific access function.  */
2883   struct {
2884     /* The sequence starts at DR_ACCESS_FN (A, START_A) of A and
2885        DR_ACCESS_FN (B, START_B) of B (inclusive) and extends to higher
2886        indices.  In C notation, these are the indices of the rightmost
2887        component references; e.g. for a sequence .b.c.d, the start
2888        index is for .d.  */
2889     unsigned int start_a;
2890     unsigned int start_b;
2891
2892     /* The sequence contains LENGTH consecutive access functions from
2893        each DR.  */
2894     unsigned int length;
2895
2896     /* The enclosing objects for the A and B sequences respectively,
2897        i.e. the objects to which DR_ACCESS_FN (A, START_A + LENGTH - 1)
2898        and DR_ACCESS_FN (B, START_B + LENGTH - 1) are applied.  */
2899     tree object_a;
2900     tree object_b;
2901   } full_seq = {}, struct_seq = {};
2902
2903   /* Before each iteration of the loop:
2904
2905      - REF_A is what you get after applying DR_ACCESS_FN (A, INDEX_A) and
2906      - REF_B is what you get after applying DR_ACCESS_FN (B, INDEX_B).  */
2907   unsigned int index_a = 0;
2908   unsigned int index_b = 0;
2909   tree ref_a = DR_REF (a);
2910   tree ref_b = DR_REF (b);
2911
2912   /* Now walk the component references from the final DR_REFs back up to
2913      the enclosing base objects.  Each component reference corresponds
2914      to one access function in the DR, with access function 0 being for
2915      the final DR_REF and the highest-indexed access function being the
2916      one that is applied to the base of the DR.
2917
2918      Look for a sequence of component references whose access functions
2919      are comparable (see access_fn_components_comparable_p).  If more
2920      than one such sequence exists, pick the one nearest the base
2921      (which is the leftmost sequence in C notation).  Store this sequence
2922      in FULL_SEQ.
2923
2924      For example, if we have:
2925
2926         struct foo { struct bar s; ... } (*a)[10], (*b)[10];
2927
2928         A: a[0][i].s.c.d
2929         B: __real b[0][i].s.e[i].f
2930
2931      (where d is the same type as the real component of f) then the access
2932      functions would be:
2933
2934                          0   1   2   3
2935         A:              .d  .c  .s [i]
2936
2937                  0   1   2   3   4   5
2938         B:  __real  .f [i]  .e  .s [i]
2939
2940      The A0/B2 column isn't comparable, since .d is a COMPONENT_REF
2941      and [i] is an ARRAY_REF.  However, the A1/B3 column contains two
2942      COMPONENT_REF accesses for struct bar, so is comparable.  Likewise
2943      the A2/B4 column contains two COMPONENT_REF accesses for struct foo,
2944      so is comparable.  The A3/B5 column contains two ARRAY_REFs that
2945      index foo[10] arrays, so is again comparable.  The sequence is
2946      therefore:
2947
2948         A: [1, 3]  (i.e. [i].s.c)
2949         B: [3, 5]  (i.e. [i].s.e)
2950
2951      Also look for sequences of component references whose access
2952      functions are comparable and whose enclosing objects have the same
2953      RECORD_TYPE.  Store this sequence in STRUCT_SEQ.  In the above
2954      example, STRUCT_SEQ would be:
2955
2956         A: [1, 2]  (i.e. s.c)
2957         B: [3, 4]  (i.e. s.e)  */
2958   while (index_a < num_dimensions_a && index_b < num_dimensions_b)
2959     {
2960       /* REF_A and REF_B must be one of the component access types
2961          allowed by dr_analyze_indices.  */
2962       gcc_checking_assert (access_fn_component_p (ref_a));
2963       gcc_checking_assert (access_fn_component_p (ref_b));
2964
2965       /* Get the immediately-enclosing objects for REF_A and REF_B,
2966          i.e. the references *before* applying DR_ACCESS_FN (A, INDEX_A)
2967          and DR_ACCESS_FN (B, INDEX_B).  */
2968       tree object_a = TREE_OPERAND (ref_a, 0);
2969       tree object_b = TREE_OPERAND (ref_b, 0);
2970
2971       tree type_a = TREE_TYPE (object_a);
2972       tree type_b = TREE_TYPE (object_b);
2973       if (access_fn_components_comparable_p (ref_a, ref_b))
2974         {
2975           /* This pair of component accesses is comparable for dependence
2976              analysis, so we can include DR_ACCESS_FN (A, INDEX_A) and
2977              DR_ACCESS_FN (B, INDEX_B) in the sequence.  */
2978           if (full_seq.start_a + full_seq.length != index_a
2979               || full_seq.start_b + full_seq.length != index_b)
2980             {
2981               /* The accesses don't extend the current sequence,
2982                  so start a new one here.  */
2983               full_seq.start_a = index_a;
2984               full_seq.start_b = index_b;
2985               full_seq.length = 0;
2986             }
2987
2988           /* Add this pair of references to the sequence.  */
2989           full_seq.length += 1;
2990           full_seq.object_a = object_a;
2991           full_seq.object_b = object_b;
2992
2993           /* If the enclosing objects are structures (and thus have the
2994              same RECORD_TYPE), record the new sequence in STRUCT_SEQ.  */
2995           if (TREE_CODE (type_a) == RECORD_TYPE)
2996             struct_seq = full_seq;
2997
2998           /* Move to the next containing reference for both A and B.  */
2999           ref_a = object_a;
3000           ref_b = object_b;
3001           index_a += 1;
3002           index_b += 1;
3003           continue;
3004         }
3005
3006       /* Try to approach equal type sizes.  */
3007       if (!COMPLETE_TYPE_P (type_a)
3008           || !COMPLETE_TYPE_P (type_b)
3009           || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_a))
3010           || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_b)))
3011         break;
3012
3013       unsigned HOST_WIDE_INT size_a = tree_to_uhwi (TYPE_SIZE_UNIT (type_a));
3014       unsigned HOST_WIDE_INT size_b = tree_to_uhwi (TYPE_SIZE_UNIT (type_b));
3015       if (size_a <= size_b)
3016         {
3017           index_a += 1;
3018           ref_a = object_a;
3019         }
3020       if (size_b <= size_a)
3021         {
3022           index_b += 1;
3023           ref_b = object_b;
3024         }
3025     }
3026
3027   /* See whether FULL_SEQ ends at the base and whether the two bases
3028      are equal.  We do not care about TBAA or alignment info so we can
3029      use OEP_ADDRESS_OF to avoid false negatives.  */
3030   tree base_a = DR_BASE_OBJECT (a);
3031   tree base_b = DR_BASE_OBJECT (b);
3032   bool same_base_p = (full_seq.start_a + full_seq.length == num_dimensions_a
3033                       && full_seq.start_b + full_seq.length == num_dimensions_b
3034                       && DR_UNCONSTRAINED_BASE (a) == DR_UNCONSTRAINED_BASE (b)
3035                       && operand_equal_p (base_a, base_b, OEP_ADDRESS_OF)
3036                       && types_compatible_p (TREE_TYPE (base_a),
3037                                              TREE_TYPE (base_b))
3038                       && (!loop_nest.exists ()
3039                           || (object_address_invariant_in_loop_p
3040                               (loop_nest[0], base_a))));
3041
3042   /* If the bases are the same, we can include the base variation too.
3043      E.g. the b accesses in:
3044
3045        for (int i = 0; i < n; ++i)
3046          b[i + 4][0] = b[i][0];
3047
3048      have a definite dependence distance of 4, while for:
3049
3050        for (int i = 0; i < n; ++i)
3051          a[i + 4][0] = b[i][0];
3052
3053      the dependence distance depends on the gap between a and b.
3054
3055      If the bases are different then we can only rely on the sequence
3056      rooted at a structure access, since arrays are allowed to overlap
3057      arbitrarily and change shape arbitrarily.  E.g. we treat this as
3058      valid code:
3059
3060        int a[256];
3061        ...
3062        ((int (*)[4][3]) &a[1])[i][0] += ((int (*)[4][3]) &a[2])[i][0];
3063
3064      where two lvalues with the same int[4][3] type overlap, and where
3065      both lvalues are distinct from the object's declared type.  */
3066   if (same_base_p)
3067     {
3068       if (DR_UNCONSTRAINED_BASE (a))
3069         full_seq.length += 1;
3070     }
3071   else
3072     full_seq = struct_seq;
3073
3074   /* Punt if we didn't find a suitable sequence.  */
3075   if (full_seq.length == 0)
3076     {
3077       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3078       return res;
3079     }
3080
3081   if (!same_base_p)
3082     {
3083       /* Partial overlap is possible for different bases when strict aliasing
3084          is not in effect.  It's also possible if either base involves a union
3085          access; e.g. for:
3086
3087            struct s1 { int a[2]; };
3088            struct s2 { struct s1 b; int c; };
3089            struct s3 { int d; struct s1 e; };
3090            union u { struct s2 f; struct s3 g; } *p, *q;
3091
3092          the s1 at "p->f.b" (base "p->f") partially overlaps the s1 at
3093          "p->g.e" (base "p->g") and might partially overlap the s1 at
3094          "q->g.e" (base "q->g").  */
3095       if (!flag_strict_aliasing
3096           || ref_contains_union_access_p (full_seq.object_a)
3097           || ref_contains_union_access_p (full_seq.object_b))
3098         {
3099           DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3100           return res;
3101         }
3102
3103       DDR_COULD_BE_INDEPENDENT_P (res) = true;
3104       if (!loop_nest.exists ()
3105           || (object_address_invariant_in_loop_p (loop_nest[0],
3106                                                   full_seq.object_a)
3107               && object_address_invariant_in_loop_p (loop_nest[0],
3108                                                      full_seq.object_b)))
3109         {
3110           DDR_OBJECT_A (res) = full_seq.object_a;
3111           DDR_OBJECT_B (res) = full_seq.object_b;
3112         }
3113     }
3114
3115   DDR_AFFINE_P (res) = true;
3116   DDR_ARE_DEPENDENT (res) = NULL_TREE;
3117   DDR_SUBSCRIPTS (res).create (full_seq.length);
3118   DDR_LOOP_NEST (res) = loop_nest;
3119   DDR_SELF_REFERENCE (res) = false;
3120
3121   for (i = 0; i < full_seq.length; ++i)
3122     {
3123       struct subscript *subscript;
3124
3125       subscript = XNEW (struct subscript);
3126       SUB_ACCESS_FN (subscript, 0) = DR_ACCESS_FN (a, full_seq.start_a + i);
3127       SUB_ACCESS_FN (subscript, 1) = DR_ACCESS_FN (b, full_seq.start_b + i);
3128       SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
3129       SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
3130       SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
3131       SUB_DISTANCE (subscript) = chrec_dont_know;
3132       DDR_SUBSCRIPTS (res).safe_push (subscript);
3133     }
3134
3135   return res;
3136 }
3137
3138 /* Frees memory used by the conflict function F.  */
3139
3140 static void
3141 free_conflict_function (conflict_function *f)
3142 {
3143   unsigned i;
3144
3145   if (CF_NONTRIVIAL_P (f))
3146     {
3147       for (i = 0; i < f->n; i++)
3148         affine_fn_free (f->fns[i]);
3149     }
3150   free (f);
3151 }
3152
3153 /* Frees memory used by SUBSCRIPTS.  */
3154
3155 static void
3156 free_subscripts (vec<subscript_p> subscripts)
3157 {
3158   unsigned i;
3159   subscript_p s;
3160
3161   FOR_EACH_VEC_ELT (subscripts, i, s)
3162     {
3163       free_conflict_function (s->conflicting_iterations_in_a);
3164       free_conflict_function (s->conflicting_iterations_in_b);
3165       free (s);
3166     }
3167   subscripts.release ();
3168 }
3169
3170 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
3171    description.  */
3172
3173 static inline void
3174 finalize_ddr_dependent (struct data_dependence_relation *ddr,
3175                         tree chrec)
3176 {
3177   DDR_ARE_DEPENDENT (ddr) = chrec;
3178   free_subscripts (DDR_SUBSCRIPTS (ddr));
3179   DDR_SUBSCRIPTS (ddr).create (0);
3180 }
3181
3182 /* The dependence relation DDR cannot be represented by a distance
3183    vector.  */
3184
3185 static inline void
3186 non_affine_dependence_relation (struct data_dependence_relation *ddr)
3187 {
3188   if (dump_file && (dump_flags & TDF_DETAILS))
3189     fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
3190
3191   DDR_AFFINE_P (ddr) = false;
3192 }
3193
3194 \f
3195
3196 /* This section contains the classic Banerjee tests.  */
3197
3198 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
3199    variables, i.e., if the ZIV (Zero Index Variable) test is true.  */
3200
3201 static inline bool
3202 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
3203 {
3204   return (evolution_function_is_constant_p (chrec_a)
3205           && evolution_function_is_constant_p (chrec_b));
3206 }
3207
3208 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
3209    variable, i.e., if the SIV (Single Index Variable) test is true.  */
3210
3211 static bool
3212 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
3213 {
3214   if ((evolution_function_is_constant_p (chrec_a)
3215        && evolution_function_is_univariate_p (chrec_b))
3216       || (evolution_function_is_constant_p (chrec_b)
3217           && evolution_function_is_univariate_p (chrec_a)))
3218     return true;
3219
3220   if (evolution_function_is_univariate_p (chrec_a)
3221       && evolution_function_is_univariate_p (chrec_b))
3222     {
3223       switch (TREE_CODE (chrec_a))
3224         {
3225         case POLYNOMIAL_CHREC:
3226           switch (TREE_CODE (chrec_b))
3227             {
3228             case POLYNOMIAL_CHREC:
3229               if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
3230                 return false;
3231               /* FALLTHRU */
3232
3233             default:
3234               return true;
3235             }
3236
3237         default:
3238           return true;
3239         }
3240     }
3241
3242   return false;
3243 }
3244
3245 /* Creates a conflict function with N dimensions.  The affine functions
3246    in each dimension follow.  */
3247
3248 static conflict_function *
3249 conflict_fn (unsigned n, ...)
3250 {
3251   unsigned i;
3252   conflict_function *ret = XCNEW (conflict_function);
3253   va_list ap;
3254
3255   gcc_assert (n > 0 && n <= MAX_DIM);
3256   va_start (ap, n);
3257
3258   ret->n = n;
3259   for (i = 0; i < n; i++)
3260     ret->fns[i] = va_arg (ap, affine_fn);
3261   va_end (ap);
3262
3263   return ret;
3264 }
3265
3266 /* Returns constant affine function with value CST.  */
3267
3268 static affine_fn
3269 affine_fn_cst (tree cst)
3270 {
3271   affine_fn fn;
3272   fn.create (1);
3273   fn.quick_push (cst);
3274   return fn;
3275 }
3276
3277 /* Returns affine function with single variable, CST + COEF * x_DIM.  */
3278
3279 static affine_fn
3280 affine_fn_univar (tree cst, unsigned dim, tree coef)
3281 {
3282   affine_fn fn;
3283   fn.create (dim + 1);
3284   unsigned i;
3285
3286   gcc_assert (dim > 0);
3287   fn.quick_push (cst);
3288   for (i = 1; i < dim; i++)
3289     fn.quick_push (integer_zero_node);
3290   fn.quick_push (coef);
3291   return fn;
3292 }
3293
3294 /* Analyze a ZIV (Zero Index Variable) subscript.  *OVERLAPS_A and
3295    *OVERLAPS_B are initialized to the functions that describe the
3296    relation between the elements accessed twice by CHREC_A and
3297    CHREC_B.  For k >= 0, the following property is verified:
3298
3299    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
3300
3301 static void
3302 analyze_ziv_subscript (tree chrec_a,
3303                        tree chrec_b,
3304                        conflict_function **overlaps_a,
3305                        conflict_function **overlaps_b,
3306                        tree *last_conflicts)
3307 {
3308   tree type, difference;
3309   dependence_stats.num_ziv++;
3310
3311   if (dump_file && (dump_flags & TDF_DETAILS))
3312     fprintf (dump_file, "(analyze_ziv_subscript \n");
3313
3314   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
3315   chrec_a = chrec_convert (type, chrec_a, NULL);
3316   chrec_b = chrec_convert (type, chrec_b, NULL);
3317   difference = chrec_fold_minus (type, chrec_a, chrec_b);
3318
3319   switch (TREE_CODE (difference))
3320     {
3321     case INTEGER_CST:
3322       if (integer_zerop (difference))
3323         {
3324           /* The difference is equal to zero: the accessed index
3325              overlaps for each iteration in the loop.  */
3326           *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3327           *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3328           *last_conflicts = chrec_dont_know;
3329           dependence_stats.num_ziv_dependent++;
3330         }
3331       else
3332         {
3333           /* The accesses do not overlap.  */
3334           *overlaps_a = conflict_fn_no_dependence ();
3335           *overlaps_b = conflict_fn_no_dependence ();
3336           *last_conflicts = integer_zero_node;
3337           dependence_stats.num_ziv_independent++;
3338         }
3339       break;
3340
3341     default:
3342       /* We're not sure whether the indexes overlap.  For the moment,
3343          conservatively answer "don't know".  */
3344       if (dump_file && (dump_flags & TDF_DETAILS))
3345         fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
3346
3347       *overlaps_a = conflict_fn_not_known ();
3348       *overlaps_b = conflict_fn_not_known ();
3349       *last_conflicts = chrec_dont_know;
3350       dependence_stats.num_ziv_unimplemented++;
3351       break;
3352     }
3353
3354   if (dump_file && (dump_flags & TDF_DETAILS))
3355     fprintf (dump_file, ")\n");
3356 }
3357
3358 /* Similar to max_stmt_executions_int, but returns the bound as a tree,
3359    and only if it fits to the int type.  If this is not the case, or the
3360    bound  on the number of iterations of LOOP could not be derived, returns
3361    chrec_dont_know.  */
3362
3363 static tree
3364 max_stmt_executions_tree (class loop *loop)
3365 {
3366   widest_int nit;
3367
3368   if (!max_stmt_executions (loop, &nit))
3369     return chrec_dont_know;
3370
3371   if (!wi::fits_to_tree_p (nit, unsigned_type_node))
3372     return chrec_dont_know;
3373
3374   return wide_int_to_tree (unsigned_type_node, nit);
3375 }
3376
3377 /* Determine whether the CHREC is always positive/negative.  If the expression
3378    cannot be statically analyzed, return false, otherwise set the answer into
3379    VALUE.  */
3380
3381 static bool
3382 chrec_is_positive (tree chrec, bool *value)
3383 {
3384   bool value0, value1, value2;
3385   tree end_value, nb_iter;
3386
3387   switch (TREE_CODE (chrec))
3388     {
3389     case POLYNOMIAL_CHREC:
3390       if (!chrec_is_positive (CHREC_LEFT (chrec), &value0)
3391           || !chrec_is_positive (CHREC_RIGHT (chrec), &value1))
3392         return false;
3393
3394       /* FIXME -- overflows.  */
3395       if (value0 == value1)
3396         {
3397           *value = value0;
3398           return true;
3399         }
3400
3401       /* Otherwise the chrec is under the form: "{-197, +, 2}_1",
3402          and the proof consists in showing that the sign never
3403          changes during the execution of the loop, from 0 to
3404          loop->nb_iterations.  */
3405       if (!evolution_function_is_affine_p (chrec))
3406         return false;
3407
3408       nb_iter = number_of_latch_executions (get_chrec_loop (chrec));
3409       if (chrec_contains_undetermined (nb_iter))
3410         return false;
3411
3412 #if 0
3413       /* TODO -- If the test is after the exit, we may decrease the number of
3414          iterations by one.  */
3415       if (after_exit)
3416         nb_iter = chrec_fold_minus (type, nb_iter, build_int_cst (type, 1));
3417 #endif
3418
3419       end_value = chrec_apply (CHREC_VARIABLE (chrec), chrec, nb_iter);
3420
3421       if (!chrec_is_positive (end_value, &value2))
3422         return false;
3423
3424       *value = value0;
3425       return value0 == value1;
3426
3427     case INTEGER_CST:
3428       switch (tree_int_cst_sgn (chrec))
3429         {
3430         case -1:
3431           *value = false;
3432           break;
3433         case 1:
3434           *value = true;
3435           break;
3436         default:
3437           return false;
3438         }
3439       return true;
3440
3441     default:
3442       return false;
3443     }
3444 }
3445
3446
3447 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
3448    constant, and CHREC_B is an affine function.  *OVERLAPS_A and
3449    *OVERLAPS_B are initialized to the functions that describe the
3450    relation between the elements accessed twice by CHREC_A and
3451    CHREC_B.  For k >= 0, the following property is verified:
3452
3453    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
3454
3455 static void
3456 analyze_siv_subscript_cst_affine (tree chrec_a,
3457                                   tree chrec_b,
3458                                   conflict_function **overlaps_a,
3459                                   conflict_function **overlaps_b,
3460                                   tree *last_conflicts)
3461 {
3462   bool value0, value1, value2;
3463   tree type, difference, tmp;
3464
3465   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
3466   chrec_a = chrec_convert (type, chrec_a, NULL);
3467   chrec_b = chrec_convert (type, chrec_b, NULL);
3468   difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
3469
3470   /* Special case overlap in the first iteration.  */
3471   if (integer_zerop (difference))
3472     {
3473       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3474       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3475       *last_conflicts = integer_one_node;
3476       return;
3477     }
3478
3479   if (!chrec_is_positive (initial_condition (difference), &value0))
3480     {
3481       if (dump_file && (dump_flags & TDF_DETAILS))
3482         fprintf (dump_file, "siv test failed: chrec is not positive.\n");
3483
3484       dependence_stats.num_siv_unimplemented++;
3485       *overlaps_a = conflict_fn_not_known ();
3486       *overlaps_b = conflict_fn_not_known ();
3487       *last_conflicts = chrec_dont_know;
3488       return;
3489     }
3490   else
3491     {
3492       if (value0 == false)
3493         {
3494           if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
3495               || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
3496             {
3497               if (dump_file && (dump_flags & TDF_DETAILS))
3498                 fprintf (dump_file, "siv test failed: chrec not positive.\n");
3499
3500               *overlaps_a = conflict_fn_not_known ();
3501               *overlaps_b = conflict_fn_not_known ();
3502               *last_conflicts = chrec_dont_know;
3503               dependence_stats.num_siv_unimplemented++;
3504               return;
3505             }
3506           else
3507             {
3508               if (value1 == true)
3509                 {
3510                   /* Example:
3511                      chrec_a = 12
3512                      chrec_b = {10, +, 1}
3513                   */
3514
3515                   if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
3516                     {
3517                       HOST_WIDE_INT numiter;
3518                       class loop *loop = get_chrec_loop (chrec_b);
3519
3520                       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3521                       tmp = fold_build2 (EXACT_DIV_EXPR, type,
3522                                          fold_build1 (ABS_EXPR, type, difference),
3523                                          CHREC_RIGHT (chrec_b));
3524                       *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
3525                       *last_conflicts = integer_one_node;
3526
3527
3528                       /* Perform weak-zero siv test to see if overlap is
3529                          outside the loop bounds.  */
3530                       numiter = max_stmt_executions_int (loop);
3531
3532                       if (numiter >= 0
3533                           && compare_tree_int (tmp, numiter) > 0)
3534                         {
3535                           free_conflict_function (*overlaps_a);
3536                           free_conflict_function (*overlaps_b);
3537                           *overlaps_a = conflict_fn_no_dependence ();
3538                           *overlaps_b = conflict_fn_no_dependence ();
3539                           *last_conflicts = integer_zero_node;
3540                           dependence_stats.num_siv_independent++;
3541                           return;
3542                         }
3543                       dependence_stats.num_siv_dependent++;
3544                       return;
3545                     }
3546
3547                   /* When the step does not divide the difference, there are
3548                      no overlaps.  */
3549                   else
3550                     {
3551                       *overlaps_a = conflict_fn_no_dependence ();
3552                       *overlaps_b = conflict_fn_no_dependence ();
3553                       *last_conflicts = integer_zero_node;
3554                       dependence_stats.num_siv_independent++;
3555                       return;
3556                     }
3557                 }
3558
3559               else
3560                 {
3561                   /* Example:
3562                      chrec_a = 12
3563                      chrec_b = {10, +, -1}
3564
3565                      In this case, chrec_a will not overlap with chrec_b.  */
3566                   *overlaps_a = conflict_fn_no_dependence ();
3567                   *overlaps_b = conflict_fn_no_dependence ();
3568                   *last_conflicts = integer_zero_node;
3569                   dependence_stats.num_siv_independent++;
3570                   return;
3571                 }
3572             }
3573         }
3574       else
3575         {
3576           if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
3577               || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
3578             {
3579               if (dump_file && (dump_flags & TDF_DETAILS))
3580                 fprintf (dump_file, "siv test failed: chrec not positive.\n");
3581
3582               *overlaps_a = conflict_fn_not_known ();
3583               *overlaps_b = conflict_fn_not_known ();
3584               *last_conflicts = chrec_dont_know;
3585               dependence_stats.num_siv_unimplemented++;
3586               return;
3587             }
3588           else
3589             {
3590               if (value2 == false)
3591                 {
3592                   /* Example:
3593                      chrec_a = 3
3594                      chrec_b = {10, +, -1}
3595                   */
3596                   if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
3597                     {
3598                       HOST_WIDE_INT numiter;
3599                       class loop *loop = get_chrec_loop (chrec_b);
3600
3601                       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3602                       tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
3603                                          CHREC_RIGHT (chrec_b));
3604                       *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
3605                       *last_conflicts = integer_one_node;
3606
3607                       /* Perform weak-zero siv test to see if overlap is
3608                          outside the loop bounds.  */
3609                       numiter = max_stmt_executions_int (loop);
3610
3611                       if (numiter >= 0
3612                           && compare_tree_int (tmp, numiter) > 0)
3613                         {
3614                           free_conflict_function (*overlaps_a);
3615                           free_conflict_function (*overlaps_b);
3616                           *overlaps_a = conflict_fn_no_dependence ();
3617                           *overlaps_b = conflict_fn_no_dependence ();
3618                           *last_conflicts = integer_zero_node;
3619                           dependence_stats.num_siv_independent++;
3620                           return;
3621                         }
3622                       dependence_stats.num_siv_dependent++;
3623                       return;
3624                     }
3625
3626                   /* When the step does not divide the difference, there
3627                      are no overlaps.  */
3628                   else
3629                     {
3630                       *overlaps_a = conflict_fn_no_dependence ();
3631                       *overlaps_b = conflict_fn_no_dependence ();
3632                       *last_conflicts = integer_zero_node;
3633                       dependence_stats.num_siv_independent++;
3634                       return;
3635                     }
3636                 }
3637               else
3638                 {
3639                   /* Example:
3640                      chrec_a = 3
3641                      chrec_b = {4, +, 1}
3642
3643                      In this case, chrec_a will not overlap with chrec_b.  */
3644                   *overlaps_a = conflict_fn_no_dependence ();
3645                   *overlaps_b = conflict_fn_no_dependence ();
3646                   *last_conflicts = integer_zero_node;
3647                   dependence_stats.num_siv_independent++;
3648                   return;
3649                 }
3650             }
3651         }
3652     }
3653 }
3654
3655 /* Helper recursive function for initializing the matrix A.  Returns
3656    the initial value of CHREC.  */
3657
3658 static tree
3659 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
3660 {
3661   gcc_assert (chrec);
3662
3663   switch (TREE_CODE (chrec))
3664     {
3665     case POLYNOMIAL_CHREC:
3666       if (!cst_and_fits_in_hwi (CHREC_RIGHT (chrec)))
3667         return chrec_dont_know;
3668       A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
3669       return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
3670
3671     case PLUS_EXPR:
3672     case MULT_EXPR:
3673     case MINUS_EXPR:
3674       {
3675         tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
3676         tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
3677
3678         return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
3679       }
3680
3681     CASE_CONVERT:
3682       {
3683         tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
3684         return chrec_convert (chrec_type (chrec), op, NULL);
3685       }
3686
3687     case BIT_NOT_EXPR:
3688       {
3689         /* Handle ~X as -1 - X.  */
3690         tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
3691         return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
3692                               build_int_cst (TREE_TYPE (chrec), -1), op);
3693       }
3694
3695     case INTEGER_CST:
3696       return chrec;
3697
3698     default:
3699       gcc_unreachable ();
3700       return NULL_TREE;
3701     }
3702 }
3703
3704 #define FLOOR_DIV(x,y) ((x) / (y))
3705
3706 /* Solves the special case of the Diophantine equation:
3707    | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
3708
3709    Computes the descriptions OVERLAPS_A and OVERLAPS_B.  NITER is the
3710    number of iterations that loops X and Y run.  The overlaps will be
3711    constructed as evolutions in dimension DIM.  */
3712
3713 static void
3714 compute_overlap_steps_for_affine_univar (HOST_WIDE_INT niter,
3715                                          HOST_WIDE_INT step_a,
3716                                          HOST_WIDE_INT step_b,
3717                                          affine_fn *overlaps_a,
3718                                          affine_fn *overlaps_b,
3719                                          tree *last_conflicts, int dim)
3720 {
3721   if (((step_a > 0 && step_b > 0)
3722        || (step_a < 0 && step_b < 0)))
3723     {
3724       HOST_WIDE_INT step_overlaps_a, step_overlaps_b;
3725       HOST_WIDE_INT gcd_steps_a_b, last_conflict, tau2;
3726
3727       gcd_steps_a_b = gcd (step_a, step_b);
3728       step_overlaps_a = step_b / gcd_steps_a_b;
3729       step_overlaps_b = step_a / gcd_steps_a_b;
3730
3731       if (niter > 0)
3732         {
3733           tau2 = FLOOR_DIV (niter, step_overlaps_a);
3734           tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
3735           last_conflict = tau2;
3736           *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
3737         }
3738       else
3739         *last_conflicts = chrec_dont_know;
3740
3741       *overlaps_a = affine_fn_univar (integer_zero_node, dim,
3742                                       build_int_cst (NULL_TREE,
3743                                                      step_overlaps_a));
3744       *overlaps_b = affine_fn_univar (integer_zero_node, dim,
3745                                       build_int_cst (NULL_TREE,
3746                                                      step_overlaps_b));
3747     }
3748
3749   else
3750     {
3751       *overlaps_a = affine_fn_cst (integer_zero_node);
3752       *overlaps_b = affine_fn_cst (integer_zero_node);
3753       *last_conflicts = integer_zero_node;
3754     }
3755 }
3756
3757 /* Solves the special case of a Diophantine equation where CHREC_A is
3758    an affine bivariate function, and CHREC_B is an affine univariate
3759    function.  For example,
3760
3761    | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
3762
3763    has the following overlapping functions:
3764
3765    | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
3766    | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
3767    | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
3768
3769    FORNOW: This is a specialized implementation for a case occurring in
3770    a common benchmark.  Implement the general algorithm.  */
3771
3772 static void
3773 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
3774                                       conflict_function **overlaps_a,
3775                                       conflict_function **overlaps_b,
3776                                       tree *last_conflicts)
3777 {
3778   bool xz_p, yz_p, xyz_p;
3779   HOST_WIDE_INT step_x, step_y, step_z;
3780   HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
3781   affine_fn overlaps_a_xz, overlaps_b_xz;
3782   affine_fn overlaps_a_yz, overlaps_b_yz;
3783   affine_fn overlaps_a_xyz, overlaps_b_xyz;
3784   affine_fn ova1, ova2, ovb;
3785   tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
3786
3787   step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
3788   step_y = int_cst_value (CHREC_RIGHT (chrec_a));
3789   step_z = int_cst_value (CHREC_RIGHT (chrec_b));
3790
3791   niter_x = max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)));
3792   niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a));
3793   niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b));
3794
3795   if (niter_x < 0 || niter_y < 0 || niter_z < 0)
3796     {
3797       if (dump_file && (dump_flags & TDF_DETAILS))
3798         fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
3799
3800       *overlaps_a = conflict_fn_not_known ();
3801       *overlaps_b = conflict_fn_not_known ();
3802       *last_conflicts = chrec_dont_know;
3803       return;
3804     }
3805
3806   niter = MIN (niter_x, niter_z);
3807   compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
3808                                            &overlaps_a_xz,
3809                                            &overlaps_b_xz,
3810                                            &last_conflicts_xz, 1);
3811   niter = MIN (niter_y, niter_z);
3812   compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
3813                                            &overlaps_a_yz,
3814                                            &overlaps_b_yz,
3815                                            &last_conflicts_yz, 2);
3816   niter = MIN (niter_x, niter_z);
3817   niter = MIN (niter_y, niter);
3818   compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
3819                                            &overlaps_a_xyz,
3820                                            &overlaps_b_xyz,
3821                                            &last_conflicts_xyz, 3);
3822
3823   xz_p = !integer_zerop (last_conflicts_xz);
3824   yz_p = !integer_zerop (last_conflicts_yz);
3825   xyz_p = !integer_zerop (last_conflicts_xyz);
3826
3827   if (xz_p || yz_p || xyz_p)
3828     {
3829       ova1 = affine_fn_cst (integer_zero_node);
3830       ova2 = affine_fn_cst (integer_zero_node);
3831       ovb = affine_fn_cst (integer_zero_node);
3832       if (xz_p)
3833         {
3834           affine_fn t0 = ova1;
3835           affine_fn t2 = ovb;
3836
3837           ova1 = affine_fn_plus (ova1, overlaps_a_xz);
3838           ovb = affine_fn_plus (ovb, overlaps_b_xz);
3839           affine_fn_free (t0);
3840           affine_fn_free (t2);
3841           *last_conflicts = last_conflicts_xz;
3842         }
3843       if (yz_p)
3844         {
3845           affine_fn t0 = ova2;
3846           affine_fn t2 = ovb;
3847
3848           ova2 = affine_fn_plus (ova2, overlaps_a_yz);
3849           ovb = affine_fn_plus (ovb, overlaps_b_yz);
3850           affine_fn_free (t0);
3851           affine_fn_free (t2);
3852           *last_conflicts = last_conflicts_yz;
3853         }
3854       if (xyz_p)
3855         {
3856           affine_fn t0 = ova1;
3857           affine_fn t2 = ova2;
3858           affine_fn t4 = ovb;
3859
3860           ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
3861           ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
3862           ovb = affine_fn_plus (ovb, overlaps_b_xyz);
3863           affine_fn_free (t0);
3864           affine_fn_free (t2);
3865           affine_fn_free (t4);
3866           *last_conflicts = last_conflicts_xyz;
3867         }
3868       *overlaps_a = conflict_fn (2, ova1, ova2);
3869       *overlaps_b = conflict_fn (1, ovb);
3870     }
3871   else
3872     {
3873       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3874       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3875       *last_conflicts = integer_zero_node;
3876     }
3877
3878   affine_fn_free (overlaps_a_xz);
3879   affine_fn_free (overlaps_b_xz);
3880   affine_fn_free (overlaps_a_yz);
3881   affine_fn_free (overlaps_b_yz);
3882   affine_fn_free (overlaps_a_xyz);
3883   affine_fn_free (overlaps_b_xyz);
3884 }
3885
3886 /* Copy the elements of vector VEC1 with length SIZE to VEC2.  */
3887
3888 static void
3889 lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
3890                     int size)
3891 {
3892   memcpy (vec2, vec1, size * sizeof (*vec1));
3893 }
3894
3895 /* Copy the elements of M x N matrix MAT1 to MAT2.  */
3896
3897 static void
3898 lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
3899                     int m, int n)
3900 {
3901   int i;
3902
3903   for (i = 0; i < m; i++)
3904     lambda_vector_copy (mat1[i], mat2[i], n);
3905 }
3906
3907 /* Store the N x N identity matrix in MAT.  */
3908
3909 static void
3910 lambda_matrix_id (lambda_matrix mat, int size)
3911 {
3912   int i, j;
3913
3914   for (i = 0; i < size; i++)
3915     for (j = 0; j < size; j++)
3916       mat[i][j] = (i == j) ? 1 : 0;
3917 }
3918
3919 /* Return the index of the first nonzero element of vector VEC1 between
3920    START and N.  We must have START <= N.
3921    Returns N if VEC1 is the zero vector.  */
3922
3923 static int
3924 lambda_vector_first_nz (lambda_vector vec1, int n, int start)
3925 {
3926   int j = start;
3927   while (j < n && vec1[j] == 0)
3928     j++;
3929   return j;
3930 }
3931
3932 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
3933    R2 = R2 + CONST1 * R1.  */
3934
3935 static void
3936 lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2,
3937                        lambda_int const1)
3938 {
3939   int i;
3940
3941   if (const1 == 0)
3942     return;
3943
3944   for (i = 0; i < n; i++)
3945     mat[r2][i] += const1 * mat[r1][i];
3946 }
3947
3948 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
3949    and store the result in VEC2.  */
3950
3951 static void
3952 lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
3953                           int size, lambda_int const1)
3954 {
3955   int i;
3956
3957   if (const1 == 0)
3958     lambda_vector_clear (vec2, size);
3959   else
3960     for (i = 0; i < size; i++)
3961       vec2[i] = const1 * vec1[i];
3962 }
3963
3964 /* Negate vector VEC1 with length SIZE and store it in VEC2.  */
3965
3966 static void
3967 lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
3968                       int size)
3969 {
3970   lambda_vector_mult_const (vec1, vec2, size, -1);
3971 }
3972
3973 /* Negate row R1 of matrix MAT which has N columns.  */
3974
3975 static void
3976 lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
3977 {
3978   lambda_vector_negate (mat[r1], mat[r1], n);
3979 }
3980
3981 /* Return true if two vectors are equal.  */
3982
3983 static bool
3984 lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
3985 {
3986   int i;
3987   for (i = 0; i < size; i++)
3988     if (vec1[i] != vec2[i])
3989       return false;
3990   return true;
3991 }
3992
3993 /* Given an M x N integer matrix A, this function determines an M x
3994    M unimodular matrix U, and an M x N echelon matrix S such that
3995    "U.A = S".  This decomposition is also known as "right Hermite".
3996
3997    Ref: Algorithm 2.1 page 33 in "Loop Transformations for
3998    Restructuring Compilers" Utpal Banerjee.  */
3999
4000 static void
4001 lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
4002                              lambda_matrix S, lambda_matrix U)
4003 {
4004   int i, j, i0 = 0;
4005
4006   lambda_matrix_copy (A, S, m, n);
4007   lambda_matrix_id (U, m);
4008
4009   for (j = 0; j < n; j++)
4010     {
4011       if (lambda_vector_first_nz (S[j], m, i0) < m)
4012         {
4013           ++i0;
4014           for (i = m - 1; i >= i0; i--)
4015             {
4016               while (S[i][j] != 0)
4017                 {
4018                   lambda_int sigma, factor, a, b;
4019
4020                   a = S[i-1][j];
4021                   b = S[i][j];
4022                   sigma = (a * b < 0) ? -1: 1;
4023                   a = abs_hwi (a);
4024                   b = abs_hwi (b);
4025                   factor = sigma * (a / b);
4026
4027                   lambda_matrix_row_add (S, n, i, i-1, -factor);
4028                   std::swap (S[i], S[i-1]);
4029
4030                   lambda_matrix_row_add (U, m, i, i-1, -factor);
4031                   std::swap (U[i], U[i-1]);
4032                 }
4033             }
4034         }
4035     }
4036 }
4037
4038 /* Determines the overlapping elements due to accesses CHREC_A and
4039    CHREC_B, that are affine functions.  This function cannot handle
4040    symbolic evolution functions, ie. when initial conditions are
4041    parameters, because it uses lambda matrices of integers.  */
4042
4043 static void
4044 analyze_subscript_affine_affine (tree chrec_a,
4045                                  tree chrec_b,
4046                                  conflict_function **overlaps_a,
4047                                  conflict_function **overlaps_b,
4048                                  tree *last_conflicts)
4049 {
4050   unsigned nb_vars_a, nb_vars_b, dim;
4051   HOST_WIDE_INT gamma, gcd_alpha_beta;
4052   lambda_matrix A, U, S;
4053   struct obstack scratch_obstack;
4054
4055   if (eq_evolutions_p (chrec_a, chrec_b))
4056     {
4057       /* The accessed index overlaps for each iteration in the
4058          loop.  */
4059       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4060       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4061       *last_conflicts = chrec_dont_know;
4062       return;
4063     }
4064   if (dump_file && (dump_flags & TDF_DETAILS))
4065     fprintf (dump_file, "(analyze_subscript_affine_affine \n");
4066
4067   /* For determining the initial intersection, we have to solve a
4068      Diophantine equation.  This is the most time consuming part.
4069
4070      For answering to the question: "Is there a dependence?" we have
4071      to prove that there exists a solution to the Diophantine
4072      equation, and that the solution is in the iteration domain,
4073      i.e. the solution is positive or zero, and that the solution
4074      happens before the upper bound loop.nb_iterations.  Otherwise
4075      there is no dependence.  This function outputs a description of
4076      the iterations that hold the intersections.  */
4077
4078   nb_vars_a = nb_vars_in_chrec (chrec_a);
4079   nb_vars_b = nb_vars_in_chrec (chrec_b);
4080
4081   gcc_obstack_init (&scratch_obstack);
4082
4083   dim = nb_vars_a + nb_vars_b;
4084   U = lambda_matrix_new (dim, dim, &scratch_obstack);
4085   A = lambda_matrix_new (dim, 1, &scratch_obstack);
4086   S = lambda_matrix_new (dim, 1, &scratch_obstack);
4087
4088   tree init_a = initialize_matrix_A (A, chrec_a, 0, 1);
4089   tree init_b = initialize_matrix_A (A, chrec_b, nb_vars_a, -1);
4090   if (init_a == chrec_dont_know
4091       || init_b == chrec_dont_know)
4092     {
4093       if (dump_file && (dump_flags & TDF_DETAILS))
4094         fprintf (dump_file, "affine-affine test failed: "
4095                  "representation issue.\n");
4096       *overlaps_a = conflict_fn_not_known ();
4097       *overlaps_b = conflict_fn_not_known ();
4098       *last_conflicts = chrec_dont_know;
4099       goto end_analyze_subs_aa;
4100     }
4101   gamma = int_cst_value (init_b) - int_cst_value (init_a);
4102
4103   /* Don't do all the hard work of solving the Diophantine equation
4104      when we already know the solution: for example,
4105      | {3, +, 1}_1
4106      | {3, +, 4}_2
4107      | gamma = 3 - 3 = 0.
4108      Then the first overlap occurs during the first iterations:
4109      | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
4110   */
4111   if (gamma == 0)
4112     {
4113       if (nb_vars_a == 1 && nb_vars_b == 1)
4114         {
4115           HOST_WIDE_INT step_a, step_b;
4116           HOST_WIDE_INT niter, niter_a, niter_b;
4117           affine_fn ova, ovb;
4118
4119           niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a));
4120           niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b));
4121           niter = MIN (niter_a, niter_b);
4122           step_a = int_cst_value (CHREC_RIGHT (chrec_a));
4123           step_b = int_cst_value (CHREC_RIGHT (chrec_b));
4124
4125           compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
4126                                                    &ova, &ovb,
4127                                                    last_conflicts, 1);
4128           *overlaps_a = conflict_fn (1, ova);
4129           *overlaps_b = conflict_fn (1, ovb);
4130         }
4131
4132       else if (nb_vars_a == 2 && nb_vars_b == 1)
4133         compute_overlap_steps_for_affine_1_2
4134           (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
4135
4136       else if (nb_vars_a == 1 && nb_vars_b == 2)
4137         compute_overlap_steps_for_affine_1_2
4138           (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
4139
4140       else
4141         {
4142           if (dump_file && (dump_flags & TDF_DETAILS))
4143             fprintf (dump_file, "affine-affine test failed: too many variables.\n");
4144           *overlaps_a = conflict_fn_not_known ();
4145           *overlaps_b = conflict_fn_not_known ();
4146           *last_conflicts = chrec_dont_know;
4147         }
4148       goto end_analyze_subs_aa;
4149     }
4150
4151   /* U.A = S */
4152   lambda_matrix_right_hermite (A, dim, 1, S, U);
4153
4154   if (S[0][0] < 0)
4155     {
4156       S[0][0] *= -1;
4157       lambda_matrix_row_negate (U, dim, 0);
4158     }
4159   gcd_alpha_beta = S[0][0];
4160
4161   /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
4162      but that is a quite strange case.  Instead of ICEing, answer
4163      don't know.  */
4164   if (gcd_alpha_beta == 0)
4165     {
4166       *overlaps_a = conflict_fn_not_known ();
4167       *overlaps_b = conflict_fn_not_known ();
4168       *last_conflicts = chrec_dont_know;
4169       goto end_analyze_subs_aa;
4170     }
4171
4172   /* The classic "gcd-test".  */
4173   if (!int_divides_p (gcd_alpha_beta, gamma))
4174     {
4175       /* The "gcd-test" has determined that there is no integer
4176          solution, i.e. there is no dependence.  */
4177       *overlaps_a = conflict_fn_no_dependence ();
4178       *overlaps_b = conflict_fn_no_dependence ();
4179       *last_conflicts = integer_zero_node;
4180     }
4181
4182   /* Both access functions are univariate.  This includes SIV and MIV cases.  */
4183   else if (nb_vars_a == 1 && nb_vars_b == 1)
4184     {
4185       /* Both functions should have the same evolution sign.  */
4186       if (((A[0][0] > 0 && -A[1][0] > 0)
4187            || (A[0][0] < 0 && -A[1][0] < 0)))
4188         {
4189           /* The solutions are given by:
4190              |
4191              | [GAMMA/GCD_ALPHA_BETA  t].[u11 u12]  = [x0]
4192              |                           [u21 u22]    [y0]
4193
4194              For a given integer t.  Using the following variables,
4195
4196              | i0 = u11 * gamma / gcd_alpha_beta
4197              | j0 = u12 * gamma / gcd_alpha_beta
4198              | i1 = u21
4199              | j1 = u22
4200
4201              the solutions are:
4202
4203              | x0 = i0 + i1 * t,
4204              | y0 = j0 + j1 * t.  */
4205           HOST_WIDE_INT i0, j0, i1, j1;
4206
4207           i0 = U[0][0] * gamma / gcd_alpha_beta;
4208           j0 = U[0][1] * gamma / gcd_alpha_beta;
4209           i1 = U[1][0];
4210           j1 = U[1][1];
4211
4212           if ((i1 == 0 && i0 < 0)
4213               || (j1 == 0 && j0 < 0))
4214             {
4215               /* There is no solution.
4216                  FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
4217                  falls in here, but for the moment we don't look at the
4218                  upper bound of the iteration domain.  */
4219               *overlaps_a = conflict_fn_no_dependence ();
4220               *overlaps_b = conflict_fn_no_dependence ();
4221               *last_conflicts = integer_zero_node;
4222               goto end_analyze_subs_aa;
4223             }
4224
4225           if (i1 > 0 && j1 > 0)
4226             {
4227               HOST_WIDE_INT niter_a
4228                 = max_stmt_executions_int (get_chrec_loop (chrec_a));
4229               HOST_WIDE_INT niter_b
4230                 = max_stmt_executions_int (get_chrec_loop (chrec_b));
4231               HOST_WIDE_INT niter = MIN (niter_a, niter_b);
4232
4233               /* (X0, Y0) is a solution of the Diophantine equation:
4234                  "chrec_a (X0) = chrec_b (Y0)".  */
4235               HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
4236                                         CEIL (-j0, j1));
4237               HOST_WIDE_INT x0 = i1 * tau1 + i0;
4238               HOST_WIDE_INT y0 = j1 * tau1 + j0;
4239
4240               /* (X1, Y1) is the smallest positive solution of the eq
4241                  "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
4242                  first conflict occurs.  */
4243               HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
4244               HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
4245               HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
4246
4247               if (niter > 0)
4248                 {
4249                   /* If the overlap occurs outside of the bounds of the
4250                      loop, there is no dependence.  */
4251                   if (x1 >= niter_a || y1 >= niter_b)
4252                     {
4253                       *overlaps_a = conflict_fn_no_dependence ();
4254                       *overlaps_b = conflict_fn_no_dependence ();
4255                       *last_conflicts = integer_zero_node;
4256                       goto end_analyze_subs_aa;
4257                     }
4258
4259                   /* max stmt executions can get quite large, avoid
4260                      overflows by using wide ints here.  */
4261                   widest_int tau2
4262                     = wi::smin (wi::sdiv_floor (wi::sub (niter_a, i0), i1),
4263                                 wi::sdiv_floor (wi::sub (niter_b, j0), j1));
4264                   widest_int last_conflict = wi::sub (tau2, (x1 - i0)/i1);
4265                   if (wi::min_precision (last_conflict, SIGNED)
4266                       <= TYPE_PRECISION (integer_type_node))
4267                     *last_conflicts
4268                        = build_int_cst (integer_type_node,
4269                                         last_conflict.to_shwi ());
4270                   else
4271                     *last_conflicts = chrec_dont_know;
4272                 }
4273               else
4274                 *last_conflicts = chrec_dont_know;
4275
4276               *overlaps_a
4277                 = conflict_fn (1,
4278                                affine_fn_univar (build_int_cst (NULL_TREE, x1),
4279                                                  1,
4280                                                  build_int_cst (NULL_TREE, i1)));
4281               *overlaps_b
4282                 = conflict_fn (1,
4283                                affine_fn_univar (build_int_cst (NULL_TREE, y1),
4284                                                  1,
4285                                                  build_int_cst (NULL_TREE, j1)));
4286             }
4287           else
4288             {
4289               /* FIXME: For the moment, the upper bound of the
4290                  iteration domain for i and j is not checked.  */
4291               if (dump_file && (dump_flags & TDF_DETAILS))
4292                 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
4293               *overlaps_a = conflict_fn_not_known ();
4294               *overlaps_b = conflict_fn_not_known ();
4295               *last_conflicts = chrec_dont_know;
4296             }
4297         }
4298       else
4299         {
4300           if (dump_file && (dump_flags & TDF_DETAILS))
4301             fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
4302           *overlaps_a = conflict_fn_not_known ();
4303           *overlaps_b = conflict_fn_not_known ();
4304           *last_conflicts = chrec_dont_know;
4305         }
4306     }
4307   else
4308     {
4309       if (dump_file && (dump_flags & TDF_DETAILS))
4310         fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
4311       *overlaps_a = conflict_fn_not_known ();
4312       *overlaps_b = conflict_fn_not_known ();
4313       *last_conflicts = chrec_dont_know;
4314     }
4315
4316 end_analyze_subs_aa:
4317   obstack_free (&scratch_obstack, NULL);
4318   if (dump_file && (dump_flags & TDF_DETAILS))
4319     {
4320       fprintf (dump_file, "  (overlaps_a = ");
4321       dump_conflict_function (dump_file, *overlaps_a);
4322       fprintf (dump_file, ")\n  (overlaps_b = ");
4323       dump_conflict_function (dump_file, *overlaps_b);
4324       fprintf (dump_file, "))\n");
4325     }
4326 }
4327
4328 /* Returns true when analyze_subscript_affine_affine can be used for
4329    determining the dependence relation between chrec_a and chrec_b,
4330    that contain symbols.  This function modifies chrec_a and chrec_b
4331    such that the analysis result is the same, and such that they don't
4332    contain symbols, and then can safely be passed to the analyzer.
4333
4334    Example: The analysis of the following tuples of evolutions produce
4335    the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
4336    vs. {0, +, 1}_1
4337
4338    {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
4339    {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
4340 */
4341
4342 static bool
4343 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
4344 {
4345   tree diff, type, left_a, left_b, right_b;
4346
4347   if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
4348       || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
4349     /* FIXME: For the moment not handled.  Might be refined later.  */
4350     return false;
4351
4352   type = chrec_type (*chrec_a);
4353   left_a = CHREC_LEFT (*chrec_a);
4354   left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
4355   diff = chrec_fold_minus (type, left_a, left_b);
4356
4357   if (!evolution_function_is_constant_p (diff))
4358     return false;
4359
4360   if (dump_file && (dump_flags & TDF_DETAILS))
4361     fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
4362
4363   *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
4364                                      diff, CHREC_RIGHT (*chrec_a));
4365   right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
4366   *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
4367                                      build_int_cst (type, 0),
4368                                      right_b);
4369   return true;
4370 }
4371
4372 /* Analyze a SIV (Single Index Variable) subscript.  *OVERLAPS_A and
4373    *OVERLAPS_B are initialized to the functions that describe the
4374    relation between the elements accessed twice by CHREC_A and
4375    CHREC_B.  For k >= 0, the following property is verified:
4376
4377    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
4378
4379 static void
4380 analyze_siv_subscript (tree chrec_a,
4381                        tree chrec_b,
4382                        conflict_function **overlaps_a,
4383                        conflict_function **overlaps_b,
4384                        tree *last_conflicts,
4385                        int loop_nest_num)
4386 {
4387   dependence_stats.num_siv++;
4388
4389   if (dump_file && (dump_flags & TDF_DETAILS))
4390     fprintf (dump_file, "(analyze_siv_subscript \n");
4391
4392   if (evolution_function_is_constant_p (chrec_a)
4393       && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
4394     analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
4395                                       overlaps_a, overlaps_b, last_conflicts);
4396
4397   else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
4398            && evolution_function_is_constant_p (chrec_b))
4399     analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
4400                                       overlaps_b, overlaps_a, last_conflicts);
4401
4402   else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
4403            && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
4404     {
4405       if (!chrec_contains_symbols (chrec_a)
4406           && !chrec_contains_symbols (chrec_b))
4407         {
4408           analyze_subscript_affine_affine (chrec_a, chrec_b,
4409                                            overlaps_a, overlaps_b,
4410                                            last_conflicts);
4411
4412           if (CF_NOT_KNOWN_P (*overlaps_a)
4413               || CF_NOT_KNOWN_P (*overlaps_b))
4414             dependence_stats.num_siv_unimplemented++;
4415           else if (CF_NO_DEPENDENCE_P (*overlaps_a)
4416                    || CF_NO_DEPENDENCE_P (*overlaps_b))
4417             dependence_stats.num_siv_independent++;
4418           else
4419             dependence_stats.num_siv_dependent++;
4420         }
4421       else if (can_use_analyze_subscript_affine_affine (&chrec_a,
4422                                                         &chrec_b))
4423         {
4424           analyze_subscript_affine_affine (chrec_a, chrec_b,
4425                                            overlaps_a, overlaps_b,
4426                                            last_conflicts);
4427
4428           if (CF_NOT_KNOWN_P (*overlaps_a)
4429               || CF_NOT_KNOWN_P (*overlaps_b))
4430             dependence_stats.num_siv_unimplemented++;
4431           else if (CF_NO_DEPENDENCE_P (*overlaps_a)
4432                    || CF_NO_DEPENDENCE_P (*overlaps_b))
4433             dependence_stats.num_siv_independent++;
4434           else
4435             dependence_stats.num_siv_dependent++;
4436         }
4437       else
4438         goto siv_subscript_dontknow;
4439     }
4440
4441   else
4442     {
4443     siv_subscript_dontknow:;
4444       if (dump_file && (dump_flags & TDF_DETAILS))
4445         fprintf (dump_file, "  siv test failed: unimplemented");
4446       *overlaps_a = conflict_fn_not_known ();
4447       *overlaps_b = conflict_fn_not_known ();
4448       *last_conflicts = chrec_dont_know;
4449       dependence_stats.num_siv_unimplemented++;
4450     }
4451
4452   if (dump_file && (dump_flags & TDF_DETAILS))
4453     fprintf (dump_file, ")\n");
4454 }
4455
4456 /* Returns false if we can prove that the greatest common divisor of the steps
4457    of CHREC does not divide CST, false otherwise.  */
4458
4459 static bool
4460 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
4461 {
4462   HOST_WIDE_INT cd = 0, val;
4463   tree step;
4464
4465   if (!tree_fits_shwi_p (cst))
4466     return true;
4467   val = tree_to_shwi (cst);
4468
4469   while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
4470     {
4471       step = CHREC_RIGHT (chrec);
4472       if (!tree_fits_shwi_p (step))
4473         return true;
4474       cd = gcd (cd, tree_to_shwi (step));
4475       chrec = CHREC_LEFT (chrec);
4476     }
4477
4478   return val % cd == 0;
4479 }
4480
4481 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
4482    LOOP_NEST.  *OVERLAPS_A and *OVERLAPS_B are initialized to the
4483    functions that describe the relation between the elements accessed
4484    twice by CHREC_A and CHREC_B.  For k >= 0, the following property
4485    is verified:
4486
4487    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
4488
4489 static void
4490 analyze_miv_subscript (tree chrec_a,
4491                        tree chrec_b,
4492                        conflict_function **overlaps_a,
4493                        conflict_function **overlaps_b,
4494                        tree *last_conflicts,
4495                        class loop *loop_nest)
4496 {
4497   tree type, difference;
4498
4499   dependence_stats.num_miv++;
4500   if (dump_file && (dump_flags & TDF_DETAILS))
4501     fprintf (dump_file, "(analyze_miv_subscript \n");
4502
4503   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
4504   chrec_a = chrec_convert (type, chrec_a, NULL);
4505   chrec_b = chrec_convert (type, chrec_b, NULL);
4506   difference = chrec_fold_minus (type, chrec_a, chrec_b);
4507
4508   if (eq_evolutions_p (chrec_a, chrec_b))
4509     {
4510       /* Access functions are the same: all the elements are accessed
4511          in the same order.  */
4512       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4513       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4514       *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
4515       dependence_stats.num_miv_dependent++;
4516     }
4517
4518   else if (evolution_function_is_constant_p (difference)
4519            && evolution_function_is_affine_multivariate_p (chrec_a,
4520                                                            loop_nest->num)
4521            && !gcd_of_steps_may_divide_p (chrec_a, difference))
4522     {
4523       /* testsuite/.../ssa-chrec-33.c
4524          {{21, +, 2}_1, +, -2}_2  vs.  {{20, +, 2}_1, +, -2}_2
4525
4526          The difference is 1, and all the evolution steps are multiples
4527          of 2, consequently there are no overlapping elements.  */
4528       *overlaps_a = conflict_fn_no_dependence ();
4529       *overlaps_b = conflict_fn_no_dependence ();
4530       *last_conflicts = integer_zero_node;
4531       dependence_stats.num_miv_independent++;
4532     }
4533
4534   else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest->num)
4535            && !chrec_contains_symbols (chrec_a, loop_nest)
4536            && evolution_function_is_affine_in_loop (chrec_b, loop_nest->num)
4537            && !chrec_contains_symbols (chrec_b, loop_nest))
4538     {
4539       /* testsuite/.../ssa-chrec-35.c
4540          {0, +, 1}_2  vs.  {0, +, 1}_3
4541          the overlapping elements are respectively located at iterations:
4542          {0, +, 1}_x and {0, +, 1}_x,
4543          in other words, we have the equality:
4544          {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
4545
4546          Other examples:
4547          {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
4548          {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
4549
4550          {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
4551          {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
4552       */
4553       analyze_subscript_affine_affine (chrec_a, chrec_b,
4554                                        overlaps_a, overlaps_b, last_conflicts);
4555
4556       if (CF_NOT_KNOWN_P (*overlaps_a)
4557           || CF_NOT_KNOWN_P (*overlaps_b))
4558         dependence_stats.num_miv_unimplemented++;
4559       else if (CF_NO_DEPENDENCE_P (*overlaps_a)
4560                || CF_NO_DEPENDENCE_P (*overlaps_b))
4561         dependence_stats.num_miv_independent++;
4562       else
4563         dependence_stats.num_miv_dependent++;
4564     }
4565
4566   else
4567     {
4568       /* When the analysis is too difficult, answer "don't know".  */
4569       if (dump_file && (dump_flags & TDF_DETAILS))
4570         fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
4571
4572       *overlaps_a = conflict_fn_not_known ();
4573       *overlaps_b = conflict_fn_not_known ();
4574       *last_conflicts = chrec_dont_know;
4575       dependence_stats.num_miv_unimplemented++;
4576     }
4577
4578   if (dump_file && (dump_flags & TDF_DETAILS))
4579     fprintf (dump_file, ")\n");
4580 }
4581
4582 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
4583    with respect to LOOP_NEST.  OVERLAP_ITERATIONS_A and
4584    OVERLAP_ITERATIONS_B are initialized with two functions that
4585    describe the iterations that contain conflicting elements.
4586
4587    Remark: For an integer k >= 0, the following equality is true:
4588
4589    CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
4590 */
4591
4592 static void
4593 analyze_overlapping_iterations (tree chrec_a,
4594                                 tree chrec_b,
4595                                 conflict_function **overlap_iterations_a,
4596                                 conflict_function **overlap_iterations_b,
4597                                 tree *last_conflicts, class loop *loop_nest)
4598 {
4599   unsigned int lnn = loop_nest->num;
4600
4601   dependence_stats.num_subscript_tests++;
4602
4603   if (dump_file && (dump_flags & TDF_DETAILS))
4604     {
4605       fprintf (dump_file, "(analyze_overlapping_iterations \n");
4606       fprintf (dump_file, "  (chrec_a = ");
4607       print_generic_expr (dump_file, chrec_a);
4608       fprintf (dump_file, ")\n  (chrec_b = ");
4609       print_generic_expr (dump_file, chrec_b);
4610       fprintf (dump_file, ")\n");
4611     }
4612
4613   if (chrec_a == NULL_TREE
4614       || chrec_b == NULL_TREE
4615       || chrec_contains_undetermined (chrec_a)
4616       || chrec_contains_undetermined (chrec_b))
4617     {
4618       dependence_stats.num_subscript_undetermined++;
4619
4620       *overlap_iterations_a = conflict_fn_not_known ();
4621       *overlap_iterations_b = conflict_fn_not_known ();
4622     }
4623
4624   /* If they are the same chrec, and are affine, they overlap
4625      on every iteration.  */
4626   else if (eq_evolutions_p (chrec_a, chrec_b)
4627            && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
4628                || operand_equal_p (chrec_a, chrec_b, 0)))
4629     {
4630       dependence_stats.num_same_subscript_function++;
4631       *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4632       *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4633       *last_conflicts = chrec_dont_know;
4634     }
4635
4636   /* If they aren't the same, and aren't affine, we can't do anything
4637      yet.  */
4638   else if ((chrec_contains_symbols (chrec_a)
4639             || chrec_contains_symbols (chrec_b))
4640            && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
4641                || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
4642     {
4643       dependence_stats.num_subscript_undetermined++;
4644       *overlap_iterations_a = conflict_fn_not_known ();
4645       *overlap_iterations_b = conflict_fn_not_known ();
4646     }
4647
4648   else if (ziv_subscript_p (chrec_a, chrec_b))
4649     analyze_ziv_subscript (chrec_a, chrec_b,
4650                            overlap_iterations_a, overlap_iterations_b,
4651                            last_conflicts);
4652
4653   else if (siv_subscript_p (chrec_a, chrec_b))
4654     analyze_siv_subscript (chrec_a, chrec_b,
4655                            overlap_iterations_a, overlap_iterations_b,
4656                            last_conflicts, lnn);
4657
4658   else
4659     analyze_miv_subscript (chrec_a, chrec_b,
4660                            overlap_iterations_a, overlap_iterations_b,
4661                            last_conflicts, loop_nest);
4662
4663   if (dump_file && (dump_flags & TDF_DETAILS))
4664     {
4665       fprintf (dump_file, "  (overlap_iterations_a = ");
4666       dump_conflict_function (dump_file, *overlap_iterations_a);
4667       fprintf (dump_file, ")\n  (overlap_iterations_b = ");
4668       dump_conflict_function (dump_file, *overlap_iterations_b);
4669       fprintf (dump_file, "))\n");
4670     }
4671 }
4672
4673 /* Helper function for uniquely inserting distance vectors.  */
4674
4675 static void
4676 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
4677 {
4678   unsigned i;
4679   lambda_vector v;
4680
4681   FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, v)
4682     if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
4683       return;
4684
4685   DDR_DIST_VECTS (ddr).safe_push (dist_v);
4686 }
4687
4688 /* Helper function for uniquely inserting direction vectors.  */
4689
4690 static void
4691 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
4692 {
4693   unsigned i;
4694   lambda_vector v;
4695
4696   FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), i, v)
4697     if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
4698       return;
4699
4700   DDR_DIR_VECTS (ddr).safe_push (dir_v);
4701 }
4702
4703 /* Add a distance of 1 on all the loops outer than INDEX.  If we
4704    haven't yet determined a distance for this outer loop, push a new
4705    distance vector composed of the previous distance, and a distance
4706    of 1 for this outer loop.  Example:
4707
4708    | loop_1
4709    |   loop_2
4710    |     A[10]
4711    |   endloop_2
4712    | endloop_1
4713
4714    Saved vectors are of the form (dist_in_1, dist_in_2).  First, we
4715    save (0, 1), then we have to save (1, 0).  */
4716
4717 static void
4718 add_outer_distances (struct data_dependence_relation *ddr,
4719                      lambda_vector dist_v, int index)
4720 {
4721   /* For each outer loop where init_v is not set, the accesses are
4722      in dependence of distance 1 in the loop.  */
4723   while (--index >= 0)
4724     {
4725       lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4726       lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
4727       save_v[index] = 1;
4728       save_dist_v (ddr, save_v);
4729     }
4730 }
4731
4732 /* Return false when fail to represent the data dependence as a
4733    distance vector.  A_INDEX is the index of the first reference
4734    (0 for DDR_A, 1 for DDR_B) and B_INDEX is the index of the
4735    second reference.  INIT_B is set to true when a component has been
4736    added to the distance vector DIST_V.  INDEX_CARRY is then set to
4737    the index in DIST_V that carries the dependence.  */
4738
4739 static bool
4740 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
4741                              unsigned int a_index, unsigned int b_index,
4742                              lambda_vector dist_v, bool *init_b,
4743                              int *index_carry)
4744 {
4745   unsigned i;
4746   lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4747   class loop *loop = DDR_LOOP_NEST (ddr)[0];
4748
4749   for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
4750     {
4751       tree access_fn_a, access_fn_b;
4752       struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
4753
4754       if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
4755         {
4756           non_affine_dependence_relation (ddr);
4757           return false;
4758         }
4759
4760       access_fn_a = SUB_ACCESS_FN (subscript, a_index);
4761       access_fn_b = SUB_ACCESS_FN (subscript, b_index);
4762
4763       if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
4764           && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
4765         {
4766           HOST_WIDE_INT dist;
4767           int index;
4768           int var_a = CHREC_VARIABLE (access_fn_a);
4769           int var_b = CHREC_VARIABLE (access_fn_b);
4770
4771           if (var_a != var_b
4772               || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
4773             {
4774               non_affine_dependence_relation (ddr);
4775               return false;
4776             }
4777
4778           /* When data references are collected in a loop while data
4779              dependences are analyzed in loop nest nested in the loop, we
4780              would have more number of access functions than number of
4781              loops.  Skip access functions of loops not in the loop nest.
4782
4783              See PR89725 for more information.  */
4784           if (flow_loop_nested_p (get_loop (cfun, var_a), loop))
4785             continue;
4786
4787           dist = int_cst_value (SUB_DISTANCE (subscript));
4788           index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
4789           *index_carry = MIN (index, *index_carry);
4790
4791           /* This is the subscript coupling test.  If we have already
4792              recorded a distance for this loop (a distance coming from
4793              another subscript), it should be the same.  For example,
4794              in the following code, there is no dependence:
4795
4796              | loop i = 0, N, 1
4797              |   T[i+1][i] = ...
4798              |   ... = T[i][i]
4799              | endloop
4800           */
4801           if (init_v[index] != 0 && dist_v[index] != dist)
4802             {
4803               finalize_ddr_dependent (ddr, chrec_known);
4804               return false;
4805             }
4806
4807           dist_v[index] = dist;
4808           init_v[index] = 1;
4809           *init_b = true;
4810         }
4811       else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
4812         {
4813           /* This can be for example an affine vs. constant dependence
4814              (T[i] vs. T[3]) that is not an affine dependence and is
4815              not representable as a distance vector.  */
4816           non_affine_dependence_relation (ddr);
4817           return false;
4818         }
4819     }
4820
4821   return true;
4822 }
4823
4824 /* Return true when the DDR contains only constant access functions.  */
4825
4826 static bool
4827 constant_access_functions (const struct data_dependence_relation *ddr)
4828 {
4829   unsigned i;
4830   subscript *sub;
4831
4832   FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
4833     if (!evolution_function_is_constant_p (SUB_ACCESS_FN (sub, 0))
4834         || !evolution_function_is_constant_p (SUB_ACCESS_FN (sub, 1)))
4835       return false;
4836
4837   return true;
4838 }
4839
4840 /* Helper function for the case where DDR_A and DDR_B are the same
4841    multivariate access function with a constant step.  For an example
4842    see pr34635-1.c.  */
4843
4844 static void
4845 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
4846 {
4847   int x_1, x_2;
4848   tree c_1 = CHREC_LEFT (c_2);
4849   tree c_0 = CHREC_LEFT (c_1);
4850   lambda_vector dist_v;
4851   HOST_WIDE_INT v1, v2, cd;
4852
4853   /* Polynomials with more than 2 variables are not handled yet.  When
4854      the evolution steps are parameters, it is not possible to
4855      represent the dependence using classical distance vectors.  */
4856   if (TREE_CODE (c_0) != INTEGER_CST
4857       || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
4858       || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
4859     {
4860       DDR_AFFINE_P (ddr) = false;
4861       return;
4862     }
4863
4864   x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
4865   x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
4866
4867   /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2).  */
4868   dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4869   v1 = int_cst_value (CHREC_RIGHT (c_1));
4870   v2 = int_cst_value (CHREC_RIGHT (c_2));
4871   cd = gcd (v1, v2);
4872   v1 /= cd;
4873   v2 /= cd;
4874
4875   if (v2 < 0)
4876     {
4877       v2 = -v2;
4878       v1 = -v1;
4879     }
4880
4881   dist_v[x_1] = v2;
4882   dist_v[x_2] = -v1;
4883   save_dist_v (ddr, dist_v);
4884
4885   add_outer_distances (ddr, dist_v, x_1);
4886 }
4887
4888 /* Helper function for the case where DDR_A and DDR_B are the same
4889    access functions.  */
4890
4891 static void
4892 add_other_self_distances (struct data_dependence_relation *ddr)
4893 {
4894   lambda_vector dist_v;
4895   unsigned i;
4896   int index_carry = DDR_NB_LOOPS (ddr);
4897   subscript *sub;
4898   class loop *loop = DDR_LOOP_NEST (ddr)[0];
4899
4900   FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
4901     {
4902       tree access_fun = SUB_ACCESS_FN (sub, 0);
4903
4904       if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
4905         {
4906           if (!evolution_function_is_univariate_p (access_fun, loop->num))
4907             {
4908               if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
4909                 {
4910                   DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
4911                   return;
4912                 }
4913
4914               access_fun = SUB_ACCESS_FN (DDR_SUBSCRIPT (ddr, 0), 0);
4915
4916               if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
4917                 add_multivariate_self_dist (ddr, access_fun);
4918               else
4919                 /* The evolution step is not constant: it varies in
4920                    the outer loop, so this cannot be represented by a
4921                    distance vector.  For example in pr34635.c the
4922                    evolution is {0, +, {0, +, 4}_1}_2.  */
4923                 DDR_AFFINE_P (ddr) = false;
4924
4925               return;
4926             }
4927
4928           /* When data references are collected in a loop while data
4929              dependences are analyzed in loop nest nested in the loop, we
4930              would have more number of access functions than number of
4931              loops.  Skip access functions of loops not in the loop nest.
4932
4933              See PR89725 for more information.  */
4934           if (flow_loop_nested_p (get_loop (cfun, CHREC_VARIABLE (access_fun)),
4935                                   loop))
4936             continue;
4937
4938           index_carry = MIN (index_carry,
4939                              index_in_loop_nest (CHREC_VARIABLE (access_fun),
4940                                                  DDR_LOOP_NEST (ddr)));
4941         }
4942     }
4943
4944   dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4945   add_outer_distances (ddr, dist_v, index_carry);
4946 }
4947
4948 static void
4949 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
4950 {
4951   lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4952
4953   dist_v[0] = 1;
4954   save_dist_v (ddr, dist_v);
4955 }
4956
4957 /* Adds a unit distance vector to DDR when there is a 0 overlap.  This
4958    is the case for example when access functions are the same and
4959    equal to a constant, as in:
4960
4961    | loop_1
4962    |   A[3] = ...
4963    |   ... = A[3]
4964    | endloop_1
4965
4966    in which case the distance vectors are (0) and (1).  */
4967
4968 static void
4969 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
4970 {
4971   unsigned i, j;
4972
4973   for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
4974     {
4975       subscript_p sub = DDR_SUBSCRIPT (ddr, i);
4976       conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
4977       conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
4978
4979       for (j = 0; j < ca->n; j++)
4980         if (affine_function_zero_p (ca->fns[j]))
4981           {
4982             insert_innermost_unit_dist_vector (ddr);
4983             return;
4984           }
4985
4986       for (j = 0; j < cb->n; j++)
4987         if (affine_function_zero_p (cb->fns[j]))
4988           {
4989             insert_innermost_unit_dist_vector (ddr);
4990             return;
4991           }
4992     }
4993 }
4994
4995 /* Return true when the DDR contains two data references that have the
4996    same access functions.  */
4997
4998 static inline bool
4999 same_access_functions (const struct data_dependence_relation *ddr)
5000 {
5001   unsigned i;
5002   subscript *sub;
5003
5004   FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
5005     if (!eq_evolutions_p (SUB_ACCESS_FN (sub, 0),
5006                           SUB_ACCESS_FN (sub, 1)))
5007       return false;
5008
5009   return true;
5010 }
5011
5012 /* Compute the classic per loop distance vector.  DDR is the data
5013    dependence relation to build a vector from.  Return false when fail
5014    to represent the data dependence as a distance vector.  */
5015
5016 static bool
5017 build_classic_dist_vector (struct data_dependence_relation *ddr,
5018                            class loop *loop_nest)
5019 {
5020   bool init_b = false;
5021   int index_carry = DDR_NB_LOOPS (ddr);
5022   lambda_vector dist_v;
5023
5024   if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
5025     return false;
5026
5027   if (same_access_functions (ddr))
5028     {
5029       /* Save the 0 vector.  */
5030       dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5031       save_dist_v (ddr, dist_v);
5032
5033       if (constant_access_functions (ddr))
5034         add_distance_for_zero_overlaps (ddr);
5035
5036       if (DDR_NB_LOOPS (ddr) > 1)
5037         add_other_self_distances (ddr);
5038
5039       return true;
5040     }
5041
5042   dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5043   if (!build_classic_dist_vector_1 (ddr, 0, 1, dist_v, &init_b, &index_carry))
5044     return false;
5045
5046   /* Save the distance vector if we initialized one.  */
5047   if (init_b)
5048     {
5049       /* Verify a basic constraint: classic distance vectors should
5050          always be lexicographically positive.
5051
5052          Data references are collected in the order of execution of
5053          the program, thus for the following loop
5054
5055          | for (i = 1; i < 100; i++)
5056          |   for (j = 1; j < 100; j++)
5057          |     {
5058          |       t = T[j+1][i-1];  // A
5059          |       T[j][i] = t + 2;  // B
5060          |     }
5061
5062          references are collected following the direction of the wind:
5063          A then B.  The data dependence tests are performed also
5064          following this order, such that we're looking at the distance
5065          separating the elements accessed by A from the elements later
5066          accessed by B.  But in this example, the distance returned by
5067          test_dep (A, B) is lexicographically negative (-1, 1), that
5068          means that the access A occurs later than B with respect to
5069          the outer loop, ie. we're actually looking upwind.  In this
5070          case we solve test_dep (B, A) looking downwind to the
5071          lexicographically positive solution, that returns the
5072          distance vector (1, -1).  */
5073       if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
5074         {
5075           lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5076           if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
5077             return false;
5078           compute_subscript_distance (ddr);
5079           if (!build_classic_dist_vector_1 (ddr, 1, 0, save_v, &init_b,
5080                                             &index_carry))
5081             return false;
5082           save_dist_v (ddr, save_v);
5083           DDR_REVERSED_P (ddr) = true;
5084
5085           /* In this case there is a dependence forward for all the
5086              outer loops:
5087
5088              | for (k = 1; k < 100; k++)
5089              |  for (i = 1; i < 100; i++)
5090              |   for (j = 1; j < 100; j++)
5091              |     {
5092              |       t = T[j+1][i-1];  // A
5093              |       T[j][i] = t + 2;  // B
5094              |     }
5095
5096              the vectors are:
5097              (0,  1, -1)
5098              (1,  1, -1)
5099              (1, -1,  1)
5100           */
5101           if (DDR_NB_LOOPS (ddr) > 1)
5102             {
5103               add_outer_distances (ddr, save_v, index_carry);
5104               add_outer_distances (ddr, dist_v, index_carry);
5105             }
5106         }
5107       else
5108         {
5109           lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5110           lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
5111
5112           if (DDR_NB_LOOPS (ddr) > 1)
5113             {
5114               lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5115
5116               if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
5117                 return false;
5118               compute_subscript_distance (ddr);
5119               if (!build_classic_dist_vector_1 (ddr, 1, 0, opposite_v, &init_b,
5120                                                 &index_carry))
5121                 return false;
5122
5123               save_dist_v (ddr, save_v);
5124               add_outer_distances (ddr, dist_v, index_carry);
5125               add_outer_distances (ddr, opposite_v, index_carry);
5126             }
5127           else
5128             save_dist_v (ddr, save_v);
5129         }
5130     }
5131   else
5132     {
5133       /* There is a distance of 1 on all the outer loops: Example:
5134          there is a dependence of distance 1 on loop_1 for the array A.
5135
5136          | loop_1
5137          |   A[5] = ...
5138          | endloop
5139       */
5140       add_outer_distances (ddr, dist_v,
5141                            lambda_vector_first_nz (dist_v,
5142                                                    DDR_NB_LOOPS (ddr), 0));
5143     }
5144
5145   if (dump_file && (dump_flags & TDF_DETAILS))
5146     {
5147       unsigned i;
5148
5149       fprintf (dump_file, "(build_classic_dist_vector\n");
5150       for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
5151         {
5152           fprintf (dump_file, "  dist_vector = (");
5153           print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
5154                                DDR_NB_LOOPS (ddr));
5155           fprintf (dump_file, "  )\n");
5156         }
5157       fprintf (dump_file, ")\n");
5158     }
5159
5160   return true;
5161 }
5162
5163 /* Return the direction for a given distance.
5164    FIXME: Computing dir this way is suboptimal, since dir can catch
5165    cases that dist is unable to represent.  */
5166
5167 static inline enum data_dependence_direction
5168 dir_from_dist (int dist)
5169 {
5170   if (dist > 0)
5171     return dir_positive;
5172   else if (dist < 0)
5173     return dir_negative;
5174   else
5175     return dir_equal;
5176 }
5177
5178 /* Compute the classic per loop direction vector.  DDR is the data
5179    dependence relation to build a vector from.  */
5180
5181 static void
5182 build_classic_dir_vector (struct data_dependence_relation *ddr)
5183 {
5184   unsigned i, j;
5185   lambda_vector dist_v;
5186
5187   FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
5188     {
5189       lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5190
5191       for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
5192         dir_v[j] = dir_from_dist (dist_v[j]);
5193
5194       save_dir_v (ddr, dir_v);
5195     }
5196 }
5197
5198 /* Helper function.  Returns true when there is a dependence between the
5199    data references.  A_INDEX is the index of the first reference (0 for
5200    DDR_A, 1 for DDR_B) and B_INDEX is the index of the second reference.  */
5201
5202 static bool
5203 subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
5204                                unsigned int a_index, unsigned int b_index,
5205                                class loop *loop_nest)
5206 {
5207   unsigned int i;
5208   tree last_conflicts;
5209   struct subscript *subscript;
5210   tree res = NULL_TREE;
5211
5212   for (i = 0; DDR_SUBSCRIPTS (ddr).iterate (i, &subscript); i++)
5213     {
5214       conflict_function *overlaps_a, *overlaps_b;
5215
5216       analyze_overlapping_iterations (SUB_ACCESS_FN (subscript, a_index),
5217                                       SUB_ACCESS_FN (subscript, b_index),
5218                                       &overlaps_a, &overlaps_b,
5219                                       &last_conflicts, loop_nest);
5220
5221       if (SUB_CONFLICTS_IN_A (subscript))
5222         free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
5223       if (SUB_CONFLICTS_IN_B (subscript))
5224         free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
5225
5226       SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
5227       SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
5228       SUB_LAST_CONFLICT (subscript) = last_conflicts;
5229
5230       /* If there is any undetermined conflict function we have to
5231          give a conservative answer in case we cannot prove that
5232          no dependence exists when analyzing another subscript.  */
5233       if (CF_NOT_KNOWN_P (overlaps_a)
5234           || CF_NOT_KNOWN_P (overlaps_b))
5235         {
5236           res = chrec_dont_know;
5237           continue;
5238         }
5239
5240       /* When there is a subscript with no dependence we can stop.  */
5241       else if (CF_NO_DEPENDENCE_P (overlaps_a)
5242                || CF_NO_DEPENDENCE_P (overlaps_b))
5243         {
5244           res = chrec_known;
5245           break;
5246         }
5247     }
5248
5249   if (res == NULL_TREE)
5250     return true;
5251
5252   if (res == chrec_known)
5253     dependence_stats.num_dependence_independent++;
5254   else
5255     dependence_stats.num_dependence_undetermined++;
5256   finalize_ddr_dependent (ddr, res);
5257   return false;
5258 }
5259
5260 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR.  */
5261
5262 static void
5263 subscript_dependence_tester (struct data_dependence_relation *ddr,
5264                              class loop *loop_nest)
5265 {
5266   if (subscript_dependence_tester_1 (ddr, 0, 1, loop_nest))
5267     dependence_stats.num_dependence_dependent++;
5268
5269   compute_subscript_distance (ddr);
5270   if (build_classic_dist_vector (ddr, loop_nest))
5271     build_classic_dir_vector (ddr);
5272 }
5273
5274 /* Returns true when all the access functions of A are affine or
5275    constant with respect to LOOP_NEST.  */
5276
5277 static bool
5278 access_functions_are_affine_or_constant_p (const struct data_reference *a,
5279                                            const class loop *loop_nest)
5280 {
5281   unsigned int i;
5282   vec<tree> fns = DR_ACCESS_FNS (a);
5283   tree t;
5284
5285   FOR_EACH_VEC_ELT (fns, i, t)
5286     if (!evolution_function_is_invariant_p (t, loop_nest->num)
5287         && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
5288       return false;
5289
5290   return true;
5291 }
5292
5293 /* This computes the affine dependence relation between A and B with
5294    respect to LOOP_NEST.  CHREC_KNOWN is used for representing the
5295    independence between two accesses, while CHREC_DONT_KNOW is used
5296    for representing the unknown relation.
5297
5298    Note that it is possible to stop the computation of the dependence
5299    relation the first time we detect a CHREC_KNOWN element for a given
5300    subscript.  */
5301
5302 void
5303 compute_affine_dependence (struct data_dependence_relation *ddr,
5304                            class loop *loop_nest)
5305 {
5306   struct data_reference *dra = DDR_A (ddr);
5307   struct data_reference *drb = DDR_B (ddr);
5308
5309   if (dump_file && (dump_flags & TDF_DETAILS))
5310     {
5311       fprintf (dump_file, "(compute_affine_dependence\n");
5312       fprintf (dump_file, "  stmt_a: ");
5313       print_gimple_stmt (dump_file, DR_STMT (dra), 0, TDF_SLIM);
5314       fprintf (dump_file, "  stmt_b: ");
5315       print_gimple_stmt (dump_file, DR_STMT (drb), 0, TDF_SLIM);
5316     }
5317
5318   /* Analyze only when the dependence relation is not yet known.  */
5319   if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
5320     {
5321       dependence_stats.num_dependence_tests++;
5322
5323       if (access_functions_are_affine_or_constant_p (dra, loop_nest)
5324           && access_functions_are_affine_or_constant_p (drb, loop_nest))
5325         subscript_dependence_tester (ddr, loop_nest);
5326
5327       /* As a last case, if the dependence cannot be determined, or if
5328          the dependence is considered too difficult to determine, answer
5329          "don't know".  */
5330       else
5331         {
5332           dependence_stats.num_dependence_undetermined++;
5333
5334           if (dump_file && (dump_flags & TDF_DETAILS))
5335             {
5336               fprintf (dump_file, "Data ref a:\n");
5337               dump_data_reference (dump_file, dra);
5338               fprintf (dump_file, "Data ref b:\n");
5339               dump_data_reference (dump_file, drb);
5340               fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
5341             }
5342           finalize_ddr_dependent (ddr, chrec_dont_know);
5343         }
5344     }
5345
5346   if (dump_file && (dump_flags & TDF_DETAILS))
5347     {
5348       if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
5349         fprintf (dump_file, ") -> no dependence\n");
5350       else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
5351         fprintf (dump_file, ") -> dependence analysis failed\n");
5352       else
5353         fprintf (dump_file, ")\n");
5354     }
5355 }
5356
5357 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
5358    the data references in DATAREFS, in the LOOP_NEST.  When
5359    COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
5360    relations.  Return true when successful, i.e. data references number
5361    is small enough to be handled.  */
5362
5363 bool
5364 compute_all_dependences (vec<data_reference_p> datarefs,
5365                          vec<ddr_p> *dependence_relations,
5366                          vec<loop_p> loop_nest,
5367                          bool compute_self_and_rr)
5368 {
5369   struct data_dependence_relation *ddr;
5370   struct data_reference *a, *b;
5371   unsigned int i, j;
5372
5373   if ((int) datarefs.length ()
5374       > param_loop_max_datarefs_for_datadeps)
5375     {
5376       struct data_dependence_relation *ddr;
5377
5378       /* Insert a single relation into dependence_relations:
5379          chrec_dont_know.  */
5380       ddr = initialize_data_dependence_relation (NULL, NULL, loop_nest);
5381       dependence_relations->safe_push (ddr);
5382       return false;
5383     }
5384
5385   FOR_EACH_VEC_ELT (datarefs, i, a)
5386     for (j = i + 1; datarefs.iterate (j, &b); j++)
5387       if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
5388         {
5389           ddr = initialize_data_dependence_relation (a, b, loop_nest);
5390           dependence_relations->safe_push (ddr);
5391           if (loop_nest.exists ())
5392             compute_affine_dependence (ddr, loop_nest[0]);
5393         }
5394
5395   if (compute_self_and_rr)
5396     FOR_EACH_VEC_ELT (datarefs, i, a)
5397       {
5398         ddr = initialize_data_dependence_relation (a, a, loop_nest);
5399         dependence_relations->safe_push (ddr);
5400         if (loop_nest.exists ())
5401           compute_affine_dependence (ddr, loop_nest[0]);
5402       }
5403
5404   return true;
5405 }
5406
5407 /* Describes a location of a memory reference.  */
5408
5409 struct data_ref_loc
5410 {
5411   /* The memory reference.  */
5412   tree ref;
5413
5414   /* True if the memory reference is read.  */
5415   bool is_read;
5416
5417   /* True if the data reference is conditional within the containing
5418      statement, i.e. if it might not occur even when the statement
5419      is executed and runs to completion.  */
5420   bool is_conditional_in_stmt;
5421 };
5422
5423
5424 /* Stores the locations of memory references in STMT to REFERENCES.  Returns
5425    true if STMT clobbers memory, false otherwise.  */
5426
5427 static bool
5428 get_references_in_stmt (gimple *stmt, vec<data_ref_loc, va_heap> *references)
5429 {
5430   bool clobbers_memory = false;
5431   data_ref_loc ref;
5432   tree op0, op1;
5433   enum gimple_code stmt_code = gimple_code (stmt);
5434
5435   /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
5436      As we cannot model data-references to not spelled out
5437      accesses give up if they may occur.  */
5438   if (stmt_code == GIMPLE_CALL
5439       && !(gimple_call_flags (stmt) & ECF_CONST))
5440     {
5441       /* Allow IFN_GOMP_SIMD_LANE in their own loops.  */
5442       if (gimple_call_internal_p (stmt))
5443         switch (gimple_call_internal_fn (stmt))
5444           {
5445           case IFN_GOMP_SIMD_LANE:
5446             {
5447               class loop *loop = gimple_bb (stmt)->loop_father;
5448               tree uid = gimple_call_arg (stmt, 0);
5449               gcc_assert (TREE_CODE (uid) == SSA_NAME);
5450               if (loop == NULL
5451                   || loop->simduid != SSA_NAME_VAR (uid))
5452                 clobbers_memory = true;
5453               break;
5454             }
5455           case IFN_MASK_LOAD:
5456           case IFN_MASK_STORE:
5457             break;
5458           default:
5459             clobbers_memory = true;
5460             break;
5461           }
5462       else
5463         clobbers_memory = true;
5464     }
5465   else if (stmt_code == GIMPLE_ASM
5466            && (gimple_asm_volatile_p (as_a <gasm *> (stmt))
5467                || gimple_vuse (stmt)))
5468     clobbers_memory = true;
5469
5470   if (!gimple_vuse (stmt))
5471     return clobbers_memory;
5472
5473   if (stmt_code == GIMPLE_ASSIGN)
5474     {
5475       tree base;
5476       op0 = gimple_assign_lhs (stmt);
5477       op1 = gimple_assign_rhs1 (stmt);
5478
5479       if (DECL_P (op1)
5480           || (REFERENCE_CLASS_P (op1)
5481               && (base = get_base_address (op1))
5482               && TREE_CODE (base) != SSA_NAME
5483               && !is_gimple_min_invariant (base)))
5484         {
5485           ref.ref = op1;
5486           ref.is_read = true;
5487           ref.is_conditional_in_stmt = false;
5488           references->safe_push (ref);
5489         }
5490     }
5491   else if (stmt_code == GIMPLE_CALL)
5492     {
5493       unsigned i, n;
5494       tree ptr, type;
5495       unsigned int align;
5496
5497       ref.is_read = false;
5498       if (gimple_call_internal_p (stmt))
5499         switch (gimple_call_internal_fn (stmt))
5500           {
5501           case IFN_MASK_LOAD:
5502             if (gimple_call_lhs (stmt) == NULL_TREE)
5503               break;
5504             ref.is_read = true;
5505             /* FALLTHRU */
5506           case IFN_MASK_STORE:
5507             ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)), 0);
5508             align = tree_to_shwi (gimple_call_arg (stmt, 1));
5509             if (ref.is_read)
5510               type = TREE_TYPE (gimple_call_lhs (stmt));
5511             else
5512               type = TREE_TYPE (gimple_call_arg (stmt, 3));
5513             if (TYPE_ALIGN (type) != align)
5514               type = build_aligned_type (type, align);
5515             ref.is_conditional_in_stmt = true;
5516             ref.ref = fold_build2 (MEM_REF, type, gimple_call_arg (stmt, 0),
5517                                    ptr);
5518             references->safe_push (ref);
5519             return false;
5520           default:
5521             break;
5522           }
5523
5524       op0 = gimple_call_lhs (stmt);
5525       n = gimple_call_num_args (stmt);
5526       for (i = 0; i < n; i++)
5527         {
5528           op1 = gimple_call_arg (stmt, i);
5529
5530           if (DECL_P (op1)
5531               || (REFERENCE_CLASS_P (op1) && get_base_address (op1)))
5532             {
5533               ref.ref = op1;
5534               ref.is_read = true;
5535               ref.is_conditional_in_stmt = false;
5536               references->safe_push (ref);
5537             }
5538         }
5539     }
5540   else
5541     return clobbers_memory;
5542
5543   if (op0
5544       && (DECL_P (op0)
5545           || (REFERENCE_CLASS_P (op0) && get_base_address (op0))))
5546     {
5547       ref.ref = op0;
5548       ref.is_read = false;
5549       ref.is_conditional_in_stmt = false;
5550       references->safe_push (ref);
5551     }
5552   return clobbers_memory;
5553 }
5554
5555
5556 /* Returns true if the loop-nest has any data reference.  */
5557
5558 bool
5559 loop_nest_has_data_refs (loop_p loop)
5560 {
5561   basic_block *bbs = get_loop_body (loop);
5562   auto_vec<data_ref_loc, 3> references;
5563
5564   for (unsigned i = 0; i < loop->num_nodes; i++)
5565     {
5566       basic_block bb = bbs[i];
5567       gimple_stmt_iterator bsi;
5568
5569       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5570         {
5571           gimple *stmt = gsi_stmt (bsi);
5572           get_references_in_stmt (stmt, &references);
5573           if (references.length ())
5574             {
5575               free (bbs);
5576               return true;
5577             }
5578         }
5579     }
5580   free (bbs);
5581   return false;
5582 }
5583
5584 /* Stores the data references in STMT to DATAREFS.  If there is an unanalyzable
5585    reference, returns false, otherwise returns true.  NEST is the outermost
5586    loop of the loop nest in which the references should be analyzed.  */
5587
5588 opt_result
5589 find_data_references_in_stmt (class loop *nest, gimple *stmt,
5590                               vec<data_reference_p> *datarefs)
5591 {
5592   unsigned i;
5593   auto_vec<data_ref_loc, 2> references;
5594   data_ref_loc *ref;
5595   data_reference_p dr;
5596
5597   if (get_references_in_stmt (stmt, &references))
5598     return opt_result::failure_at (stmt, "statement clobbers memory: %G",
5599                                    stmt);
5600
5601   FOR_EACH_VEC_ELT (references, i, ref)
5602     {
5603       dr = create_data_ref (nest ? loop_preheader_edge (nest) : NULL,
5604                             loop_containing_stmt (stmt), ref->ref,
5605                             stmt, ref->is_read, ref->is_conditional_in_stmt);
5606       gcc_assert (dr != NULL);
5607       datarefs->safe_push (dr);
5608     }
5609
5610   return opt_result::success ();
5611 }
5612
5613 /* Stores the data references in STMT to DATAREFS.  If there is an
5614    unanalyzable reference, returns false, otherwise returns true.
5615    NEST is the outermost loop of the loop nest in which the references
5616    should be instantiated, LOOP is the loop in which the references
5617    should be analyzed.  */
5618
5619 bool
5620 graphite_find_data_references_in_stmt (edge nest, loop_p loop, gimple *stmt,
5621                                        vec<data_reference_p> *datarefs)
5622 {
5623   unsigned i;
5624   auto_vec<data_ref_loc, 2> references;
5625   data_ref_loc *ref;
5626   bool ret = true;
5627   data_reference_p dr;
5628
5629   if (get_references_in_stmt (stmt, &references))
5630     return false;
5631
5632   FOR_EACH_VEC_ELT (references, i, ref)
5633     {
5634       dr = create_data_ref (nest, loop, ref->ref, stmt, ref->is_read,
5635                             ref->is_conditional_in_stmt);
5636       gcc_assert (dr != NULL);
5637       datarefs->safe_push (dr);
5638     }
5639
5640   return ret;
5641 }
5642
5643 /* Search the data references in LOOP, and record the information into
5644    DATAREFS.  Returns chrec_dont_know when failing to analyze a
5645    difficult case, returns NULL_TREE otherwise.  */
5646
5647 tree
5648 find_data_references_in_bb (class loop *loop, basic_block bb,
5649                             vec<data_reference_p> *datarefs)
5650 {
5651   gimple_stmt_iterator bsi;
5652
5653   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5654     {
5655       gimple *stmt = gsi_stmt (bsi);
5656
5657       if (!find_data_references_in_stmt (loop, stmt, datarefs))
5658         {
5659           struct data_reference *res;
5660           res = XCNEW (struct data_reference);
5661           datarefs->safe_push (res);
5662
5663           return chrec_dont_know;
5664         }
5665     }
5666
5667   return NULL_TREE;
5668 }
5669
5670 /* Search the data references in LOOP, and record the information into
5671    DATAREFS.  Returns chrec_dont_know when failing to analyze a
5672    difficult case, returns NULL_TREE otherwise.
5673
5674    TODO: This function should be made smarter so that it can handle address
5675    arithmetic as if they were array accesses, etc.  */
5676
5677 tree
5678 find_data_references_in_loop (class loop *loop,
5679                               vec<data_reference_p> *datarefs)
5680 {
5681   basic_block bb, *bbs;
5682   unsigned int i;
5683
5684   bbs = get_loop_body_in_dom_order (loop);
5685
5686   for (i = 0; i < loop->num_nodes; i++)
5687     {
5688       bb = bbs[i];
5689
5690       if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
5691         {
5692           free (bbs);
5693           return chrec_dont_know;
5694         }
5695     }
5696   free (bbs);
5697
5698   return NULL_TREE;
5699 }
5700
5701 /* Return the alignment in bytes that DRB is guaranteed to have at all
5702    times.  */
5703
5704 unsigned int
5705 dr_alignment (innermost_loop_behavior *drb)
5706 {
5707   /* Get the alignment of BASE_ADDRESS + INIT.  */
5708   unsigned int alignment = drb->base_alignment;
5709   unsigned int misalignment = (drb->base_misalignment
5710                                + TREE_INT_CST_LOW (drb->init));
5711   if (misalignment != 0)
5712     alignment = MIN (alignment, misalignment & -misalignment);
5713
5714   /* Cap it to the alignment of OFFSET.  */
5715   if (!integer_zerop (drb->offset))
5716     alignment = MIN (alignment, drb->offset_alignment);
5717
5718   /* Cap it to the alignment of STEP.  */
5719   if (!integer_zerop (drb->step))
5720     alignment = MIN (alignment, drb->step_alignment);
5721
5722   return alignment;
5723 }
5724
5725 /* If BASE is a pointer-typed SSA name, try to find the object that it
5726    is based on.  Return this object X on success and store the alignment
5727    in bytes of BASE - &X in *ALIGNMENT_OUT.  */
5728
5729 static tree
5730 get_base_for_alignment_1 (tree base, unsigned int *alignment_out)
5731 {
5732   if (TREE_CODE (base) != SSA_NAME || !POINTER_TYPE_P (TREE_TYPE (base)))
5733     return NULL_TREE;
5734
5735   gimple *def = SSA_NAME_DEF_STMT (base);
5736   base = analyze_scalar_evolution (loop_containing_stmt (def), base);
5737
5738   /* Peel chrecs and record the minimum alignment preserved by
5739      all steps.  */
5740   unsigned int alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
5741   while (TREE_CODE (base) == POLYNOMIAL_CHREC)
5742     {
5743       unsigned int step_alignment = highest_pow2_factor (CHREC_RIGHT (base));
5744       alignment = MIN (alignment, step_alignment);
5745       base = CHREC_LEFT (base);
5746     }
5747
5748   /* Punt if the expression is too complicated to handle.  */
5749   if (tree_contains_chrecs (base, NULL) || !POINTER_TYPE_P (TREE_TYPE (base)))
5750     return NULL_TREE;
5751
5752   /* The only useful cases are those for which a dereference folds to something
5753      other than an INDIRECT_REF.  */
5754   tree ref_type = TREE_TYPE (TREE_TYPE (base));
5755   tree ref = fold_indirect_ref_1 (UNKNOWN_LOCATION, ref_type, base);
5756   if (!ref)
5757     return NULL_TREE;
5758
5759   /* Analyze the base to which the steps we peeled were applied.  */
5760   poly_int64 bitsize, bitpos, bytepos;
5761   machine_mode mode;
5762   int unsignedp, reversep, volatilep;
5763   tree offset;
5764   base = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
5765                               &unsignedp, &reversep, &volatilep);
5766   if (!base || !multiple_p (bitpos, BITS_PER_UNIT, &bytepos))
5767     return NULL_TREE;
5768
5769   /* Restrict the alignment to that guaranteed by the offsets.  */
5770   unsigned int bytepos_alignment = known_alignment (bytepos);
5771   if (bytepos_alignment != 0)
5772     alignment = MIN (alignment, bytepos_alignment);
5773   if (offset)
5774     {
5775       unsigned int offset_alignment = highest_pow2_factor (offset);
5776       alignment = MIN (alignment, offset_alignment);
5777     }
5778
5779   *alignment_out = alignment;
5780   return base;
5781 }
5782
5783 /* Return the object whose alignment would need to be changed in order
5784    to increase the alignment of ADDR.  Store the maximum achievable
5785    alignment in *MAX_ALIGNMENT.  */
5786
5787 tree
5788 get_base_for_alignment (tree addr, unsigned int *max_alignment)
5789 {
5790   tree base = get_base_for_alignment_1 (addr, max_alignment);
5791   if (base)
5792     return base;
5793
5794   if (TREE_CODE (addr) == ADDR_EXPR)
5795     addr = TREE_OPERAND (addr, 0);
5796   *max_alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
5797   return addr;
5798 }
5799
5800 /* Recursive helper function.  */
5801
5802 static bool
5803 find_loop_nest_1 (class loop *loop, vec<loop_p> *loop_nest)
5804 {
5805   /* Inner loops of the nest should not contain siblings.  Example:
5806      when there are two consecutive loops,
5807
5808      | loop_0
5809      |   loop_1
5810      |     A[{0, +, 1}_1]
5811      |   endloop_1
5812      |   loop_2
5813      |     A[{0, +, 1}_2]
5814      |   endloop_2
5815      | endloop_0
5816
5817      the dependence relation cannot be captured by the distance
5818      abstraction.  */
5819   if (loop->next)
5820     return false;
5821
5822   loop_nest->safe_push (loop);
5823   if (loop->inner)
5824     return find_loop_nest_1 (loop->inner, loop_nest);
5825   return true;
5826 }
5827
5828 /* Return false when the LOOP is not well nested.  Otherwise return
5829    true and insert in LOOP_NEST the loops of the nest.  LOOP_NEST will
5830    contain the loops from the outermost to the innermost, as they will
5831    appear in the classic distance vector.  */
5832
5833 bool
5834 find_loop_nest (class loop *loop, vec<loop_p> *loop_nest)
5835 {
5836   loop_nest->safe_push (loop);
5837   if (loop->inner)
5838     return find_loop_nest_1 (loop->inner, loop_nest);
5839   return true;
5840 }
5841
5842 /* Returns true when the data dependences have been computed, false otherwise.
5843    Given a loop nest LOOP, the following vectors are returned:
5844    DATAREFS is initialized to all the array elements contained in this loop,
5845    DEPENDENCE_RELATIONS contains the relations between the data references.
5846    Compute read-read and self relations if
5847    COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE.  */
5848
5849 bool
5850 compute_data_dependences_for_loop (class loop *loop,
5851                                    bool compute_self_and_read_read_dependences,
5852                                    vec<loop_p> *loop_nest,
5853                                    vec<data_reference_p> *datarefs,
5854                                    vec<ddr_p> *dependence_relations)
5855 {
5856   bool res = true;
5857
5858   memset (&dependence_stats, 0, sizeof (dependence_stats));
5859
5860   /* If the loop nest is not well formed, or one of the data references
5861      is not computable, give up without spending time to compute other
5862      dependences.  */
5863   if (!loop
5864       || !find_loop_nest (loop, loop_nest)
5865       || find_data_references_in_loop (loop, datarefs) == chrec_dont_know
5866       || !compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
5867                                    compute_self_and_read_read_dependences))
5868     res = false;
5869
5870   if (dump_file && (dump_flags & TDF_STATS))
5871     {
5872       fprintf (dump_file, "Dependence tester statistics:\n");
5873
5874       fprintf (dump_file, "Number of dependence tests: %d\n",
5875                dependence_stats.num_dependence_tests);
5876       fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
5877                dependence_stats.num_dependence_dependent);
5878       fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
5879                dependence_stats.num_dependence_independent);
5880       fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
5881                dependence_stats.num_dependence_undetermined);
5882
5883       fprintf (dump_file, "Number of subscript tests: %d\n",
5884                dependence_stats.num_subscript_tests);
5885       fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
5886                dependence_stats.num_subscript_undetermined);
5887       fprintf (dump_file, "Number of same subscript function: %d\n",
5888                dependence_stats.num_same_subscript_function);
5889
5890       fprintf (dump_file, "Number of ziv tests: %d\n",
5891                dependence_stats.num_ziv);
5892       fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
5893                dependence_stats.num_ziv_dependent);
5894       fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
5895                dependence_stats.num_ziv_independent);
5896       fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
5897                dependence_stats.num_ziv_unimplemented);
5898
5899       fprintf (dump_file, "Number of siv tests: %d\n",
5900                dependence_stats.num_siv);
5901       fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
5902                dependence_stats.num_siv_dependent);
5903       fprintf (dump_file, "Number of siv tests returning independent: %d\n",
5904                dependence_stats.num_siv_independent);
5905       fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
5906                dependence_stats.num_siv_unimplemented);
5907
5908       fprintf (dump_file, "Number of miv tests: %d\n",
5909                dependence_stats.num_miv);
5910       fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
5911                dependence_stats.num_miv_dependent);
5912       fprintf (dump_file, "Number of miv tests returning independent: %d\n",
5913                dependence_stats.num_miv_independent);
5914       fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
5915                dependence_stats.num_miv_unimplemented);
5916     }
5917
5918   return res;
5919 }
5920
5921 /* Free the memory used by a data dependence relation DDR.  */
5922
5923 void
5924 free_dependence_relation (struct data_dependence_relation *ddr)
5925 {
5926   if (ddr == NULL)
5927     return;
5928
5929   if (DDR_SUBSCRIPTS (ddr).exists ())
5930     free_subscripts (DDR_SUBSCRIPTS (ddr));
5931   DDR_DIST_VECTS (ddr).release ();
5932   DDR_DIR_VECTS (ddr).release ();
5933
5934   free (ddr);
5935 }
5936
5937 /* Free the memory used by the data dependence relations from
5938    DEPENDENCE_RELATIONS.  */
5939
5940 void
5941 free_dependence_relations (vec<ddr_p> dependence_relations)
5942 {
5943   unsigned int i;
5944   struct data_dependence_relation *ddr;
5945
5946   FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
5947     if (ddr)
5948       free_dependence_relation (ddr);
5949
5950   dependence_relations.release ();
5951 }
5952
5953 /* Free the memory used by the data references from DATAREFS.  */
5954
5955 void
5956 free_data_refs (vec<data_reference_p> datarefs)
5957 {
5958   unsigned int i;
5959   struct data_reference *dr;
5960
5961   FOR_EACH_VEC_ELT (datarefs, i, dr)
5962     free_data_ref (dr);
5963   datarefs.release ();
5964 }
5965
5966 /* Common routine implementing both dr_direction_indicator and
5967    dr_zero_step_indicator.  Return USEFUL_MIN if the indicator is known
5968    to be >= USEFUL_MIN and -1 if the indicator is known to be negative.
5969    Return the step as the indicator otherwise.  */
5970
5971 static tree
5972 dr_step_indicator (struct data_reference *dr, int useful_min)
5973 {
5974   tree step = DR_STEP (dr);
5975   if (!step)
5976     return NULL_TREE;
5977   STRIP_NOPS (step);
5978   /* Look for cases where the step is scaled by a positive constant
5979      integer, which will often be the access size.  If the multiplication
5980      doesn't change the sign (due to overflow effects) then we can
5981      test the unscaled value instead.  */
5982   if (TREE_CODE (step) == MULT_EXPR
5983       && TREE_CODE (TREE_OPERAND (step, 1)) == INTEGER_CST
5984       && tree_int_cst_sgn (TREE_OPERAND (step, 1)) > 0)
5985     {
5986       tree factor = TREE_OPERAND (step, 1);
5987       step = TREE_OPERAND (step, 0);
5988
5989       /* Strip widening and truncating conversions as well as nops.  */
5990       if (CONVERT_EXPR_P (step)
5991           && INTEGRAL_TYPE_P (TREE_TYPE (TREE_OPERAND (step, 0))))
5992         step = TREE_OPERAND (step, 0);
5993       tree type = TREE_TYPE (step);
5994
5995       /* Get the range of step values that would not cause overflow.  */
5996       widest_int minv = (wi::to_widest (TYPE_MIN_VALUE (ssizetype))
5997                          / wi::to_widest (factor));
5998       widest_int maxv = (wi::to_widest (TYPE_MAX_VALUE (ssizetype))
5999                          / wi::to_widest (factor));
6000
6001       /* Get the range of values that the unconverted step actually has.  */
6002       wide_int step_min, step_max;
6003       if (TREE_CODE (step) != SSA_NAME
6004           || get_range_info (step, &step_min, &step_max) != VR_RANGE)
6005         {
6006           step_min = wi::to_wide (TYPE_MIN_VALUE (type));
6007           step_max = wi::to_wide (TYPE_MAX_VALUE (type));
6008         }
6009
6010       /* Check whether the unconverted step has an acceptable range.  */
6011       signop sgn = TYPE_SIGN (type);
6012       if (wi::les_p (minv, widest_int::from (step_min, sgn))
6013           && wi::ges_p (maxv, widest_int::from (step_max, sgn)))
6014         {
6015           if (wi::ge_p (step_min, useful_min, sgn))
6016             return ssize_int (useful_min);
6017           else if (wi::lt_p (step_max, 0, sgn))
6018             return ssize_int (-1);
6019           else
6020             return fold_convert (ssizetype, step);
6021         }
6022     }
6023   return DR_STEP (dr);
6024 }
6025
6026 /* Return a value that is negative iff DR has a negative step.  */
6027
6028 tree
6029 dr_direction_indicator (struct data_reference *dr)
6030 {
6031   return dr_step_indicator (dr, 0);
6032 }
6033
6034 /* Return a value that is zero iff DR has a zero step.  */
6035
6036 tree
6037 dr_zero_step_indicator (struct data_reference *dr)
6038 {
6039   return dr_step_indicator (dr, 1);
6040 }
6041
6042 /* Return true if DR is known to have a nonnegative (but possibly zero)
6043    step.  */
6044
6045 bool
6046 dr_known_forward_stride_p (struct data_reference *dr)
6047 {
6048   tree indicator = dr_direction_indicator (dr);
6049   tree neg_step_val = fold_binary (LT_EXPR, boolean_type_node,
6050                                    fold_convert (ssizetype, indicator),
6051                                    ssize_int (0));
6052   return neg_step_val && integer_zerop (neg_step_val);
6053 }