gcc/tree-ssa-loop-ivopts.cc

   1 /* Induction variable optimizations.
   2    Copyright (C) 2003-2022 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 3, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 /* This pass tries to find the optimal set of induction variables for the loop.
  21    It optimizes just the basic linear induction variables (although adding
  22    support for other types should not be too hard).  It includes the
  23    optimizations commonly known as strength reduction, induction variable
  24    coalescing and induction variable elimination.  It does it in the
  25    following steps:
  26
  27    1) The interesting uses of induction variables are found.  This includes
  28
  29       -- uses of induction variables in non-linear expressions
  30       -- addresses of arrays
  31       -- comparisons of induction variables
  32
  33       Note the interesting uses are categorized and handled in group.
  34       Generally, address type uses are grouped together if their iv bases
  35       are different in constant offset.
  36
  37    2) Candidates for the induction variables are found.  This includes
  38
  39       -- old induction variables
  40       -- the variables defined by expressions derived from the "interesting
  41          groups/uses" above
  42
  43    3) The optimal (w.r. to a cost function) set of variables is chosen.  The
  44       cost function assigns a cost to sets of induction variables and consists
  45       of three parts:
  46
  47       -- The group/use costs.  Each of the interesting groups/uses chooses
  48          the best induction variable in the set and adds its cost to the sum.
  49          The cost reflects the time spent on modifying the induction variables
  50          value to be usable for the given purpose (adding base and offset for
  51          arrays, etc.).
  52       -- The variable costs.  Each of the variables has a cost assigned that
  53          reflects the costs associated with incrementing the value of the
  54          variable.  The original variables are somewhat preferred.
  55       -- The set cost.  Depending on the size of the set, extra cost may be
  56          added to reflect register pressure.
  57
  58       All the costs are defined in a machine-specific way, using the target
  59       hooks and machine descriptions to determine them.
  60
  61    4) The trees are transformed to use the new variables, the dead code is
  62       removed.
  63
  64    All of this is done loop by loop.  Doing it globally is theoretically
  65    possible, it might give a better performance and it might enable us
  66    to decide costs more precisely, but getting all the interactions right
  67    would be complicated.
  68
  69    For the targets supporting low-overhead loops, IVOPTs has to take care of
  70    the loops which will probably be transformed in RTL doloop optimization,
  71    to try to make selected IV candidate set optimal.  The process of doloop
  72    support includes:
  73
  74    1) Analyze the current loop will be transformed to doloop or not, find and
  75       mark its compare type IV use as doloop use (iv_group field doloop_p), and
  76       set flag doloop_use_p of ivopts_data to notify subsequent processings on
  77       doloop.  See analyze_and_mark_doloop_use and its callees for the details.
  78       The target hook predict_doloop_p can be used for target specific checks.
  79
  80    2) Add one doloop dedicated IV cand {(may_be_zero ? 1 : (niter + 1)), +, -1},
  81       set flag doloop_p of iv_cand, step cost is set as zero and no extra cost
  82       like biv.  For cost determination between doloop IV cand and IV use, the
  83       target hooks doloop_cost_for_generic and doloop_cost_for_address are
  84       provided to add on extra costs for generic type and address type IV use.
  85       Zero cost is assigned to the pair between doloop IV cand and doloop IV
  86       use, and bound zero is set for IV elimination.
  87
  88    3) With the cost setting in step 2), the current cost model based IV
  89       selection algorithm will process as usual, pick up doloop dedicated IV if
  90       profitable.  */
  91
  92 #include "config.h"
  93 #include "system.h"
  94 #include "coretypes.h"
  95 #include "backend.h"
  96 #include "rtl.h"
  97 #include "tree.h"
  98 #include "gimple.h"
  99 #include "cfghooks.h"
 100 #include "tree-pass.h"
 101 #include "memmodel.h"
 102 #include "tm_p.h"
 103 #include "ssa.h"
 104 #include "expmed.h"
 105 #include "insn-config.h"
 106 #include "emit-rtl.h"
 107 #include "recog.h"
 108 #include "cgraph.h"
 109 #include "gimple-pretty-print.h"
 110 #include "alias.h"
 111 #include "fold-const.h"
 112 #include "stor-layout.h"
 113 #include "tree-eh.h"
 114 #include "gimplify.h"
 115 #include "gimple-iterator.h"
 116 #include "gimplify-me.h"
 117 #include "tree-cfg.h"
 118 #include "tree-ssa-loop-ivopts.h"
 119 #include "tree-ssa-loop-manip.h"
 120 #include "tree-ssa-loop-niter.h"
 121 #include "tree-ssa-loop.h"
 122 #include "explow.h"
 123 #include "expr.h"
 124 #include "tree-dfa.h"
 125 #include "tree-ssa.h"
 126 #include "cfgloop.h"
 127 #include "tree-scalar-evolution.h"
 128 #include "tree-affine.h"
 129 #include "tree-ssa-propagate.h"
 130 #include "tree-ssa-address.h"
 131 #include "builtins.h"
 132 #include "tree-vectorizer.h"
 133 #include "dbgcnt.h"
 134 #include "cfganal.h"
 135
 136 /* For lang_hooks.types.type_for_mode.  */
 137 #include "langhooks.h"
 138
 139 /* FIXME: Expressions are expanded to RTL in this pass to determine the
 140    cost of different addressing modes.  This should be moved to a TBD
 141    interface between the GIMPLE and RTL worlds.  */
 142
 143 /* The infinite cost.  */
 144 #define INFTY 1000000000
 145
 146 /* Returns the expected number of loop iterations for LOOP.
 147    The average trip count is computed from profile data if it
 148    exists. */
 149
 150 static inline HOST_WIDE_INT
 151 avg_loop_niter (class loop *loop)
 152 {
 153   HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
 154   if (niter == -1)
 155     {
 156       niter = likely_max_stmt_executions_int (loop);
 157
 158       if (niter == -1 || niter > param_avg_loop_niter)
 159         return param_avg_loop_niter;
 160     }
 161
 162   return niter;
 163 }
 164
 165 struct iv_use;
 166
 167 /* Representation of the induction variable.  */
 168 struct iv
 169 {
 170   tree base;            /* Initial value of the iv.  */
 171   tree base_object;     /* A memory object to that the induction variable points.  */
 172   tree step;            /* Step of the iv (constant only).  */
 173   tree ssa_name;        /* The ssa name with the value.  */
 174   struct iv_use *nonlin_use;    /* The identifier in the use if it is the case.  */
 175   bool biv_p;           /* Is it a biv?  */
 176   bool no_overflow;     /* True if the iv doesn't overflow.  */
 177   bool have_address_use;/* For biv, indicate if it's used in any address
 178                            type use.  */
 179 };
 180
 181 /* Per-ssa version information (induction variable descriptions, etc.).  */
 182 struct version_info
 183 {
 184   tree name;            /* The ssa name.  */
 185   struct iv *iv;        /* Induction variable description.  */
 186   bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
 187                            an expression that is not an induction variable.  */
 188   bool preserve_biv;    /* For the original biv, whether to preserve it.  */
 189   unsigned inv_id;      /* Id of an invariant.  */
 190 };
 191
 192 /* Types of uses.  */
 193 enum use_type
 194 {
 195   USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
 196   USE_REF_ADDRESS,      /* Use is an address for an explicit memory
 197                            reference.  */
 198   USE_PTR_ADDRESS,      /* Use is a pointer argument to a function in
 199                            cases where the expansion of the function
 200                            will turn the argument into a normal address.  */
 201   USE_COMPARE           /* Use is a compare.  */
 202 };
 203
 204 /* Cost of a computation.  */
 205 class comp_cost
 206 {
 207 public:
 208   comp_cost (): cost (0), complexity (0), scratch (0)
 209   {}
 210
 211   comp_cost (int64_t cost, unsigned complexity, int64_t scratch = 0)
 212     : cost (cost), complexity (complexity), scratch (scratch)
 213   {}
 214
 215   /* Returns true if COST is infinite.  */
 216   bool infinite_cost_p ();
 217
 218   /* Adds costs COST1 and COST2.  */
 219   friend comp_cost operator+ (comp_cost cost1, comp_cost cost2);
 220
 221   /* Adds COST to the comp_cost.  */
 222   comp_cost operator+= (comp_cost cost);
 223
 224   /* Adds constant C to this comp_cost.  */
 225   comp_cost operator+= (HOST_WIDE_INT c);
 226
 227   /* Subtracts constant C to this comp_cost.  */
 228   comp_cost operator-= (HOST_WIDE_INT c);
 229
 230   /* Divide the comp_cost by constant C.  */
 231   comp_cost operator/= (HOST_WIDE_INT c);
 232
 233   /* Multiply the comp_cost by constant C.  */
 234   comp_cost operator*= (HOST_WIDE_INT c);
 235
 236   /* Subtracts costs COST1 and COST2.  */
 237   friend comp_cost operator- (comp_cost cost1, comp_cost cost2);
 238
 239   /* Subtracts COST from this comp_cost.  */
 240   comp_cost operator-= (comp_cost cost);
 241
 242   /* Returns true if COST1 is smaller than COST2.  */
 243   friend bool operator< (comp_cost cost1, comp_cost cost2);
 244
 245   /* Returns true if COST1 and COST2 are equal.  */
 246   friend bool operator== (comp_cost cost1, comp_cost cost2);
 247
 248   /* Returns true if COST1 is smaller or equal than COST2.  */
 249   friend bool operator<= (comp_cost cost1, comp_cost cost2);
 250
 251   int64_t cost;         /* The runtime cost.  */
 252   unsigned complexity;  /* The estimate of the complexity of the code for
 253                            the computation (in no concrete units --
 254                            complexity field should be larger for more
 255                            complex expressions and addressing modes).  */
 256   int64_t scratch;      /* Scratch used during cost computation.  */
 257 };
 258
 259 static const comp_cost no_cost;
 260 static const comp_cost infinite_cost (INFTY, 0, INFTY);
 261
 262 bool
 263 comp_cost::infinite_cost_p ()
 264 {
 265   return cost == INFTY;
 266 }
 267
 268 comp_cost
 269 operator+ (comp_cost cost1, comp_cost cost2)
 270 {
 271   if (cost1.infinite_cost_p () || cost2.infinite_cost_p ())
 272     return infinite_cost;
 273
 274   gcc_assert (cost1.cost + cost2.cost < infinite_cost.cost);
 275   cost1.cost += cost2.cost;
 276   cost1.complexity += cost2.complexity;
 277
 278   return cost1;
 279 }
 280
 281 comp_cost
 282 operator- (comp_cost cost1, comp_cost cost2)
 283 {
 284   if (cost1.infinite_cost_p ())
 285     return infinite_cost;
 286
 287   gcc_assert (!cost2.infinite_cost_p ());
 288   gcc_assert (cost1.cost - cost2.cost < infinite_cost.cost);
 289
 290   cost1.cost -= cost2.cost;
 291   cost1.complexity -= cost2.complexity;
 292
 293   return cost1;
 294 }
 295
 296 comp_cost
 297 comp_cost::operator+= (comp_cost cost)
 298 {
 299   *this = *this + cost;
 300   return *this;
 301 }
 302
 303 comp_cost
 304 comp_cost::operator+= (HOST_WIDE_INT c)
 305 {
 306   if (c >= INFTY)
 307     this->cost = INFTY;
 308
 309   if (infinite_cost_p ())
 310     return *this;
 311
 312   gcc_assert (this->cost + c < infinite_cost.cost);
 313   this->cost += c;
 314
 315   return *this;
 316 }
 317
 318 comp_cost
 319 comp_cost::operator-= (HOST_WIDE_INT c)
 320 {
 321   if (infinite_cost_p ())
 322     return *this;
 323
 324   gcc_assert (this->cost - c < infinite_cost.cost);
 325   this->cost -= c;
 326
 327   return *this;
 328 }
 329
 330 comp_cost
 331 comp_cost::operator/= (HOST_WIDE_INT c)
 332 {
 333   gcc_assert (c != 0);
 334   if (infinite_cost_p ())
 335     return *this;
 336
 337   this->cost /= c;
 338
 339   return *this;
 340 }
 341
 342 comp_cost
 343 comp_cost::operator*= (HOST_WIDE_INT c)
 344 {
 345   if (infinite_cost_p ())
 346     return *this;
 347
 348   gcc_assert (this->cost * c < infinite_cost.cost);
 349   this->cost *= c;
 350
 351   return *this;
 352 }
 353
 354 comp_cost
 355 comp_cost::operator-= (comp_cost cost)
 356 {
 357   *this = *this - cost;
 358   return *this;
 359 }
 360
 361 bool
 362 operator< (comp_cost cost1, comp_cost cost2)
 363 {
 364   if (cost1.cost == cost2.cost)
 365     return cost1.complexity < cost2.complexity;
 366
 367   return cost1.cost < cost2.cost;
 368 }
 369
 370 bool
 371 operator== (comp_cost cost1, comp_cost cost2)
 372 {
 373   return cost1.cost == cost2.cost
 374     && cost1.complexity == cost2.complexity;
 375 }
 376
 377 bool
 378 operator<= (comp_cost cost1, comp_cost cost2)
 379 {
 380   return cost1 < cost2 || cost1 == cost2;
 381 }
 382
 383 struct iv_inv_expr_ent;
 384
 385 /* The candidate - cost pair.  */
 386 class cost_pair
 387 {
 388 public:
 389   struct iv_cand *cand; /* The candidate.  */
 390   comp_cost cost;       /* The cost.  */
 391   enum tree_code comp;  /* For iv elimination, the comparison.  */
 392   bitmap inv_vars;      /* The list of invariant ssa_vars that have to be
 393                            preserved when representing iv_use with iv_cand.  */
 394   bitmap inv_exprs;     /* The list of newly created invariant expressions
 395                            when representing iv_use with iv_cand.  */
 396   tree value;           /* For final value elimination, the expression for
 397                            the final value of the iv.  For iv elimination,
 398                            the new bound to compare with.  */
 399 };
 400
 401 /* Use.  */
 402 struct iv_use
 403 {
 404   unsigned id;          /* The id of the use.  */
 405   unsigned group_id;    /* The group id the use belongs to.  */
 406   enum use_type type;   /* Type of the use.  */
 407   tree mem_type;        /* The memory type to use when testing whether an
 408                            address is legitimate, and what the address's
 409                            cost is.  */
 410   struct iv *iv;        /* The induction variable it is based on.  */
 411   gimple *stmt;         /* Statement in that it occurs.  */
 412   tree *op_p;           /* The place where it occurs.  */
 413
 414   tree addr_base;       /* Base address with const offset stripped.  */
 415   poly_uint64_pod addr_offset;
 416                         /* Const offset stripped from base address.  */
 417 };
 418
 419 /* Group of uses.  */
 420 struct iv_group
 421 {
 422   /* The id of the group.  */
 423   unsigned id;
 424   /* Uses of the group are of the same type.  */
 425   enum use_type type;
 426   /* The set of "related" IV candidates, plus the important ones.  */
 427   bitmap related_cands;
 428   /* Number of IV candidates in the cost_map.  */
 429   unsigned n_map_members;
 430   /* The costs wrto the iv candidates.  */
 431   class cost_pair *cost_map;
 432   /* The selected candidate for the group.  */
 433   struct iv_cand *selected;
 434   /* To indicate this is a doloop use group.  */
 435   bool doloop_p;
 436   /* Uses in the group.  */
 437   vec<struct iv_use *> vuses;
 438 };
 439
 440 /* The position where the iv is computed.  */
 441 enum iv_position
 442 {
 443   IP_NORMAL,            /* At the end, just before the exit condition.  */
 444   IP_END,               /* At the end of the latch block.  */
 445   IP_BEFORE_USE,        /* Immediately before a specific use.  */
 446   IP_AFTER_USE,         /* Immediately after a specific use.  */
 447   IP_ORIGINAL           /* The original biv.  */
 448 };
 449
 450 /* The induction variable candidate.  */
 451 struct iv_cand
 452 {
 453   unsigned id;          /* The number of the candidate.  */
 454   bool important;       /* Whether this is an "important" candidate, i.e. such
 455                            that it should be considered by all uses.  */
 456   bool involves_undefs; /* Whether the IV involves undefined values.  */
 457   ENUM_BITFIELD(iv_position) pos : 8;   /* Where it is computed.  */
 458   gimple *incremented_at;/* For original biv, the statement where it is
 459                            incremented.  */
 460   tree var_before;      /* The variable used for it before increment.  */
 461   tree var_after;       /* The variable used for it after increment.  */
 462   struct iv *iv;        /* The value of the candidate.  NULL for
 463                            "pseudocandidate" used to indicate the possibility
 464                            to replace the final value of an iv by direct
 465                            computation of the value.  */
 466   unsigned cost;        /* Cost of the candidate.  */
 467   unsigned cost_step;   /* Cost of the candidate's increment operation.  */
 468   struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
 469                               where it is incremented.  */
 470   bitmap inv_vars;      /* The list of invariant ssa_vars used in step of the
 471                            iv_cand.  */
 472   bitmap inv_exprs;     /* If step is more complicated than a single ssa_var,
 473                            handle it as a new invariant expression which will
 474                            be hoisted out of loop.  */
 475   struct iv *orig_iv;   /* The original iv if this cand is added from biv with
 476                            smaller type.  */
 477   bool doloop_p;        /* Whether this is a doloop candidate.  */
 478 };
 479
 480 /* Hashtable entry for common candidate derived from iv uses.  */
 481 class iv_common_cand
 482 {
 483 public:
 484   tree base;
 485   tree step;
 486   /* IV uses from which this common candidate is derived.  */
 487   auto_vec<struct iv_use *> uses;
 488   hashval_t hash;
 489 };
 490
 491 /* Hashtable helpers.  */
 492
 493 struct iv_common_cand_hasher : delete_ptr_hash <iv_common_cand>
 494 {
 495   static inline hashval_t hash (const iv_common_cand *);
 496   static inline bool equal (const iv_common_cand *, const iv_common_cand *);
 497 };
 498
 499 /* Hash function for possible common candidates.  */
 500
 501 inline hashval_t
 502 iv_common_cand_hasher::hash (const iv_common_cand *ccand)
 503 {
 504   return ccand->hash;
 505 }
 506
 507 /* Hash table equality function for common candidates.  */
 508
 509 inline bool
 510 iv_common_cand_hasher::equal (const iv_common_cand *ccand1,
 511                               const iv_common_cand *ccand2)
 512 {
 513   return (ccand1->hash == ccand2->hash
 514           && operand_equal_p (ccand1->base, ccand2->base, 0)
 515           && operand_equal_p (ccand1->step, ccand2->step, 0)
 516           && (TYPE_PRECISION (TREE_TYPE (ccand1->base))
 517               == TYPE_PRECISION (TREE_TYPE (ccand2->base))));
 518 }
 519
 520 /* Loop invariant expression hashtable entry.  */
 521
 522 struct iv_inv_expr_ent
 523 {
 524   /* Tree expression of the entry.  */
 525   tree expr;
 526   /* Unique indentifier.  */
 527   int id;
 528   /* Hash value.  */
 529   hashval_t hash;
 530 };
 531
 532 /* Sort iv_inv_expr_ent pair A and B by id field.  */
 533
 534 static int
 535 sort_iv_inv_expr_ent (const void *a, const void *b)
 536 {
 537   const iv_inv_expr_ent * const *e1 = (const iv_inv_expr_ent * const *) (a);
 538   const iv_inv_expr_ent * const *e2 = (const iv_inv_expr_ent * const *) (b);
 539
 540   unsigned id1 = (*e1)->id;
 541   unsigned id2 = (*e2)->id;
 542
 543   if (id1 < id2)
 544     return -1;
 545   else if (id1 > id2)
 546     return 1;
 547   else
 548     return 0;
 549 }
 550
 551 /* Hashtable helpers.  */
 552
 553 struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
 554 {
 555   static inline hashval_t hash (const iv_inv_expr_ent *);
 556   static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
 557 };
 558
 559 /* Return true if uses of type TYPE represent some form of address.  */
 560
 561 inline bool
 562 address_p (use_type type)
 563 {
 564   return type == USE_REF_ADDRESS || type == USE_PTR_ADDRESS;
 565 }
 566
 567 /* Hash function for loop invariant expressions.  */
 568
 569 inline hashval_t
 570 iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
 571 {
 572   return expr->hash;
 573 }
 574
 575 /* Hash table equality function for expressions.  */
 576
 577 inline bool
 578 iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
 579                            const iv_inv_expr_ent *expr2)
 580 {
 581   return expr1->hash == expr2->hash
 582          && operand_equal_p (expr1->expr, expr2->expr, 0);
 583 }
 584
 585 struct ivopts_data
 586 {
 587   /* The currently optimized loop.  */
 588   class loop *current_loop;
 589   location_t loop_loc;
 590
 591   /* Numbers of iterations for all exits of the current loop.  */
 592   hash_map<edge, tree_niter_desc *> *niters;
 593
 594   /* Number of registers used in it.  */
 595   unsigned regs_used;
 596
 597   /* The size of version_info array allocated.  */
 598   unsigned version_info_size;
 599
 600   /* The array of information for the ssa names.  */
 601   struct version_info *version_info;
 602
 603   /* The hashtable of loop invariant expressions created
 604      by ivopt.  */
 605   hash_table<iv_inv_expr_hasher> *inv_expr_tab;
 606
 607   /* The bitmap of indices in version_info whose value was changed.  */
 608   bitmap relevant;
 609
 610   /* The uses of induction variables.  */
 611   vec<iv_group *> vgroups;
 612
 613   /* The candidates.  */
 614   vec<iv_cand *> vcands;
 615
 616   /* A bitmap of important candidates.  */
 617   bitmap important_candidates;
 618
 619   /* Cache used by tree_to_aff_combination_expand.  */
 620   hash_map<tree, name_expansion *> *name_expansion_cache;
 621
 622   /* The hashtable of common candidates derived from iv uses.  */
 623   hash_table<iv_common_cand_hasher> *iv_common_cand_tab;
 624
 625   /* The common candidates.  */
 626   vec<iv_common_cand *> iv_common_cands;
 627
 628   /* Hash map recording base object information of tree exp.  */
 629   hash_map<tree, tree> *base_object_map;
 630
 631   /* The maximum invariant variable id.  */
 632   unsigned max_inv_var_id;
 633
 634   /* The maximum invariant expression id.  */
 635   unsigned max_inv_expr_id;
 636
 637   /* Number of no_overflow BIVs which are not used in memory address.  */
 638   unsigned bivs_not_used_in_addr;
 639
 640   /* Obstack for iv structure.  */
 641   struct obstack iv_obstack;
 642
 643   /* Whether to consider just related and important candidates when replacing a
 644      use.  */
 645   bool consider_all_candidates;
 646
 647   /* Are we optimizing for speed?  */
 648   bool speed;
 649
 650   /* Whether the loop body includes any function calls.  */
 651   bool body_includes_call;
 652
 653   /* Whether the loop body can only be exited via single exit.  */
 654   bool loop_single_exit_p;
 655
 656   /* Whether the loop has doloop comparison use.  */
 657   bool doloop_use_p;
 658 };
 659
 660 /* An assignment of iv candidates to uses.  */
 661
 662 class iv_ca
 663 {
 664 public:
 665   /* The number of uses covered by the assignment.  */
 666   unsigned upto;
 667
 668   /* Number of uses that cannot be expressed by the candidates in the set.  */
 669   unsigned bad_groups;
 670
 671   /* Candidate assigned to a use, together with the related costs.  */
 672   class cost_pair **cand_for_group;
 673
 674   /* Number of times each candidate is used.  */
 675   unsigned *n_cand_uses;
 676
 677   /* The candidates used.  */
 678   bitmap cands;
 679
 680   /* The number of candidates in the set.  */
 681   unsigned n_cands;
 682
 683   /* The number of invariants needed, including both invariant variants and
 684      invariant expressions.  */
 685   unsigned n_invs;
 686
 687   /* Total cost of expressing uses.  */
 688   comp_cost cand_use_cost;
 689
 690   /* Total cost of candidates.  */
 691   int64_t cand_cost;
 692
 693   /* Number of times each invariant variable is used.  */
 694   unsigned *n_inv_var_uses;
 695
 696   /* Number of times each invariant expression is used.  */
 697   unsigned *n_inv_expr_uses;
 698
 699   /* Total cost of the assignment.  */
 700   comp_cost cost;
 701 };
 702
 703 /* Difference of two iv candidate assignments.  */
 704
 705 struct iv_ca_delta
 706 {
 707   /* Changed group.  */
 708   struct iv_group *group;
 709
 710   /* An old assignment (for rollback purposes).  */
 711   class cost_pair *old_cp;
 712
 713   /* A new assignment.  */
 714   class cost_pair *new_cp;
 715
 716   /* Next change in the list.  */
 717   struct iv_ca_delta *next;
 718 };
 719
 720 /* Bound on number of candidates below that all candidates are considered.  */
 721
 722 #define CONSIDER_ALL_CANDIDATES_BOUND \
 723   ((unsigned) param_iv_consider_all_candidates_bound)
 724
 725 /* If there are more iv occurrences, we just give up (it is quite unlikely that
 726    optimizing such a loop would help, and it would take ages).  */
 727
 728 #define MAX_CONSIDERED_GROUPS \
 729   ((unsigned) param_iv_max_considered_uses)
 730
 731 /* If there are at most this number of ivs in the set, try removing unnecessary
 732    ivs from the set always.  */
 733
 734 #define ALWAYS_PRUNE_CAND_SET_BOUND \
 735   ((unsigned) param_iv_always_prune_cand_set_bound)
 736
 737 /* The list of trees for that the decl_rtl field must be reset is stored
 738    here.  */
 739
 740 static vec<tree> decl_rtl_to_reset;
 741
 742 static comp_cost force_expr_to_var_cost (tree, bool);
 743
 744 /* The single loop exit if it dominates the latch, NULL otherwise.  */
 745
 746 edge
 747 single_dom_exit (class loop *loop)
 748 {
 749   edge exit = single_exit (loop);
 750
 751   if (!exit)
 752     return NULL;
 753
 754   if (!just_once_each_iteration_p (loop, exit->src))
 755     return NULL;
 756
 757   return exit;
 758 }
 759
 760 /* Dumps information about the induction variable IV to FILE.  Don't dump
 761    variable's name if DUMP_NAME is FALSE.  The information is dumped with
 762    preceding spaces indicated by INDENT_LEVEL.  */
 763
 764 void
 765 dump_iv (FILE *file, struct iv *iv, bool dump_name, unsigned indent_level)
 766 {
 767   const char *p;
 768   const char spaces[9] = {' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '\0'};
 769
 770   if (indent_level > 4)
 771     indent_level = 4;
 772   p = spaces + 8 - (indent_level << 1);
 773
 774   fprintf (file, "%sIV struct:\n", p);
 775   if (iv->ssa_name && dump_name)
 776     {
 777       fprintf (file, "%s  SSA_NAME:\t", p);
 778       print_generic_expr (file, iv->ssa_name, TDF_SLIM);
 779       fprintf (file, "\n");
 780     }
 781
 782   fprintf (file, "%s  Type:\t", p);
 783   print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
 784   fprintf (file, "\n");
 785
 786   fprintf (file, "%s  Base:\t", p);
 787   print_generic_expr (file, iv->base, TDF_SLIM);
 788   fprintf (file, "\n");
 789
 790   fprintf (file, "%s  Step:\t", p);
 791   print_generic_expr (file, iv->step, TDF_SLIM);
 792   fprintf (file, "\n");
 793
 794   if (iv->base_object)
 795     {
 796       fprintf (file, "%s  Object:\t", p);
 797       print_generic_expr (file, iv->base_object, TDF_SLIM);
 798       fprintf (file, "\n");
 799     }
 800
 801   fprintf (file, "%s  Biv:\t%c\n", p, iv->biv_p ? 'Y' : 'N');
 802
 803   fprintf (file, "%s  Overflowness wrto loop niter:\t%s\n",
 804            p, iv->no_overflow ? "No-overflow" : "Overflow");
 805 }
 806
 807 /* Dumps information about the USE to FILE.  */
 808
 809 void
 810 dump_use (FILE *file, struct iv_use *use)
 811 {
 812   fprintf (file, "  Use %d.%d:\n", use->group_id, use->id);
 813   fprintf (file, "    At stmt:\t");
 814   print_gimple_stmt (file, use->stmt, 0);
 815   fprintf (file, "    At pos:\t");
 816   if (use->op_p)
 817     print_generic_expr (file, *use->op_p, TDF_SLIM);
 818   fprintf (file, "\n");
 819   dump_iv (file, use->iv, false, 2);
 820 }
 821
 822 /* Dumps information about the uses to FILE.  */
 823
 824 void
 825 dump_groups (FILE *file, struct ivopts_data *data)
 826 {
 827   unsigned i, j;
 828   struct iv_group *group;
 829
 830   for (i = 0; i < data->vgroups.length (); i++)
 831     {
 832       group = data->vgroups[i];
 833       fprintf (file, "Group %d:\n", group->id);
 834       if (group->type == USE_NONLINEAR_EXPR)
 835         fprintf (file, "  Type:\tGENERIC\n");
 836       else if (group->type == USE_REF_ADDRESS)
 837         fprintf (file, "  Type:\tREFERENCE ADDRESS\n");
 838       else if (group->type == USE_PTR_ADDRESS)
 839         fprintf (file, "  Type:\tPOINTER ARGUMENT ADDRESS\n");
 840       else
 841         {
 842           gcc_assert (group->type == USE_COMPARE);
 843           fprintf (file, "  Type:\tCOMPARE\n");
 844         }
 845       for (j = 0; j < group->vuses.length (); j++)
 846         dump_use (file, group->vuses[j]);
 847     }
 848 }
 849
 850 /* Dumps information about induction variable candidate CAND to FILE.  */
 851
 852 void
 853 dump_cand (FILE *file, struct iv_cand *cand)
 854 {
 855   struct iv *iv = cand->iv;
 856
 857   fprintf (file, "Candidate %d:\n", cand->id);
 858   if (cand->inv_vars)
 859     {
 860       fprintf (file, "  Depend on inv.vars: ");
 861       dump_bitmap (file, cand->inv_vars);
 862     }
 863   if (cand->inv_exprs)
 864     {
 865       fprintf (file, "  Depend on inv.exprs: ");
 866       dump_bitmap (file, cand->inv_exprs);
 867     }
 868
 869   if (cand->var_before)
 870     {
 871       fprintf (file, "  Var befor: ");
 872       print_generic_expr (file, cand->var_before, TDF_SLIM);
 873       fprintf (file, "\n");
 874     }
 875   if (cand->var_after)
 876     {
 877       fprintf (file, "  Var after: ");
 878       print_generic_expr (file, cand->var_after, TDF_SLIM);
 879       fprintf (file, "\n");
 880     }
 881
 882   switch (cand->pos)
 883     {
 884     case IP_NORMAL:
 885       fprintf (file, "  Incr POS: before exit test\n");
 886       break;
 887
 888     case IP_BEFORE_USE:
 889       fprintf (file, "  Incr POS: before use %d\n", cand->ainc_use->id);
 890       break;
 891
 892     case IP_AFTER_USE:
 893       fprintf (file, "  Incr POS: after use %d\n", cand->ainc_use->id);
 894       break;
 895
 896     case IP_END:
 897       fprintf (file, "  Incr POS: at end\n");
 898       break;
 899
 900     case IP_ORIGINAL:
 901       fprintf (file, "  Incr POS: orig biv\n");
 902       break;
 903     }
 904
 905   dump_iv (file, iv, false, 1);
 906 }
 907
 908 /* Returns the info for ssa version VER.  */
 909
 910 static inline struct version_info *
 911 ver_info (struct ivopts_data *data, unsigned ver)
 912 {
 913   return data->version_info + ver;
 914 }
 915
 916 /* Returns the info for ssa name NAME.  */
 917
 918 static inline struct version_info *
 919 name_info (struct ivopts_data *data, tree name)
 920 {
 921   return ver_info (data, SSA_NAME_VERSION (name));
 922 }
 923
 924 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
 925    emitted in LOOP.  */
 926
 927 static bool
 928 stmt_after_ip_normal_pos (class loop *loop, gimple *stmt)
 929 {
 930   basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
 931
 932   gcc_assert (bb);
 933
 934   if (sbb == loop->latch)
 935     return true;
 936
 937   if (sbb != bb)
 938     return false;
 939
 940   return stmt == last_stmt (bb);
 941 }
 942
 943 /* Returns true if STMT if after the place where the original induction
 944    variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
 945    if the positions are identical.  */
 946
 947 static bool
 948 stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal)
 949 {
 950   basic_block cand_bb = gimple_bb (cand->incremented_at);
 951   basic_block stmt_bb = gimple_bb (stmt);
 952
 953   if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
 954     return false;
 955
 956   if (stmt_bb != cand_bb)
 957     return true;
 958
 959   if (true_if_equal
 960       && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
 961     return true;
 962   return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
 963 }
 964
 965 /* Returns true if STMT if after the place where the induction variable
 966    CAND is incremented in LOOP.  */
 967
 968 static bool
 969 stmt_after_increment (class loop *loop, struct iv_cand *cand, gimple *stmt)
 970 {
 971   switch (cand->pos)
 972     {
 973     case IP_END:
 974       return false;
 975
 976     case IP_NORMAL:
 977       return stmt_after_ip_normal_pos (loop, stmt);
 978
 979     case IP_ORIGINAL:
 980     case IP_AFTER_USE:
 981       return stmt_after_inc_pos (cand, stmt, false);
 982
 983     case IP_BEFORE_USE:
 984       return stmt_after_inc_pos (cand, stmt, true);
 985
 986     default:
 987       gcc_unreachable ();
 988     }
 989 }
 990
 991 /* walk_tree callback for contains_abnormal_ssa_name_p.  */
 992
 993 static tree
 994 contains_abnormal_ssa_name_p_1 (tree *tp, int *walk_subtrees, void *)
 995 {
 996   if (TREE_CODE (*tp) == SSA_NAME
 997       && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (*tp))
 998     return *tp;
 999
1000   if (!EXPR_P (*tp))
1001     *walk_subtrees = 0;
1002
1003   return NULL_TREE;
1004 }
1005
1006 /* Returns true if EXPR contains a ssa name that occurs in an
1007    abnormal phi node.  */
1008
1009 bool
1010 contains_abnormal_ssa_name_p (tree expr)
1011 {
1012   return walk_tree_without_duplicates
1013            (&expr, contains_abnormal_ssa_name_p_1, NULL) != NULL_TREE;
1014 }
1015
1016 /*  Returns the structure describing number of iterations determined from
1017     EXIT of DATA->current_loop, or NULL if something goes wrong.  */
1018
1019 static class tree_niter_desc *
1020 niter_for_exit (struct ivopts_data *data, edge exit)
1021 {
1022   class tree_niter_desc *desc;
1023   tree_niter_desc **slot;
1024
1025   if (!data->niters)
1026     {
1027       data->niters = new hash_map<edge, tree_niter_desc *>;
1028       slot = NULL;
1029     }
1030   else
1031     slot = data->niters->get (exit);
1032
1033   if (!slot)
1034     {
1035       /* Try to determine number of iterations.  We cannot safely work with ssa
1036          names that appear in phi nodes on abnormal edges, so that we do not
1037          create overlapping life ranges for them (PR 27283).  */
1038       desc = XNEW (class tree_niter_desc);
1039       if (!number_of_iterations_exit (data->current_loop,
1040                                       exit, desc, true)
1041           || contains_abnormal_ssa_name_p (desc->niter))
1042         {
1043           XDELETE (desc);
1044           desc = NULL;
1045         }
1046       data->niters->put (exit, desc);
1047     }
1048   else
1049     desc = *slot;
1050
1051   return desc;
1052 }
1053
1054 /* Returns the structure describing number of iterations determined from
1055    single dominating exit of DATA->current_loop, or NULL if something
1056    goes wrong.  */
1057
1058 static class tree_niter_desc *
1059 niter_for_single_dom_exit (struct ivopts_data *data)
1060 {
1061   edge exit = single_dom_exit (data->current_loop);
1062
1063   if (!exit)
1064     return NULL;
1065
1066   return niter_for_exit (data, exit);
1067 }
1068
1069 /* Initializes data structures used by the iv optimization pass, stored
1070    in DATA.  */
1071
1072 static void
1073 tree_ssa_iv_optimize_init (struct ivopts_data *data)
1074 {
1075   data->version_info_size = 2 * num_ssa_names;
1076   data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
1077   data->relevant = BITMAP_ALLOC (NULL);
1078   data->important_candidates = BITMAP_ALLOC (NULL);
1079   data->max_inv_var_id = 0;
1080   data->max_inv_expr_id = 0;
1081   data->niters = NULL;
1082   data->vgroups.create (20);
1083   data->vcands.create (20);
1084   data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
1085   data->name_expansion_cache = NULL;
1086   data->base_object_map = NULL;
1087   data->iv_common_cand_tab = new hash_table<iv_common_cand_hasher> (10);
1088   data->iv_common_cands.create (20);
1089   decl_rtl_to_reset.create (20);
1090   gcc_obstack_init (&data->iv_obstack);
1091 }
1092
1093 /* walk_tree callback for determine_base_object.  */
1094
1095 static tree
1096 determine_base_object_1 (tree *tp, int *walk_subtrees, void *wdata)
1097 {
1098   tree_code code = TREE_CODE (*tp);
1099   tree obj = NULL_TREE;
1100   if (code == ADDR_EXPR)
1101     {
1102       tree base = get_base_address (TREE_OPERAND (*tp, 0));
1103       if (!base)
1104         obj = *tp;
1105       else if (TREE_CODE (base) != MEM_REF)
1106         obj = fold_convert (ptr_type_node, build_fold_addr_expr (base));
1107     }
1108   else if (code == SSA_NAME && POINTER_TYPE_P (TREE_TYPE (*tp)))
1109         obj = fold_convert (ptr_type_node, *tp);
1110
1111   if (!obj)
1112     {
1113       if (!EXPR_P (*tp))
1114         *walk_subtrees = 0;
1115
1116       return NULL_TREE;
1117     }
1118   /* Record special node for multiple base objects and stop.  */
1119   if (*static_cast<tree *> (wdata))
1120     {
1121       *static_cast<tree *> (wdata) = integer_zero_node;
1122       return integer_zero_node;
1123     }
1124   /* Record the base object and continue looking.  */
1125   *static_cast<tree *> (wdata) = obj;
1126   return NULL_TREE;
1127 }
1128
1129 /* Returns a memory object to that EXPR points with caching.  Return NULL if we
1130    are able to determine that it does not point to any such object; specially
1131    return integer_zero_node if EXPR contains multiple base objects.  */
1132
1133 static tree
1134 determine_base_object (struct ivopts_data *data, tree expr)
1135 {
1136   tree *slot, obj = NULL_TREE;
1137   if (data->base_object_map)
1138     {
1139       if ((slot = data->base_object_map->get(expr)) != NULL)
1140         return *slot;
1141     }
1142   else
1143     data->base_object_map = new hash_map<tree, tree>;
1144
1145   (void) walk_tree_without_duplicates (&expr, determine_base_object_1, &obj);
1146   data->base_object_map->put (expr, obj);
1147   return obj;
1148 }
1149
1150 /* Return true if address expression with non-DECL_P operand appears
1151    in EXPR.  */
1152
1153 static bool
1154 contain_complex_addr_expr (tree expr)
1155 {
1156   bool res = false;
1157
1158   STRIP_NOPS (expr);
1159   switch (TREE_CODE (expr))
1160     {
1161     case POINTER_PLUS_EXPR:
1162     case PLUS_EXPR:
1163     case MINUS_EXPR:
1164       res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
1165       res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
1166       break;
1167
1168     case ADDR_EXPR:
1169       return (!DECL_P (TREE_OPERAND (expr, 0)));
1170
1171     default:
1172       return false;
1173     }
1174
1175   return res;
1176 }
1177
1178 /* Allocates an induction variable with given initial value BASE and step STEP
1179    for loop LOOP.  NO_OVERFLOW implies the iv doesn't overflow.  */
1180
1181 static struct iv *
1182 alloc_iv (struct ivopts_data *data, tree base, tree step,
1183           bool no_overflow = false)
1184 {
1185   tree expr = base;
1186   struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
1187                                               sizeof (struct iv));
1188   gcc_assert (step != NULL_TREE);
1189
1190   /* Lower address expression in base except ones with DECL_P as operand.
1191      By doing this:
1192        1) More accurate cost can be computed for address expressions;
1193        2) Duplicate candidates won't be created for bases in different
1194           forms, like &a[0] and &a.  */
1195   STRIP_NOPS (expr);
1196   if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
1197       || contain_complex_addr_expr (expr))
1198     {
1199       aff_tree comb;
1200       tree_to_aff_combination (expr, TREE_TYPE (expr), &comb);
1201       base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
1202     }
1203
1204   iv->base = base;
1205   iv->base_object = determine_base_object (data, base);
1206   iv->step = step;
1207   iv->biv_p = false;
1208   iv->nonlin_use = NULL;
1209   iv->ssa_name = NULL_TREE;
1210   if (!no_overflow
1211        && !iv_can_overflow_p (data->current_loop, TREE_TYPE (base),
1212                               base, step))
1213     no_overflow = true;
1214   iv->no_overflow = no_overflow;
1215   iv->have_address_use = false;
1216
1217   return iv;
1218 }
1219
1220 /* Sets STEP and BASE for induction variable IV.  NO_OVERFLOW implies the IV
1221    doesn't overflow.  */
1222
1223 static void
1224 set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
1225         bool no_overflow)
1226 {
1227   struct version_info *info = name_info (data, iv);
1228
1229   gcc_assert (!info->iv);
1230
1231   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1232   info->iv = alloc_iv (data, base, step, no_overflow);
1233   info->iv->ssa_name = iv;
1234 }
1235
1236 /* Finds induction variable declaration for VAR.  */
1237
1238 static struct iv *
1239 get_iv (struct ivopts_data *data, tree var)
1240 {
1241   basic_block bb;
1242   tree type = TREE_TYPE (var);
1243
1244   if (!POINTER_TYPE_P (type)
1245       && !INTEGRAL_TYPE_P (type))
1246     return NULL;
1247
1248   if (!name_info (data, var)->iv)
1249     {
1250       bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1251
1252       if (!bb
1253           || !flow_bb_inside_loop_p (data->current_loop, bb))
1254         {
1255           if (POINTER_TYPE_P (type))
1256             type = sizetype;
1257           set_iv (data, var, var, build_int_cst (type, 0), true);
1258         }
1259     }
1260
1261   return name_info (data, var)->iv;
1262 }
1263
1264 /* Return the first non-invariant ssa var found in EXPR.  */
1265
1266 static tree
1267 extract_single_var_from_expr (tree expr)
1268 {
1269   int i, n;
1270   tree tmp;
1271   enum tree_code code;
1272
1273   if (!expr || is_gimple_min_invariant (expr))
1274     return NULL;
1275
1276   code = TREE_CODE (expr);
1277   if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1278     {
1279       n = TREE_OPERAND_LENGTH (expr);
1280       for (i = 0; i < n; i++)
1281         {
1282           tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));
1283
1284           if (tmp)
1285             return tmp;
1286         }
1287     }
1288   return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
1289 }
1290
1291 /* Finds basic ivs.  */
1292
1293 static bool
1294 find_bivs (struct ivopts_data *data)
1295 {
1296   gphi *phi;
1297   affine_iv iv;
1298   tree step, type, base, stop;
1299   bool found = false;
1300   class loop *loop = data->current_loop;
1301   gphi_iterator psi;
1302
1303   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1304     {
1305       phi = psi.phi ();
1306
1307       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1308         continue;
1309
1310       if (virtual_operand_p (PHI_RESULT (phi)))
1311         continue;
1312
1313       if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
1314         continue;
1315
1316       if (integer_zerop (iv.step))
1317         continue;
1318
1319       step = iv.step;
1320       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1321       /* Stop expanding iv base at the first ssa var referred by iv step.
1322          Ideally we should stop at any ssa var, because that's expensive
1323          and unusual to happen, we just do it on the first one.
1324
1325          See PR64705 for the rationale.  */
1326       stop = extract_single_var_from_expr (step);
1327       base = expand_simple_operations (base, stop);
1328       if (contains_abnormal_ssa_name_p (base)
1329           || contains_abnormal_ssa_name_p (step))
1330         continue;
1331
1332       type = TREE_TYPE (PHI_RESULT (phi));
1333       base = fold_convert (type, base);
1334       if (step)
1335         {
1336           if (POINTER_TYPE_P (type))
1337             step = convert_to_ptrofftype (step);
1338           else
1339             step = fold_convert (type, step);
1340         }
1341
1342       set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
1343       found = true;
1344     }
1345
1346   return found;
1347 }
1348
1349 /* Marks basic ivs.  */
1350
1351 static void
1352 mark_bivs (struct ivopts_data *data)
1353 {
1354   gphi *phi;
1355   gimple *def;
1356   tree var;
1357   struct iv *iv, *incr_iv;
1358   class loop *loop = data->current_loop;
1359   basic_block incr_bb;
1360   gphi_iterator psi;
1361
1362   data->bivs_not_used_in_addr = 0;
1363   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1364     {
1365       phi = psi.phi ();
1366
1367       iv = get_iv (data, PHI_RESULT (phi));
1368       if (!iv)
1369         continue;
1370
1371       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1372       def = SSA_NAME_DEF_STMT (var);
1373       /* Don't mark iv peeled from other one as biv.  */
1374       if (def
1375           && gimple_code (def) == GIMPLE_PHI
1376           && gimple_bb (def) == loop->header)
1377         continue;
1378
1379       incr_iv = get_iv (data, var);
1380       if (!incr_iv)
1381         continue;
1382
1383       /* If the increment is in the subloop, ignore it.  */
1384       incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1385       if (incr_bb->loop_father != data->current_loop
1386           || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1387         continue;
1388
1389       iv->biv_p = true;
1390       incr_iv->biv_p = true;
1391       if (iv->no_overflow)
1392         data->bivs_not_used_in_addr++;
1393       if (incr_iv->no_overflow)
1394         data->bivs_not_used_in_addr++;
1395     }
1396 }
1397
1398 /* Checks whether STMT defines a linear induction variable and stores its
1399    parameters to IV.  */
1400
1401 static bool
1402 find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv)
1403 {
1404   tree lhs, stop;
1405   class loop *loop = data->current_loop;
1406
1407   iv->base = NULL_TREE;
1408   iv->step = NULL_TREE;
1409
1410   if (gimple_code (stmt) != GIMPLE_ASSIGN)
1411     return false;
1412
1413   lhs = gimple_assign_lhs (stmt);
1414   if (TREE_CODE (lhs) != SSA_NAME)
1415     return false;
1416
1417   if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1418     return false;
1419
1420   /* Stop expanding iv base at the first ssa var referred by iv step.
1421      Ideally we should stop at any ssa var, because that's expensive
1422      and unusual to happen, we just do it on the first one.
1423
1424      See PR64705 for the rationale.  */
1425   stop = extract_single_var_from_expr (iv->step);
1426   iv->base = expand_simple_operations (iv->base, stop);
1427   if (contains_abnormal_ssa_name_p (iv->base)
1428       || contains_abnormal_ssa_name_p (iv->step))
1429     return false;
1430
1431   /* If STMT could throw, then do not consider STMT as defining a GIV.
1432      While this will suppress optimizations, we cannot safely delete this
1433      GIV and associated statements, even if it appears it is not used.  */
1434   if (stmt_could_throw_p (cfun, stmt))
1435     return false;
1436
1437   return true;
1438 }
1439
1440 /* Finds general ivs in statement STMT.  */
1441
1442 static void
1443 find_givs_in_stmt (struct ivopts_data *data, gimple *stmt)
1444 {
1445   affine_iv iv;
1446
1447   if (!find_givs_in_stmt_scev (data, stmt, &iv))
1448     return;
1449
1450   set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
1451 }
1452
1453 /* Finds general ivs in basic block BB.  */
1454
1455 static void
1456 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1457 {
1458   gimple_stmt_iterator bsi;
1459
1460   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1461     find_givs_in_stmt (data, gsi_stmt (bsi));
1462 }
1463
1464 /* Finds general ivs.  */
1465
1466 static void
1467 find_givs (struct ivopts_data *data, basic_block *body)
1468 {
1469   class loop *loop = data->current_loop;
1470   unsigned i;
1471
1472   for (i = 0; i < loop->num_nodes; i++)
1473     find_givs_in_bb (data, body[i]);
1474 }
1475
1476 /* For each ssa name defined in LOOP determines whether it is an induction
1477    variable and if so, its initial value and step.  */
1478
1479 static bool
1480 find_induction_variables (struct ivopts_data *data, basic_block *body)
1481 {
1482   unsigned i;
1483   bitmap_iterator bi;
1484
1485   if (!find_bivs (data))
1486     return false;
1487
1488   find_givs (data, body);
1489   mark_bivs (data);
1490
1491   if (dump_file && (dump_flags & TDF_DETAILS))
1492     {
1493       class tree_niter_desc *niter = niter_for_single_dom_exit (data);
1494
1495       if (niter)
1496         {
1497           fprintf (dump_file, "  number of iterations ");
1498           print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1499           if (!integer_zerop (niter->may_be_zero))
1500             {
1501               fprintf (dump_file, "; zero if ");
1502               print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1503             }
1504           fprintf (dump_file, "\n");
1505         };
1506
1507       fprintf (dump_file, "\n<Induction Vars>:\n");
1508       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1509         {
1510           struct version_info *info = ver_info (data, i);
1511           if (info->iv && info->iv->step && !integer_zerop (info->iv->step))
1512             dump_iv (dump_file, ver_info (data, i)->iv, true, 0);
1513         }
1514     }
1515
1516   return true;
1517 }
1518
1519 /* Records a use of TYPE at *USE_P in STMT whose value is IV in GROUP.
1520    For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1521    is the const offset stripped from IV base and MEM_TYPE is the type
1522    of the memory being addressed.  For uses of other types, ADDR_BASE
1523    and ADDR_OFFSET are zero by default and MEM_TYPE is NULL_TREE.  */
1524
1525 static struct iv_use *
1526 record_use (struct iv_group *group, tree *use_p, struct iv *iv,
1527             gimple *stmt, enum use_type type, tree mem_type,
1528             tree addr_base, poly_uint64 addr_offset)
1529 {
1530   struct iv_use *use = XCNEW (struct iv_use);
1531
1532   use->id = group->vuses.length ();
1533   use->group_id = group->id;
1534   use->type = type;
1535   use->mem_type = mem_type;
1536   use->iv = iv;
1537   use->stmt = stmt;
1538   use->op_p = use_p;
1539   use->addr_base = addr_base;
1540   use->addr_offset = addr_offset;
1541
1542   group->vuses.safe_push (use);
1543   return use;
1544 }
1545
1546 /* Checks whether OP is a loop-level invariant and if so, records it.
1547    NONLINEAR_USE is true if the invariant is used in a way we do not
1548    handle specially.  */
1549
1550 static void
1551 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1552 {
1553   basic_block bb;
1554   struct version_info *info;
1555
1556   if (TREE_CODE (op) != SSA_NAME
1557       || virtual_operand_p (op))
1558     return;
1559
1560   bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1561   if (bb
1562       && flow_bb_inside_loop_p (data->current_loop, bb))
1563     return;
1564
1565   info = name_info (data, op);
1566   info->name = op;
1567   info->has_nonlin_use |= nonlinear_use;
1568   if (!info->inv_id)
1569     info->inv_id = ++data->max_inv_var_id;
1570   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1571 }
1572
1573 /* Record a group of TYPE.  */
1574
1575 static struct iv_group *
1576 record_group (struct ivopts_data *data, enum use_type type)
1577 {
1578   struct iv_group *group = XCNEW (struct iv_group);
1579
1580   group->id = data->vgroups.length ();
1581   group->type = type;
1582   group->related_cands = BITMAP_ALLOC (NULL);
1583   group->vuses.create (1);
1584   group->doloop_p = false;
1585
1586   data->vgroups.safe_push (group);
1587   return group;
1588 }
1589
1590 /* Record a use of TYPE at *USE_P in STMT whose value is IV in a group.
1591    New group will be created if there is no existing group for the use.
1592    MEM_TYPE is the type of memory being addressed, or NULL if this
1593    isn't an address reference.  */
1594
1595 static struct iv_use *
1596 record_group_use (struct ivopts_data *data, tree *use_p,
1597                   struct iv *iv, gimple *stmt, enum use_type type,
1598                   tree mem_type)
1599 {
1600   tree addr_base = NULL;
1601   struct iv_group *group = NULL;
1602   poly_uint64 addr_offset = 0;
1603
1604   /* Record non address type use in a new group.  */
1605   if (address_p (type))
1606     {
1607       unsigned int i;
1608
1609       addr_base = strip_offset (iv->base, &addr_offset);
1610       for (i = 0; i < data->vgroups.length (); i++)
1611         {
1612           struct iv_use *use;
1613
1614           group = data->vgroups[i];
1615           use = group->vuses[0];
1616           if (!address_p (use->type))
1617             continue;
1618
1619           /* Check if it has the same stripped base and step.  */
1620           if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
1621               && operand_equal_p (iv->step, use->iv->step, 0)
1622               && operand_equal_p (addr_base, use->addr_base, 0))
1623             break;
1624         }
1625       if (i == data->vgroups.length ())
1626         group = NULL;
1627     }
1628
1629   if (!group)
1630     group = record_group (data, type);
1631
1632   return record_use (group, use_p, iv, stmt, type, mem_type,
1633                      addr_base, addr_offset);
1634 }
1635
1636 /* Checks whether the use OP is interesting and if so, records it.  */
1637
1638 static struct iv_use *
1639 find_interesting_uses_op (struct ivopts_data *data, tree op)
1640 {
1641   struct iv *iv;
1642   gimple *stmt;
1643   struct iv_use *use;
1644
1645   if (TREE_CODE (op) != SSA_NAME)
1646     return NULL;
1647
1648   iv = get_iv (data, op);
1649   if (!iv)
1650     return NULL;
1651
1652   if (iv->nonlin_use)
1653     {
1654       gcc_assert (iv->nonlin_use->type == USE_NONLINEAR_EXPR);
1655       return iv->nonlin_use;
1656     }
1657
1658   if (integer_zerop (iv->step))
1659     {
1660       record_invariant (data, op, true);
1661       return NULL;
1662     }
1663
1664   stmt = SSA_NAME_DEF_STMT (op);
1665   gcc_assert (gimple_code (stmt) == GIMPLE_PHI || is_gimple_assign (stmt));
1666
1667   use = record_group_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR, NULL_TREE);
1668   iv->nonlin_use = use;
1669   return use;
1670 }
1671
1672 /* Indicate how compare type iv_use can be handled.  */
1673 enum comp_iv_rewrite
1674 {
1675   COMP_IV_NA,
1676   /* We may rewrite compare type iv_use by expressing value of the iv_use.  */
1677   COMP_IV_EXPR,
1678   /* We may rewrite compare type iv_uses on both sides of comparison by
1679      expressing value of each iv_use.  */
1680   COMP_IV_EXPR_2,
1681   /* We may rewrite compare type iv_use by expressing value of the iv_use
1682      or by eliminating it with other iv_cand.  */
1683   COMP_IV_ELIM
1684 };
1685
1686 /* Given a condition in statement STMT, checks whether it is a compare
1687    of an induction variable and an invariant.  If this is the case,
1688    CONTROL_VAR is set to location of the iv, BOUND to the location of
1689    the invariant, IV_VAR and IV_BOUND are set to the corresponding
1690    induction variable descriptions, and true is returned.  If this is not
1691    the case, CONTROL_VAR and BOUND are set to the arguments of the
1692    condition and false is returned.  */
1693
1694 static enum comp_iv_rewrite
1695 extract_cond_operands (struct ivopts_data *data, gimple *stmt,
1696                        tree **control_var, tree **bound,
1697                        struct iv **iv_var, struct iv **iv_bound)
1698 {
1699   /* The objects returned when COND has constant operands.  */
1700   static struct iv const_iv;
1701   static tree zero;
1702   tree *op0 = &zero, *op1 = &zero;
1703   struct iv *iv0 = &const_iv, *iv1 = &const_iv;
1704   enum comp_iv_rewrite rewrite_type = COMP_IV_NA;
1705
1706   if (gimple_code (stmt) == GIMPLE_COND)
1707     {
1708       gcond *cond_stmt = as_a <gcond *> (stmt);
1709       op0 = gimple_cond_lhs_ptr (cond_stmt);
1710       op1 = gimple_cond_rhs_ptr (cond_stmt);
1711     }
1712   else
1713     {
1714       op0 = gimple_assign_rhs1_ptr (stmt);
1715       op1 = gimple_assign_rhs2_ptr (stmt);
1716     }
1717
1718   zero = integer_zero_node;
1719   const_iv.step = integer_zero_node;
1720
1721   if (TREE_CODE (*op0) == SSA_NAME)
1722     iv0 = get_iv (data, *op0);
1723   if (TREE_CODE (*op1) == SSA_NAME)
1724     iv1 = get_iv (data, *op1);
1725
1726   /* If both sides of comparison are IVs.  We can express ivs on both end.  */
1727   if (iv0 && iv1 && !integer_zerop (iv0->step) && !integer_zerop (iv1->step))
1728     {
1729       rewrite_type = COMP_IV_EXPR_2;
1730       goto end;
1731     }
1732
1733   /* If none side of comparison is IV.  */
1734   if ((!iv0 || integer_zerop (iv0->step))
1735       && (!iv1 || integer_zerop (iv1->step)))
1736     goto end;
1737
1738   /* Control variable may be on the other side.  */
1739   if (!iv0 || integer_zerop (iv0->step))
1740     {
1741       std::swap (op0, op1);
1742       std::swap (iv0, iv1);
1743     }
1744   /* If one side is IV and the other side isn't loop invariant.  */
1745   if (!iv1)
1746     rewrite_type = COMP_IV_EXPR;
1747   /* If one side is IV and the other side is loop invariant.  */
1748   else if (!integer_zerop (iv0->step) && integer_zerop (iv1->step))
1749     rewrite_type = COMP_IV_ELIM;
1750
1751 end:
1752   if (control_var)
1753     *control_var = op0;
1754   if (iv_var)
1755     *iv_var = iv0;
1756   if (bound)
1757     *bound = op1;
1758   if (iv_bound)
1759     *iv_bound = iv1;
1760
1761   return rewrite_type;
1762 }
1763
1764 /* Checks whether the condition in STMT is interesting and if so,
1765    records it.  */
1766
1767 static void
1768 find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt)
1769 {
1770   tree *var_p, *bound_p;
1771   struct iv *var_iv, *bound_iv;
1772   enum comp_iv_rewrite ret;
1773
1774   ret = extract_cond_operands (data, stmt,
1775                                &var_p, &bound_p, &var_iv, &bound_iv);
1776   if (ret == COMP_IV_NA)
1777     {
1778       find_interesting_uses_op (data, *var_p);
1779       find_interesting_uses_op (data, *bound_p);
1780       return;
1781     }
1782
1783   record_group_use (data, var_p, var_iv, stmt, USE_COMPARE, NULL_TREE);
1784   /* Record compare type iv_use for iv on the other side of comparison.  */
1785   if (ret == COMP_IV_EXPR_2)
1786     record_group_use (data, bound_p, bound_iv, stmt, USE_COMPARE, NULL_TREE);
1787 }
1788
1789 /* Returns the outermost loop EXPR is obviously invariant in
1790    relative to the loop LOOP, i.e. if all its operands are defined
1791    outside of the returned loop.  Returns NULL if EXPR is not
1792    even obviously invariant in LOOP.  */
1793
1794 class loop *
1795 outermost_invariant_loop_for_expr (class loop *loop, tree expr)
1796 {
1797   basic_block def_bb;
1798   unsigned i, len;
1799
1800   if (is_gimple_min_invariant (expr))
1801     return current_loops->tree_root;
1802
1803   if (TREE_CODE (expr) == SSA_NAME)
1804     {
1805       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1806       if (def_bb)
1807         {
1808           if (flow_bb_inside_loop_p (loop, def_bb))
1809             return NULL;
1810           return superloop_at_depth (loop,
1811                                      loop_depth (def_bb->loop_father) + 1);
1812         }
1813
1814       return current_loops->tree_root;
1815     }
1816
1817   if (!EXPR_P (expr))
1818     return NULL;
1819
1820   unsigned maxdepth = 0;
1821   len = TREE_OPERAND_LENGTH (expr);
1822   for (i = 0; i < len; i++)
1823     {
1824       class loop *ivloop;
1825       if (!TREE_OPERAND (expr, i))
1826         continue;
1827
1828       ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1829       if (!ivloop)
1830         return NULL;
1831       maxdepth = MAX (maxdepth, loop_depth (ivloop));
1832     }
1833
1834   return superloop_at_depth (loop, maxdepth);
1835 }
1836
1837 /* Returns true if expression EXPR is obviously invariant in LOOP,
1838    i.e. if all its operands are defined outside of the LOOP.  LOOP
1839    should not be the function body.  */
1840
1841 bool
1842 expr_invariant_in_loop_p (class loop *loop, tree expr)
1843 {
1844   basic_block def_bb;
1845   unsigned i, len;
1846
1847   gcc_assert (loop_depth (loop) > 0);
1848
1849   if (is_gimple_min_invariant (expr))
1850     return true;
1851
1852   if (TREE_CODE (expr) == SSA_NAME)
1853     {
1854       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1855       if (def_bb
1856           && flow_bb_inside_loop_p (loop, def_bb))
1857         return false;
1858
1859       return true;
1860     }
1861
1862   if (!EXPR_P (expr))
1863     return false;
1864
1865   len = TREE_OPERAND_LENGTH (expr);
1866   for (i = 0; i < len; i++)
1867     if (TREE_OPERAND (expr, i)
1868         && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1869       return false;
1870
1871   return true;
1872 }
1873
1874 /* Given expression EXPR which computes inductive values with respect
1875    to loop recorded in DATA, this function returns biv from which EXPR
1876    is derived by tracing definition chains of ssa variables in EXPR.  */
1877
1878 static struct iv*
1879 find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
1880 {
1881   struct iv *iv;
1882   unsigned i, n;
1883   tree e2, e1;
1884   enum tree_code code;
1885   gimple *stmt;
1886
1887   if (expr == NULL_TREE)
1888     return NULL;
1889
1890   if (is_gimple_min_invariant (expr))
1891     return NULL;
1892
1893   code = TREE_CODE (expr);
1894   if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1895     {
1896       n = TREE_OPERAND_LENGTH (expr);
1897       for (i = 0; i < n; i++)
1898         {
1899           iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
1900           if (iv)
1901             return iv;
1902         }
1903     }
1904
1905   /* Stop if it's not ssa name.  */
1906   if (code != SSA_NAME)
1907     return NULL;
1908
1909   iv = get_iv (data, expr);
1910   if (!iv || integer_zerop (iv->step))
1911     return NULL;
1912   else if (iv->biv_p)
1913     return iv;
1914
1915   stmt = SSA_NAME_DEF_STMT (expr);
1916   if (gphi *phi = dyn_cast <gphi *> (stmt))
1917     {
1918       ssa_op_iter iter;
1919       use_operand_p use_p;
1920       basic_block phi_bb = gimple_bb (phi);
1921
1922       /* Skip loop header PHI that doesn't define biv.  */
1923       if (phi_bb->loop_father == data->current_loop)
1924         return NULL;
1925
1926       if (virtual_operand_p (gimple_phi_result (phi)))
1927         return NULL;
1928
1929       FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
1930         {
1931           tree use = USE_FROM_PTR (use_p);
1932           iv = find_deriving_biv_for_expr (data, use);
1933           if (iv)
1934             return iv;
1935         }
1936       return NULL;
1937     }
1938   if (gimple_code (stmt) != GIMPLE_ASSIGN)
1939     return NULL;
1940
1941   e1 = gimple_assign_rhs1 (stmt);
1942   code = gimple_assign_rhs_code (stmt);
1943   if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1944     return find_deriving_biv_for_expr (data, e1);
1945
1946   switch (code)
1947     {
1948     case MULT_EXPR:
1949     case PLUS_EXPR:
1950     case MINUS_EXPR:
1951     case POINTER_PLUS_EXPR:
1952       /* Increments, decrements and multiplications by a constant
1953          are simple.  */
1954       e2 = gimple_assign_rhs2 (stmt);
1955       iv = find_deriving_biv_for_expr (data, e2);
1956       if (iv)
1957         return iv;
1958       gcc_fallthrough ();
1959
1960     CASE_CONVERT:
1961       /* Casts are simple.  */
1962       return find_deriving_biv_for_expr (data, e1);
1963
1964     default:
1965       break;
1966     }
1967
1968   return NULL;
1969 }
1970
1971 /* Record BIV, its predecessor and successor that they are used in
1972    address type uses.  */
1973
1974 static void
1975 record_biv_for_address_use (struct ivopts_data *data, struct iv *biv)
1976 {
1977   unsigned i;
1978   tree type, base_1, base_2;
1979   bitmap_iterator bi;
1980
1981   if (!biv || !biv->biv_p || integer_zerop (biv->step)
1982       || biv->have_address_use || !biv->no_overflow)
1983     return;
1984
1985   type = TREE_TYPE (biv->base);
1986   if (!INTEGRAL_TYPE_P (type))
1987     return;
1988
1989   biv->have_address_use = true;
1990   data->bivs_not_used_in_addr--;
1991   base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step);
1992   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1993     {
1994       struct iv *iv = ver_info (data, i)->iv;
1995
1996       if (!iv || !iv->biv_p || integer_zerop (iv->step)
1997           || iv->have_address_use || !iv->no_overflow)
1998         continue;
1999
2000       if (type != TREE_TYPE (iv->base)
2001           || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
2002         continue;
2003
2004       if (!operand_equal_p (biv->step, iv->step, 0))
2005         continue;
2006
2007       base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step);
2008       if (operand_equal_p (base_1, iv->base, 0)
2009           || operand_equal_p (base_2, biv->base, 0))
2010         {
2011           iv->have_address_use = true;
2012           data->bivs_not_used_in_addr--;
2013         }
2014     }
2015 }
2016
2017 /* Cumulates the steps of indices into DATA and replaces their values with the
2018    initial ones.  Returns false when the value of the index cannot be determined.
2019    Callback for for_each_index.  */
2020
2021 struct ifs_ivopts_data
2022 {
2023   struct ivopts_data *ivopts_data;
2024   gimple *stmt;
2025   tree step;
2026 };
2027
2028 static bool
2029 idx_find_step (tree base, tree *idx, void *data)
2030 {
2031   struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
2032   struct iv *iv;
2033   bool use_overflow_semantics = false;
2034   tree step, iv_base, iv_step, lbound, off;
2035   class loop *loop = dta->ivopts_data->current_loop;
2036
2037   /* If base is a component ref, require that the offset of the reference
2038      be invariant.  */
2039   if (TREE_CODE (base) == COMPONENT_REF)
2040     {
2041       off = component_ref_field_offset (base);
2042       return expr_invariant_in_loop_p (loop, off);
2043     }
2044
2045   /* If base is array, first check whether we will be able to move the
2046      reference out of the loop (in order to take its address in strength
2047      reduction).  In order for this to work we need both lower bound
2048      and step to be loop invariants.  */
2049   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2050     {
2051       /* Moreover, for a range, the size needs to be invariant as well.  */
2052       if (TREE_CODE (base) == ARRAY_RANGE_REF
2053           && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
2054         return false;
2055
2056       step = array_ref_element_size (base);
2057       lbound = array_ref_low_bound (base);
2058
2059       if (!expr_invariant_in_loop_p (loop, step)
2060           || !expr_invariant_in_loop_p (loop, lbound))
2061         return false;
2062     }
2063
2064   if (TREE_CODE (*idx) != SSA_NAME)
2065     return true;
2066
2067   iv = get_iv (dta->ivopts_data, *idx);
2068   if (!iv)
2069     return false;
2070
2071   /* XXX  We produce for a base of *D42 with iv->base being &x[0]
2072           *&x[0], which is not folded and does not trigger the
2073           ARRAY_REF path below.  */
2074   *idx = iv->base;
2075
2076   if (integer_zerop (iv->step))
2077     return true;
2078
2079   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2080     {
2081       step = array_ref_element_size (base);
2082
2083       /* We only handle addresses whose step is an integer constant.  */
2084       if (TREE_CODE (step) != INTEGER_CST)
2085         return false;
2086     }
2087   else
2088     /* The step for pointer arithmetics already is 1 byte.  */
2089     step = size_one_node;
2090
2091   iv_base = iv->base;
2092   iv_step = iv->step;
2093   if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
2094     use_overflow_semantics = true;
2095
2096   if (!convert_affine_scev (dta->ivopts_data->current_loop,
2097                             sizetype, &iv_base, &iv_step, dta->stmt,
2098                             use_overflow_semantics))
2099     {
2100       /* The index might wrap.  */
2101       return false;
2102     }
2103
2104   step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
2105   dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
2106
2107   if (dta->ivopts_data->bivs_not_used_in_addr)
2108     {
2109       if (!iv->biv_p)
2110         iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name);
2111
2112       record_biv_for_address_use (dta->ivopts_data, iv);
2113     }
2114   return true;
2115 }
2116
2117 /* Records use in index IDX.  Callback for for_each_index.  Ivopts data
2118    object is passed to it in DATA.  */
2119
2120 static bool
2121 idx_record_use (tree base, tree *idx,
2122                 void *vdata)
2123 {
2124   struct ivopts_data *data = (struct ivopts_data *) vdata;
2125   find_interesting_uses_op (data, *idx);
2126   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2127     {
2128       if (TREE_OPERAND (base, 2))
2129         find_interesting_uses_op (data, TREE_OPERAND (base, 2));
2130       if (TREE_OPERAND (base, 3))
2131         find_interesting_uses_op (data, TREE_OPERAND (base, 3));
2132     }
2133   return true;
2134 }
2135
2136 /* If we can prove that TOP = cst * BOT for some constant cst,
2137    store cst to MUL and return true.  Otherwise return false.
2138    The returned value is always sign-extended, regardless of the
2139    signedness of TOP and BOT.  */
2140
2141 static bool
2142 constant_multiple_of (tree top, tree bot, widest_int *mul)
2143 {
2144   tree mby;
2145   enum tree_code code;
2146   unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
2147   widest_int res, p0, p1;
2148
2149   STRIP_NOPS (top);
2150   STRIP_NOPS (bot);
2151
2152   if (operand_equal_p (top, bot, 0))
2153     {
2154       *mul = 1;
2155       return true;
2156     }
2157
2158   code = TREE_CODE (top);
2159   switch (code)
2160     {
2161     case MULT_EXPR:
2162       mby = TREE_OPERAND (top, 1);
2163       if (TREE_CODE (mby) != INTEGER_CST)
2164         return false;
2165
2166       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
2167         return false;
2168
2169       *mul = wi::sext (res * wi::to_widest (mby), precision);
2170       return true;
2171
2172     case PLUS_EXPR:
2173     case MINUS_EXPR:
2174       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
2175           || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
2176         return false;
2177
2178       if (code == MINUS_EXPR)
2179         p1 = -p1;
2180       *mul = wi::sext (p0 + p1, precision);
2181       return true;
2182
2183     case INTEGER_CST:
2184       if (TREE_CODE (bot) != INTEGER_CST)
2185         return false;
2186
2187       p0 = widest_int::from (wi::to_wide (top), SIGNED);
2188       p1 = widest_int::from (wi::to_wide (bot), SIGNED);
2189       if (p1 == 0)
2190         return false;
2191       *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
2192       return res == 0;
2193
2194     default:
2195       if (POLY_INT_CST_P (top)
2196           && POLY_INT_CST_P (bot)
2197           && constant_multiple_p (wi::to_poly_widest (top),
2198                                   wi::to_poly_widest (bot), mul))
2199         return true;
2200
2201       return false;
2202     }
2203 }
2204
2205 /* Return true if memory reference REF with step STEP may be unaligned.  */
2206
2207 static bool
2208 may_be_unaligned_p (tree ref, tree step)
2209 {
2210   /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
2211      thus they are not misaligned.  */
2212   if (TREE_CODE (ref) == TARGET_MEM_REF)
2213     return false;
2214
2215   unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
2216   if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
2217     align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
2218
2219   unsigned HOST_WIDE_INT bitpos;
2220   unsigned int ref_align;
2221   get_object_alignment_1 (ref, &ref_align, &bitpos);
2222   if (ref_align < align
2223       || (bitpos % align) != 0
2224       || (bitpos % BITS_PER_UNIT) != 0)
2225     return true;
2226
2227   unsigned int trailing_zeros = tree_ctz (step);
2228   if (trailing_zeros < HOST_BITS_PER_INT
2229       && (1U << trailing_zeros) * BITS_PER_UNIT < align)
2230     return true;
2231
2232   return false;
2233 }
2234
2235 /* Return true if EXPR may be non-addressable.   */
2236
2237 bool
2238 may_be_nonaddressable_p (tree expr)
2239 {
2240   switch (TREE_CODE (expr))
2241     {
2242     case VAR_DECL:
2243       /* Check if it's a register variable.  */
2244       return DECL_HARD_REGISTER (expr);
2245
2246     case TARGET_MEM_REF:
2247       /* TARGET_MEM_REFs are translated directly to valid MEMs on the
2248          target, thus they are always addressable.  */
2249       return false;
2250
2251     case MEM_REF:
2252       /* Likewise for MEM_REFs, modulo the storage order.  */
2253       return REF_REVERSE_STORAGE_ORDER (expr);
2254
2255     case BIT_FIELD_REF:
2256       if (REF_REVERSE_STORAGE_ORDER (expr))
2257         return true;
2258       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2259
2260     case COMPONENT_REF:
2261       if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2262         return true;
2263       return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
2264              || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2265
2266     case ARRAY_REF:
2267     case ARRAY_RANGE_REF:
2268       if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2269         return true;
2270       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2271
2272     case VIEW_CONVERT_EXPR:
2273       /* This kind of view-conversions may wrap non-addressable objects
2274          and make them look addressable.  After some processing the
2275          non-addressability may be uncovered again, causing ADDR_EXPRs
2276          of inappropriate objects to be built.  */
2277       if (is_gimple_reg (TREE_OPERAND (expr, 0))
2278           || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
2279         return true;
2280       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2281
2282     CASE_CONVERT:
2283       return true;
2284
2285     default:
2286       break;
2287     }
2288
2289   return false;
2290 }
2291
2292 /* Finds addresses in *OP_P inside STMT.  */
2293
2294 static void
2295 find_interesting_uses_address (struct ivopts_data *data, gimple *stmt,
2296                                tree *op_p)
2297 {
2298   tree base = *op_p, step = size_zero_node;
2299   struct iv *civ;
2300   struct ifs_ivopts_data ifs_ivopts_data;
2301
2302   /* Do not play with volatile memory references.  A bit too conservative,
2303      perhaps, but safe.  */
2304   if (gimple_has_volatile_ops (stmt))
2305     goto fail;
2306
2307   /* Ignore bitfields for now.  Not really something terribly complicated
2308      to handle.  TODO.  */
2309   if (TREE_CODE (base) == BIT_FIELD_REF)
2310     goto fail;
2311
2312   base = unshare_expr (base);
2313
2314   if (TREE_CODE (base) == TARGET_MEM_REF)
2315     {
2316       tree type = build_pointer_type (TREE_TYPE (base));
2317       tree astep;
2318
2319       if (TMR_BASE (base)
2320           && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
2321         {
2322           civ = get_iv (data, TMR_BASE (base));
2323           if (!civ)
2324             goto fail;
2325
2326           TMR_BASE (base) = civ->base;
2327           step = civ->step;
2328         }
2329       if (TMR_INDEX2 (base)
2330           && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
2331         {
2332           civ = get_iv (data, TMR_INDEX2 (base));
2333           if (!civ)
2334             goto fail;
2335
2336           TMR_INDEX2 (base) = civ->base;
2337           step = civ->step;
2338         }
2339       if (TMR_INDEX (base)
2340           && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
2341         {
2342           civ = get_iv (data, TMR_INDEX (base));
2343           if (!civ)
2344             goto fail;
2345
2346           TMR_INDEX (base) = civ->base;
2347           astep = civ->step;
2348
2349           if (astep)
2350             {
2351               if (TMR_STEP (base))
2352                 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
2353
2354               step = fold_build2 (PLUS_EXPR, type, step, astep);
2355             }
2356         }
2357
2358       if (integer_zerop (step))
2359         goto fail;
2360       base = tree_mem_ref_addr (type, base);
2361     }
2362   else
2363     {
2364       ifs_ivopts_data.ivopts_data = data;
2365       ifs_ivopts_data.stmt = stmt;
2366       ifs_ivopts_data.step = size_zero_node;
2367       if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
2368           || integer_zerop (ifs_ivopts_data.step))
2369         goto fail;
2370       step = ifs_ivopts_data.step;
2371
2372       /* Check that the base expression is addressable.  This needs
2373          to be done after substituting bases of IVs into it.  */
2374       if (may_be_nonaddressable_p (base))
2375         goto fail;
2376
2377       /* Moreover, on strict alignment platforms, check that it is
2378          sufficiently aligned.  */
2379       if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
2380         goto fail;
2381
2382       base = build_fold_addr_expr (base);
2383
2384       /* Substituting bases of IVs into the base expression might
2385          have caused folding opportunities.  */
2386       if (TREE_CODE (base) == ADDR_EXPR)
2387         {
2388           tree *ref = &TREE_OPERAND (base, 0);
2389           while (handled_component_p (*ref))
2390             ref = &TREE_OPERAND (*ref, 0);
2391           if (TREE_CODE (*ref) == MEM_REF)
2392             {
2393               tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2394                                       TREE_OPERAND (*ref, 0),
2395                                       TREE_OPERAND (*ref, 1));
2396               if (tem)
2397                 *ref = tem;
2398             }
2399         }
2400     }
2401
2402   civ = alloc_iv (data, base, step);
2403   /* Fail if base object of this memory reference is unknown.  */
2404   if (civ->base_object == NULL_TREE)
2405     goto fail;
2406
2407   record_group_use (data, op_p, civ, stmt, USE_REF_ADDRESS, TREE_TYPE (*op_p));
2408   return;
2409
2410 fail:
2411   for_each_index (op_p, idx_record_use, data);
2412 }
2413
2414 /* Finds and records invariants used in STMT.  */
2415
2416 static void
2417 find_invariants_stmt (struct ivopts_data *data, gimple *stmt)
2418 {
2419   ssa_op_iter iter;
2420   use_operand_p use_p;
2421   tree op;
2422
2423   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2424     {
2425       op = USE_FROM_PTR (use_p);
2426       record_invariant (data, op, false);
2427     }
2428 }
2429
2430 /* CALL calls an internal function.  If operand *OP_P will become an
2431    address when the call is expanded, return the type of the memory
2432    being addressed, otherwise return null.  */
2433
2434 static tree
2435 get_mem_type_for_internal_fn (gcall *call, tree *op_p)
2436 {
2437   switch (gimple_call_internal_fn (call))
2438     {
2439     case IFN_MASK_LOAD:
2440     case IFN_MASK_LOAD_LANES:
2441     case IFN_LEN_LOAD:
2442       if (op_p == gimple_call_arg_ptr (call, 0))
2443         return TREE_TYPE (gimple_call_lhs (call));
2444       return NULL_TREE;
2445
2446     case IFN_MASK_STORE:
2447     case IFN_MASK_STORE_LANES:
2448     case IFN_LEN_STORE:
2449       if (op_p == gimple_call_arg_ptr (call, 0))
2450         return TREE_TYPE (gimple_call_arg (call, 3));
2451       return NULL_TREE;
2452
2453     default:
2454       return NULL_TREE;
2455     }
2456 }
2457
2458 /* IV is a (non-address) iv that describes operand *OP_P of STMT.
2459    Return true if the operand will become an address when STMT
2460    is expanded and record the associated address use if so.  */
2461
2462 static bool
2463 find_address_like_use (struct ivopts_data *data, gimple *stmt, tree *op_p,
2464                        struct iv *iv)
2465 {
2466   /* Fail if base object of this memory reference is unknown.  */
2467   if (iv->base_object == NULL_TREE)
2468     return false;
2469
2470   tree mem_type = NULL_TREE;
2471   if (gcall *call = dyn_cast <gcall *> (stmt))
2472     if (gimple_call_internal_p (call))
2473       mem_type = get_mem_type_for_internal_fn (call, op_p);
2474   if (mem_type)
2475     {
2476       iv = alloc_iv (data, iv->base, iv->step);
2477       record_group_use (data, op_p, iv, stmt, USE_PTR_ADDRESS, mem_type);
2478       return true;
2479     }
2480   return false;
2481 }
2482
2483 /* Finds interesting uses of induction variables in the statement STMT.  */
2484
2485 static void
2486 find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
2487 {
2488   struct iv *iv;
2489   tree op, *lhs, *rhs;
2490   ssa_op_iter iter;
2491   use_operand_p use_p;
2492   enum tree_code code;
2493
2494   find_invariants_stmt (data, stmt);
2495
2496   if (gimple_code (stmt) == GIMPLE_COND)
2497     {
2498       find_interesting_uses_cond (data, stmt);
2499       return;
2500     }
2501
2502   if (is_gimple_assign (stmt))
2503     {
2504       lhs = gimple_assign_lhs_ptr (stmt);
2505       rhs = gimple_assign_rhs1_ptr (stmt);
2506
2507       if (TREE_CODE (*lhs) == SSA_NAME)
2508         {
2509           /* If the statement defines an induction variable, the uses are not
2510              interesting by themselves.  */
2511
2512           iv = get_iv (data, *lhs);
2513
2514           if (iv && !integer_zerop (iv->step))
2515             return;
2516         }
2517
2518       code = gimple_assign_rhs_code (stmt);
2519       if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2520           && (REFERENCE_CLASS_P (*rhs)
2521               || is_gimple_val (*rhs)))
2522         {
2523           if (REFERENCE_CLASS_P (*rhs))
2524             find_interesting_uses_address (data, stmt, rhs);
2525           else
2526             find_interesting_uses_op (data, *rhs);
2527
2528           if (REFERENCE_CLASS_P (*lhs))
2529             find_interesting_uses_address (data, stmt, lhs);
2530           return;
2531         }
2532       else if (TREE_CODE_CLASS (code) == tcc_comparison)
2533         {
2534           find_interesting_uses_cond (data, stmt);
2535           return;
2536         }
2537
2538       /* TODO -- we should also handle address uses of type
2539
2540          memory = call (whatever);
2541
2542          and
2543
2544          call (memory).  */
2545     }
2546
2547   if (gimple_code (stmt) == GIMPLE_PHI
2548       && gimple_bb (stmt) == data->current_loop->header)
2549     {
2550       iv = get_iv (data, PHI_RESULT (stmt));
2551
2552       if (iv && !integer_zerop (iv->step))
2553         return;
2554     }
2555
2556   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2557     {
2558       op = USE_FROM_PTR (use_p);
2559
2560       if (TREE_CODE (op) != SSA_NAME)
2561         continue;
2562
2563       iv = get_iv (data, op);
2564       if (!iv)
2565         continue;
2566
2567       if (!find_address_like_use (data, stmt, use_p->use, iv))
2568         find_interesting_uses_op (data, op);
2569     }
2570 }
2571
2572 /* Finds interesting uses of induction variables outside of loops
2573    on loop exit edge EXIT.  */
2574
2575 static void
2576 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2577 {
2578   gphi *phi;
2579   gphi_iterator psi;
2580   tree def;
2581
2582   for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2583     {
2584       phi = psi.phi ();
2585       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2586       if (!virtual_operand_p (def))
2587         find_interesting_uses_op (data, def);
2588     }
2589 }
2590
2591 /* Return TRUE if OFFSET is within the range of [base + offset] addressing
2592    mode for memory reference represented by USE.  */
2593
2594 static GTY (()) vec<rtx, va_gc> *addr_list;
2595
2596 static bool
2597 addr_offset_valid_p (struct iv_use *use, poly_int64 offset)
2598 {
2599   rtx reg, addr;
2600   unsigned list_index;
2601   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2602   machine_mode addr_mode, mem_mode = TYPE_MODE (use->mem_type);
2603
2604   list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2605   if (list_index >= vec_safe_length (addr_list))
2606     vec_safe_grow_cleared (addr_list, list_index + MAX_MACHINE_MODE, true);
2607
2608   addr = (*addr_list)[list_index];
2609   if (!addr)
2610     {
2611       addr_mode = targetm.addr_space.address_mode (as);
2612       reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2613       addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2614       (*addr_list)[list_index] = addr;
2615     }
2616   else
2617     addr_mode = GET_MODE (addr);
2618
2619   XEXP (addr, 1) = gen_int_mode (offset, addr_mode);
2620   return (memory_address_addr_space_p (mem_mode, addr, as));
2621 }
2622
2623 /* Comparison function to sort group in ascending order of addr_offset.  */
2624
2625 static int
2626 group_compare_offset (const void *a, const void *b)
2627 {
2628   const struct iv_use *const *u1 = (const struct iv_use *const *) a;
2629   const struct iv_use *const *u2 = (const struct iv_use *const *) b;
2630
2631   return compare_sizes_for_sort ((*u1)->addr_offset, (*u2)->addr_offset);
2632 }
2633
2634 /* Check if small groups should be split.  Return true if no group
2635    contains more than two uses with distinct addr_offsets.  Return
2636    false otherwise.  We want to split such groups because:
2637
2638      1) Small groups don't have much benefit and may interfer with
2639         general candidate selection.
2640      2) Size for problem with only small groups is usually small and
2641         general algorithm can handle it well.
2642
2643    TODO -- Above claim may not hold when we want to merge memory
2644    accesses with conseuctive addresses.  */
2645
2646 static bool
2647 split_small_address_groups_p (struct ivopts_data *data)
2648 {
2649   unsigned int i, j, distinct = 1;
2650   struct iv_use *pre;
2651   struct iv_group *group;
2652
2653   for (i = 0; i < data->vgroups.length (); i++)
2654     {
2655       group = data->vgroups[i];
2656       if (group->vuses.length () == 1)
2657         continue;
2658
2659       gcc_assert (address_p (group->type));
2660       if (group->vuses.length () == 2)
2661         {
2662           if (compare_sizes_for_sort (group->vuses[0]->addr_offset,
2663                                       group->vuses[1]->addr_offset) > 0)
2664             std::swap (group->vuses[0], group->vuses[1]);
2665         }
2666       else
2667         group->vuses.qsort (group_compare_offset);
2668
2669       if (distinct > 2)
2670         continue;
2671
2672       distinct = 1;
2673       for (pre = group->vuses[0], j = 1; j < group->vuses.length (); j++)
2674         {
2675           if (maybe_ne (group->vuses[j]->addr_offset, pre->addr_offset))
2676             {
2677               pre = group->vuses[j];
2678               distinct++;
2679             }
2680
2681           if (distinct > 2)
2682             break;
2683         }
2684     }
2685
2686   return (distinct <= 2);
2687 }
2688
2689 /* For each group of address type uses, this function further groups
2690    these uses according to the maximum offset supported by target's
2691    [base + offset] addressing mode.  */
2692
2693 static void
2694 split_address_groups (struct ivopts_data *data)
2695 {
2696   unsigned int i, j;
2697   /* Always split group.  */
2698   bool split_p = split_small_address_groups_p (data);
2699
2700   for (i = 0; i < data->vgroups.length (); i++)
2701     {
2702       struct iv_group *new_group = NULL;
2703       struct iv_group *group = data->vgroups[i];
2704       struct iv_use *use = group->vuses[0];
2705
2706       use->id = 0;
2707       use->group_id = group->id;
2708       if (group->vuses.length () == 1)
2709         continue;
2710
2711       gcc_assert (address_p (use->type));
2712
2713       for (j = 1; j < group->vuses.length ();)
2714         {
2715           struct iv_use *next = group->vuses[j];
2716           poly_int64 offset = next->addr_offset - use->addr_offset;
2717
2718           /* Split group if aksed to, or the offset against the first
2719              use can't fit in offset part of addressing mode.  IV uses
2720              having the same offset are still kept in one group.  */
2721           if (maybe_ne (offset, 0)
2722               && (split_p || !addr_offset_valid_p (use, offset)))
2723             {
2724               if (!new_group)
2725                 new_group = record_group (data, group->type);
2726               group->vuses.ordered_remove (j);
2727               new_group->vuses.safe_push (next);
2728               continue;
2729             }
2730
2731           next->id = j;
2732           next->group_id = group->id;
2733           j++;
2734         }
2735     }
2736 }
2737
2738 /* Finds uses of the induction variables that are interesting.  */
2739
2740 static void
2741 find_interesting_uses (struct ivopts_data *data, basic_block *body)
2742 {
2743   basic_block bb;
2744   gimple_stmt_iterator bsi;
2745   unsigned i;
2746   edge e;
2747
2748   for (i = 0; i < data->current_loop->num_nodes; i++)
2749     {
2750       edge_iterator ei;
2751       bb = body[i];
2752
2753       FOR_EACH_EDGE (e, ei, bb->succs)
2754         if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2755             && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2756           find_interesting_uses_outside (data, e);
2757
2758       for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2759         find_interesting_uses_stmt (data, gsi_stmt (bsi));
2760       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2761         if (!is_gimple_debug (gsi_stmt (bsi)))
2762           find_interesting_uses_stmt (data, gsi_stmt (bsi));
2763     }
2764
2765   split_address_groups (data);
2766
2767   if (dump_file && (dump_flags & TDF_DETAILS))
2768     {
2769       fprintf (dump_file, "\n<IV Groups>:\n");
2770       dump_groups (dump_file, data);
2771       fprintf (dump_file, "\n");
2772     }
2773 }
2774
2775 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
2776    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
2777    we are at the top-level of the processed address.  */
2778
2779 static tree
2780 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2781                 poly_int64 *offset)
2782 {
2783   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2784   enum tree_code code;
2785   tree type, orig_type = TREE_TYPE (expr);
2786   poly_int64 off0, off1;
2787   HOST_WIDE_INT st;
2788   tree orig_expr = expr;
2789
2790   STRIP_NOPS (expr);
2791
2792   type = TREE_TYPE (expr);
2793   code = TREE_CODE (expr);
2794   *offset = 0;
2795
2796   switch (code)
2797     {
2798     case POINTER_PLUS_EXPR:
2799     case PLUS_EXPR:
2800     case MINUS_EXPR:
2801       op0 = TREE_OPERAND (expr, 0);
2802       op1 = TREE_OPERAND (expr, 1);
2803
2804       op0 = strip_offset_1 (op0, false, false, &off0);
2805       op1 = strip_offset_1 (op1, false, false, &off1);
2806
2807       *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2808       if (op0 == TREE_OPERAND (expr, 0)
2809           && op1 == TREE_OPERAND (expr, 1))
2810         return orig_expr;
2811
2812       if (integer_zerop (op1))
2813         expr = op0;
2814       else if (integer_zerop (op0))
2815         {
2816           if (code == MINUS_EXPR)
2817             expr = fold_build1 (NEGATE_EXPR, type, op1);
2818           else
2819             expr = op1;
2820         }
2821       else
2822         expr = fold_build2 (code, type, op0, op1);
2823
2824       return fold_convert (orig_type, expr);
2825
2826     case MULT_EXPR:
2827       op1 = TREE_OPERAND (expr, 1);
2828       if (!cst_and_fits_in_hwi (op1))
2829         return orig_expr;
2830
2831       op0 = TREE_OPERAND (expr, 0);
2832       op0 = strip_offset_1 (op0, false, false, &off0);
2833       if (op0 == TREE_OPERAND (expr, 0))
2834         return orig_expr;
2835
2836       *offset = off0 * int_cst_value (op1);
2837       if (integer_zerop (op0))
2838         expr = op0;
2839       else
2840         expr = fold_build2 (MULT_EXPR, type, op0, op1);
2841
2842       return fold_convert (orig_type, expr);
2843
2844     case ARRAY_REF:
2845     case ARRAY_RANGE_REF:
2846       if (!inside_addr)
2847         return orig_expr;
2848
2849       step = array_ref_element_size (expr);
2850       if (!cst_and_fits_in_hwi (step))
2851         break;
2852
2853       st = int_cst_value (step);
2854       op1 = TREE_OPERAND (expr, 1);
2855       op1 = strip_offset_1 (op1, false, false, &off1);
2856       *offset = off1 * st;
2857
2858       if (top_compref
2859           && integer_zerop (op1))
2860         {
2861           /* Strip the component reference completely.  */
2862           op0 = TREE_OPERAND (expr, 0);
2863           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2864           *offset += off0;
2865           return op0;
2866         }
2867       break;
2868
2869     case COMPONENT_REF:
2870       {
2871         tree field;
2872
2873         if (!inside_addr)
2874           return orig_expr;
2875
2876         tmp = component_ref_field_offset (expr);
2877         field = TREE_OPERAND (expr, 1);
2878         if (top_compref
2879             && cst_and_fits_in_hwi (tmp)
2880             && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2881           {
2882             HOST_WIDE_INT boffset, abs_off;
2883
2884             /* Strip the component reference completely.  */
2885             op0 = TREE_OPERAND (expr, 0);
2886             op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2887             boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2888             abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2889             if (boffset < 0)
2890               abs_off = -abs_off;
2891
2892             *offset = off0 + int_cst_value (tmp) + abs_off;
2893             return op0;
2894           }
2895       }
2896       break;
2897
2898     case ADDR_EXPR:
2899       op0 = TREE_OPERAND (expr, 0);
2900       op0 = strip_offset_1 (op0, true, true, &off0);
2901       *offset += off0;
2902
2903       if (op0 == TREE_OPERAND (expr, 0))
2904         return orig_expr;
2905
2906       expr = build_fold_addr_expr (op0);
2907       return fold_convert (orig_type, expr);
2908
2909     case MEM_REF:
2910       /* ???  Offset operand?  */
2911       inside_addr = false;
2912       break;
2913
2914     default:
2915       if (ptrdiff_tree_p (expr, offset) && maybe_ne (*offset, 0))
2916         return build_int_cst (orig_type, 0);
2917       return orig_expr;
2918     }
2919
2920   /* Default handling of expressions for that we want to recurse into
2921      the first operand.  */
2922   op0 = TREE_OPERAND (expr, 0);
2923   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2924   *offset += off0;
2925
2926   if (op0 == TREE_OPERAND (expr, 0)
2927       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2928     return orig_expr;
2929
2930   expr = copy_node (expr);
2931   TREE_OPERAND (expr, 0) = op0;
2932   if (op1)
2933     TREE_OPERAND (expr, 1) = op1;
2934
2935   /* Inside address, we might strip the top level component references,
2936      thus changing type of the expression.  Handling of ADDR_EXPR
2937      will fix that.  */
2938   expr = fold_convert (orig_type, expr);
2939
2940   return expr;
2941 }
2942
2943 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
2944
2945 tree
2946 strip_offset (tree expr, poly_uint64_pod *offset)
2947 {
2948   poly_int64 off;
2949   tree core = strip_offset_1 (expr, false, false, &off);
2950   *offset = off;
2951   return core;
2952 }
2953
2954 /* Returns variant of TYPE that can be used as base for different uses.
2955    We return unsigned type with the same precision, which avoids problems
2956    with overflows.  */
2957
2958 static tree
2959 generic_type_for (tree type)
2960 {
2961   if (POINTER_TYPE_P (type))
2962     return unsigned_type_for (type);
2963
2964   if (TYPE_UNSIGNED (type))
2965     return type;
2966
2967   return unsigned_type_for (type);
2968 }
2969
2970 /* Private data for walk_tree.  */
2971
2972 struct walk_tree_data
2973 {
2974   bitmap *inv_vars;
2975   struct ivopts_data *idata;
2976 };
2977
2978 /* Callback function for walk_tree, it records invariants and symbol
2979    reference in *EXPR_P.  DATA is the structure storing result info.  */
2980
2981 static tree
2982 find_inv_vars_cb (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2983 {
2984   tree op = *expr_p;
2985   struct version_info *info;
2986   struct walk_tree_data *wdata = (struct walk_tree_data*) data;
2987
2988   if (TREE_CODE (op) != SSA_NAME)
2989     return NULL_TREE;
2990
2991   info = name_info (wdata->idata, op);
2992   /* Because we expand simple operations when finding IVs, loop invariant
2993      variable that isn't referred by the original loop could be used now.
2994      Record such invariant variables here.  */
2995   if (!info->iv)
2996     {
2997       struct ivopts_data *idata = wdata->idata;
2998       basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (op));
2999
3000       if (!bb || !flow_bb_inside_loop_p (idata->current_loop, bb))
3001         {
3002           tree steptype = TREE_TYPE (op);
3003           if (POINTER_TYPE_P (steptype))
3004             steptype = sizetype;
3005           set_iv (idata, op, op, build_int_cst (steptype, 0), true);
3006           record_invariant (idata, op, false);
3007         }
3008     }
3009   if (!info->inv_id || info->has_nonlin_use)
3010     return NULL_TREE;
3011
3012   if (!*wdata->inv_vars)
3013     *wdata->inv_vars = BITMAP_ALLOC (NULL);
3014   bitmap_set_bit (*wdata->inv_vars, info->inv_id);
3015
3016   return NULL_TREE;
3017 }
3018
3019 /* Records invariants in *EXPR_P.  INV_VARS is the bitmap to that we should
3020    store it.  */
3021
3022 static inline void
3023 find_inv_vars (struct ivopts_data *data, tree *expr_p, bitmap *inv_vars)
3024 {
3025   struct walk_tree_data wdata;
3026
3027   if (!inv_vars)
3028     return;
3029
3030   wdata.idata = data;
3031   wdata.inv_vars = inv_vars;
3032   walk_tree (expr_p, find_inv_vars_cb, &wdata, NULL);
3033 }
3034
3035 /* Get entry from invariant expr hash table for INV_EXPR.  New entry
3036    will be recorded if it doesn't exist yet.  Given below two exprs:
3037      inv_expr + cst1, inv_expr + cst2
3038    It's hard to make decision whether constant part should be stripped
3039    or not.  We choose to not strip based on below facts:
3040      1) We need to count ADD cost for constant part if it's stripped,
3041         which isn't always trivial where this functions is called.
3042      2) Stripping constant away may be conflict with following loop
3043         invariant hoisting pass.
3044      3) Not stripping constant away results in more invariant exprs,
3045         which usually leads to decision preferring lower reg pressure.  */
3046
3047 static iv_inv_expr_ent *
3048 get_loop_invariant_expr (struct ivopts_data *data, tree inv_expr)
3049 {
3050   STRIP_NOPS (inv_expr);
3051
3052   if (poly_int_tree_p (inv_expr)
3053       || TREE_CODE (inv_expr) == SSA_NAME)
3054     return NULL;
3055
3056   /* Don't strip constant part away as we used to.  */
3057
3058   /* Stores EXPR in DATA->inv_expr_tab, return pointer to iv_inv_expr_ent.  */
3059   struct iv_inv_expr_ent ent;
3060   ent.expr = inv_expr;
3061   ent.hash = iterative_hash_expr (inv_expr, 0);
3062   struct iv_inv_expr_ent **slot = data->inv_expr_tab->find_slot (&ent, INSERT);
3063
3064   if (!*slot)
3065     {
3066       *slot = XNEW (struct iv_inv_expr_ent);
3067       (*slot)->expr = inv_expr;
3068       (*slot)->hash = ent.hash;
3069       (*slot)->id = ++data->max_inv_expr_id;
3070     }
3071
3072   return *slot;
3073 }
3074
3075
3076 /* Return *TP if it is an SSA_NAME marked with TREE_VISITED, i.e., as
3077    unsuitable as ivopts candidates for potentially involving undefined
3078    behavior.  */
3079
3080 static tree
3081 find_ssa_undef (tree *tp, int *walk_subtrees, void *bb_)
3082 {
3083   basic_block bb = (basic_block) bb_;
3084   if (TREE_CODE (*tp) == SSA_NAME
3085       && ssa_name_maybe_undef_p (*tp)
3086       && !ssa_name_any_use_dominates_bb_p (*tp, bb))
3087     return *tp;
3088   if (!EXPR_P (*tp))
3089     *walk_subtrees = 0;
3090   return NULL;
3091 }
3092
3093 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
3094    position to POS.  If USE is not NULL, the candidate is set as related to
3095    it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
3096    replacement of the final value of the iv by a direct computation.  */
3097
3098 static struct iv_cand *
3099 add_candidate_1 (struct ivopts_data *data, tree base, tree step, bool important,
3100                  enum iv_position pos, struct iv_use *use,
3101                  gimple *incremented_at, struct iv *orig_iv = NULL,
3102                  bool doloop = false)
3103 {
3104   unsigned i;
3105   struct iv_cand *cand = NULL;
3106   tree type, orig_type;
3107
3108   gcc_assert (base && step);
3109
3110   /* -fkeep-gc-roots-live means that we have to keep a real pointer
3111      live, but the ivopts code may replace a real pointer with one
3112      pointing before or after the memory block that is then adjusted
3113      into the memory block during the loop.  FIXME: It would likely be
3114      better to actually force the pointer live and still use ivopts;
3115      for example, it would be enough to write the pointer into memory
3116      and keep it there until after the loop.  */
3117   if (flag_keep_gc_roots_live && POINTER_TYPE_P (TREE_TYPE (base)))
3118     return NULL;
3119
3120   /* If BASE contains undefined SSA names make sure we only record
3121      the original IV.  */
3122   bool involves_undefs = false;
3123   if (walk_tree (&base, find_ssa_undef, data->current_loop->header, NULL))
3124     {
3125       if (pos != IP_ORIGINAL)
3126         return NULL;
3127       important = false;
3128       involves_undefs = true;
3129     }
3130
3131   /* For non-original variables, make sure their values are computed in a type
3132      that does not invoke undefined behavior on overflows (since in general,
3133      we cannot prove that these induction variables are non-wrapping).  */
3134   if (pos != IP_ORIGINAL)
3135     {
3136       orig_type = TREE_TYPE (base);
3137       type = generic_type_for (orig_type);
3138       if (type != orig_type)
3139         {
3140           base = fold_convert (type, base);
3141           step = fold_convert (type, step);
3142         }
3143     }
3144
3145   for (i = 0; i < data->vcands.length (); i++)
3146     {
3147       cand = data->vcands[i];
3148
3149       if (cand->pos != pos)
3150         continue;
3151
3152       if (cand->incremented_at != incremented_at
3153           || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3154               && cand->ainc_use != use))
3155         continue;
3156
3157       if (operand_equal_p (base, cand->iv->base, 0)
3158           && operand_equal_p (step, cand->iv->step, 0)
3159           && (TYPE_PRECISION (TREE_TYPE (base))
3160               == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
3161         break;
3162     }
3163
3164   if (i == data->vcands.length ())
3165     {
3166       cand = XCNEW (struct iv_cand);
3167       cand->id = i;
3168       cand->iv = alloc_iv (data, base, step);
3169       cand->pos = pos;
3170       if (pos != IP_ORIGINAL)
3171         {
3172           if (doloop)
3173             cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "doloop");
3174           else
3175             cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
3176           cand->var_after = cand->var_before;
3177         }
3178       cand->important = important;
3179       cand->involves_undefs = involves_undefs;
3180       cand->incremented_at = incremented_at;
3181       cand->doloop_p = doloop;
3182       data->vcands.safe_push (cand);
3183
3184       if (!poly_int_tree_p (step))
3185         {
3186           find_inv_vars (data, &step, &cand->inv_vars);
3187
3188           iv_inv_expr_ent *inv_expr = get_loop_invariant_expr (data, step);
3189           /* Share bitmap between inv_vars and inv_exprs for cand.  */
3190           if (inv_expr != NULL)
3191             {
3192               cand->inv_exprs = cand->inv_vars;
3193               cand->inv_vars = NULL;
3194               if (cand->inv_exprs)
3195                 bitmap_clear (cand->inv_exprs);
3196               else
3197                 cand->inv_exprs = BITMAP_ALLOC (NULL);
3198
3199               bitmap_set_bit (cand->inv_exprs, inv_expr->id);
3200             }
3201         }
3202
3203       if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3204         cand->ainc_use = use;
3205       else
3206         cand->ainc_use = NULL;
3207
3208       cand->orig_iv = orig_iv;
3209       if (dump_file && (dump_flags & TDF_DETAILS))
3210         dump_cand (dump_file, cand);
3211     }
3212
3213   cand->important |= important;
3214   cand->doloop_p |= doloop;
3215
3216   /* Relate candidate to the group for which it is added.  */
3217   if (use)
3218     bitmap_set_bit (data->vgroups[use->group_id]->related_cands, i);
3219
3220   return cand;
3221 }
3222
3223 /* Returns true if incrementing the induction variable at the end of the LOOP
3224    is allowed.
3225
3226    The purpose is to avoid splitting latch edge with a biv increment, thus
3227    creating a jump, possibly confusing other optimization passes and leaving
3228    less freedom to scheduler.  So we allow IP_END only if IP_NORMAL is not
3229    available (so we do not have a better alternative), or if the latch edge
3230    is already nonempty.  */
3231
3232 static bool
3233 allow_ip_end_pos_p (class loop *loop)
3234 {
3235   if (!ip_normal_pos (loop))
3236     return true;
3237
3238   if (!empty_block_p (ip_end_pos (loop)))
3239     return true;
3240
3241   return false;
3242 }
3243
3244 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
3245    Important field is set to IMPORTANT.  */
3246
3247 static void
3248 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
3249                         bool important, struct iv_use *use)
3250 {
3251   basic_block use_bb = gimple_bb (use->stmt);
3252   machine_mode mem_mode;
3253   unsigned HOST_WIDE_INT cstepi;
3254
3255   /* If we insert the increment in any position other than the standard
3256      ones, we must ensure that it is incremented once per iteration.
3257      It must not be in an inner nested loop, or one side of an if
3258      statement.  */
3259   if (use_bb->loop_father != data->current_loop
3260       || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
3261       || stmt_can_throw_internal (cfun, use->stmt)
3262       || !cst_and_fits_in_hwi (step))
3263     return;
3264
3265   cstepi = int_cst_value (step);
3266
3267   mem_mode = TYPE_MODE (use->mem_type);
3268   if (((USE_LOAD_PRE_INCREMENT (mem_mode)
3269         || USE_STORE_PRE_INCREMENT (mem_mode))
3270        && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3271       || ((USE_LOAD_PRE_DECREMENT (mem_mode)
3272            || USE_STORE_PRE_DECREMENT (mem_mode))
3273           && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3274     {
3275       enum tree_code code = MINUS_EXPR;
3276       tree new_base;
3277       tree new_step = step;
3278
3279       if (POINTER_TYPE_P (TREE_TYPE (base)))
3280         {
3281           new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3282           code = POINTER_PLUS_EXPR;
3283         }
3284       else
3285         new_step = fold_convert (TREE_TYPE (base), new_step);
3286       new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
3287       add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
3288                        use->stmt);
3289     }
3290   if (((USE_LOAD_POST_INCREMENT (mem_mode)
3291         || USE_STORE_POST_INCREMENT (mem_mode))
3292        && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3293       || ((USE_LOAD_POST_DECREMENT (mem_mode)
3294            || USE_STORE_POST_DECREMENT (mem_mode))
3295           && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3296     {
3297       add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
3298                        use->stmt);
3299     }
3300 }
3301
3302 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
3303    position to POS.  If USE is not NULL, the candidate is set as related to
3304    it.  The candidate computation is scheduled before exit condition and at
3305    the end of loop.  */
3306
3307 static void
3308 add_candidate (struct ivopts_data *data, tree base, tree step, bool important,
3309                struct iv_use *use, struct iv *orig_iv = NULL,
3310                bool doloop = false)
3311 {
3312   if (ip_normal_pos (data->current_loop))
3313     add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL, orig_iv,
3314                      doloop);
3315   /* Exclude doloop candidate here since it requires decrement then comparison
3316      and jump, the IP_END position doesn't match.  */
3317   if (!doloop && ip_end_pos (data->current_loop)
3318       && allow_ip_end_pos_p (data->current_loop))
3319     add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv);
3320 }
3321
3322 /* Adds standard iv candidates.  */
3323
3324 static void
3325 add_standard_iv_candidates (struct ivopts_data *data)
3326 {
3327   add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
3328
3329   /* The same for a double-integer type if it is still fast enough.  */
3330   if (TYPE_PRECISION
3331         (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
3332       && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
3333     add_candidate (data, build_int_cst (long_integer_type_node, 0),
3334                    build_int_cst (long_integer_type_node, 1), true, NULL);
3335
3336   /* The same for a double-integer type if it is still fast enough.  */
3337   if (TYPE_PRECISION
3338         (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
3339       && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
3340     add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
3341                    build_int_cst (long_long_integer_type_node, 1), true, NULL);
3342 }
3343
3344
3345 /* Adds candidates bases on the old induction variable IV.  */
3346
3347 static void
3348 add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
3349 {
3350   gimple *phi;
3351   tree def;
3352   struct iv_cand *cand;
3353
3354   /* Check if this biv is used in address type use.  */
3355   if (iv->no_overflow  && iv->have_address_use
3356       && INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
3357       && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
3358     {
3359       tree base = fold_convert (sizetype, iv->base);
3360       tree step = fold_convert (sizetype, iv->step);
3361
3362       /* Add iv cand of same precision as index part in TARGET_MEM_REF.  */
3363       add_candidate (data, base, step, true, NULL, iv);
3364       /* Add iv cand of the original type only if it has nonlinear use.  */
3365       if (iv->nonlin_use)
3366         add_candidate (data, iv->base, iv->step, true, NULL);
3367     }
3368   else
3369     add_candidate (data, iv->base, iv->step, true, NULL);
3370
3371   /* The same, but with initial value zero.  */
3372   if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
3373     add_candidate (data, size_int (0), iv->step, true, NULL);
3374   else
3375     add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
3376                    iv->step, true, NULL);
3377
3378   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
3379   if (gimple_code (phi) == GIMPLE_PHI)
3380     {
3381       /* Additionally record the possibility of leaving the original iv
3382          untouched.  */
3383       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
3384       /* Don't add candidate if it's from another PHI node because
3385          it's an affine iv appearing in the form of PEELED_CHREC.  */
3386       phi = SSA_NAME_DEF_STMT (def);
3387       if (gimple_code (phi) != GIMPLE_PHI)
3388         {
3389           cand = add_candidate_1 (data,
3390                                   iv->base, iv->step, true, IP_ORIGINAL, NULL,
3391                                   SSA_NAME_DEF_STMT (def));
3392           if (cand)
3393             {
3394               cand->var_before = iv->ssa_name;
3395               cand->var_after = def;
3396             }
3397         }
3398       else
3399         gcc_assert (gimple_bb (phi) == data->current_loop->header);
3400     }
3401 }
3402
3403 /* Adds candidates based on the old induction variables.  */
3404
3405 static void
3406 add_iv_candidate_for_bivs (struct ivopts_data *data)
3407 {
3408   unsigned i;
3409   struct iv *iv;
3410   bitmap_iterator bi;
3411
3412   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
3413     {
3414       iv = ver_info (data, i)->iv;
3415       if (iv && iv->biv_p && !integer_zerop (iv->step))
3416         add_iv_candidate_for_biv (data, iv);
3417     }
3418 }
3419
3420 /* Record common candidate {BASE, STEP} derived from USE in hashtable.  */
3421
3422 static void
3423 record_common_cand (struct ivopts_data *data, tree base,
3424                     tree step, struct iv_use *use)
3425 {
3426   class iv_common_cand ent;
3427   class iv_common_cand **slot;
3428
3429   ent.base = base;
3430   ent.step = step;
3431   ent.hash = iterative_hash_expr (base, 0);
3432   ent.hash = iterative_hash_expr (step, ent.hash);
3433
3434   slot = data->iv_common_cand_tab->find_slot (&ent, INSERT);
3435   if (*slot == NULL)
3436     {
3437       *slot = new iv_common_cand ();
3438       (*slot)->base = base;
3439       (*slot)->step = step;
3440       (*slot)->uses.create (8);
3441       (*slot)->hash = ent.hash;
3442       data->iv_common_cands.safe_push ((*slot));
3443     }
3444
3445   gcc_assert (use != NULL);
3446   (*slot)->uses.safe_push (use);
3447   return;
3448 }
3449
3450 /* Comparison function used to sort common candidates.  */
3451
3452 static int
3453 common_cand_cmp (const void *p1, const void *p2)
3454 {
3455   unsigned n1, n2;
3456   const class iv_common_cand *const *const ccand1
3457     = (const class iv_common_cand *const *)p1;
3458   const class iv_common_cand *const *const ccand2
3459     = (const class iv_common_cand *const *)p2;
3460
3461   n1 = (*ccand1)->uses.length ();
3462   n2 = (*ccand2)->uses.length ();
3463   return n2 - n1;
3464 }
3465
3466 /* Adds IV candidates based on common candidated recorded.  */
3467
3468 static void
3469 add_iv_candidate_derived_from_uses (struct ivopts_data *data)
3470 {
3471   unsigned i, j;
3472   struct iv_cand *cand_1, *cand_2;
3473
3474   data->iv_common_cands.qsort (common_cand_cmp);
3475   for (i = 0; i < data->iv_common_cands.length (); i++)
3476     {
3477       class iv_common_cand *ptr = data->iv_common_cands[i];
3478
3479       /* Only add IV candidate if it's derived from multiple uses.  */
3480       if (ptr->uses.length () <= 1)
3481         break;
3482
3483       cand_1 = NULL;
3484       cand_2 = NULL;
3485       if (ip_normal_pos (data->current_loop))
3486         cand_1 = add_candidate_1 (data, ptr->base, ptr->step,
3487                                   false, IP_NORMAL, NULL, NULL);
3488
3489       if (ip_end_pos (data->current_loop)
3490           && allow_ip_end_pos_p (data->current_loop))
3491         cand_2 = add_candidate_1 (data, ptr->base, ptr->step,
3492                                   false, IP_END, NULL, NULL);
3493
3494       /* Bind deriving uses and the new candidates.  */
3495       for (j = 0; j < ptr->uses.length (); j++)
3496         {
3497           struct iv_group *group = data->vgroups[ptr->uses[j]->group_id];
3498           if (cand_1)
3499             bitmap_set_bit (group->related_cands, cand_1->id);
3500           if (cand_2)
3501             bitmap_set_bit (group->related_cands, cand_2->id);
3502         }
3503     }
3504
3505   /* Release data since it is useless from this point.  */
3506   data->iv_common_cand_tab->empty ();
3507   data->iv_common_cands.truncate (0);
3508 }
3509
3510 /* Adds candidates based on the value of USE's iv.  */
3511
3512 static void
3513 add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
3514 {
3515   poly_uint64 offset;
3516   tree base;
3517   struct iv *iv = use->iv;
3518   tree basetype = TREE_TYPE (iv->base);
3519
3520   /* Don't add candidate for iv_use with non integer, pointer or non-mode
3521      precision types, instead, add candidate for the corresponding scev in
3522      unsigned type with the same precision.  See PR93674 for more info.  */
3523   if ((TREE_CODE (basetype) != INTEGER_TYPE && !POINTER_TYPE_P (basetype))
3524       || !type_has_mode_precision_p (basetype))
3525     {
3526       basetype = lang_hooks.types.type_for_mode (TYPE_MODE (basetype),
3527                                                  TYPE_UNSIGNED (basetype));
3528       add_candidate (data, fold_convert (basetype, iv->base),
3529                      fold_convert (basetype, iv->step), false, NULL);
3530       return;
3531     }
3532
3533   add_candidate (data, iv->base, iv->step, false, use);
3534
3535   /* Record common candidate for use in case it can be shared by others.  */
3536   record_common_cand (data, iv->base, iv->step, use);
3537
3538   /* Record common candidate with initial value zero.  */
3539   basetype = TREE_TYPE (iv->base);
3540   if (POINTER_TYPE_P (basetype))
3541     basetype = sizetype;
3542   record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);
3543
3544   /* Compare the cost of an address with an unscaled index with the cost of
3545     an address with a scaled index and add candidate if useful.  */
3546   poly_int64 step;
3547   if (use != NULL
3548       && poly_int_tree_p (iv->step, &step)
3549       && address_p (use->type))
3550     {
3551       poly_int64 new_step;
3552       unsigned int fact = preferred_mem_scale_factor
3553         (use->iv->base,
3554          TYPE_MODE (use->mem_type),
3555          optimize_loop_for_speed_p (data->current_loop));
3556
3557       if (fact != 1
3558           && multiple_p (step, fact, &new_step))
3559         add_candidate (data, size_int (0),
3560                        wide_int_to_tree (sizetype, new_step),
3561                        true, NULL);
3562     }
3563
3564   /* Record common candidate with constant offset stripped in base.
3565      Like the use itself, we also add candidate directly for it.  */
3566   base = strip_offset (iv->base, &offset);
3567   if (maybe_ne (offset, 0U) || base != iv->base)
3568     {
3569       record_common_cand (data, base, iv->step, use);
3570       add_candidate (data, base, iv->step, false, use);
3571     }
3572
3573   /* Record common candidate with base_object removed in base.  */
3574   base = iv->base;
3575   STRIP_NOPS (base);
3576   if (iv->base_object != NULL && TREE_CODE (base) == POINTER_PLUS_EXPR)
3577     {
3578       tree step = iv->step;
3579
3580       STRIP_NOPS (step);
3581       base = TREE_OPERAND (base, 1);
3582       step = fold_convert (sizetype, step);
3583       record_common_cand (data, base, step, use);
3584       /* Also record common candidate with offset stripped.  */
3585       base = strip_offset (base, &offset);
3586       if (maybe_ne (offset, 0U))
3587         record_common_cand (data, base, step, use);
3588     }
3589
3590   /* At last, add auto-incremental candidates.  Make such variables
3591      important since other iv uses with same base object may be based
3592      on it.  */
3593   if (use != NULL && address_p (use->type))
3594     add_autoinc_candidates (data, iv->base, iv->step, true, use);
3595 }
3596
3597 /* Adds candidates based on the uses.  */
3598
3599 static void
3600 add_iv_candidate_for_groups (struct ivopts_data *data)
3601 {
3602   unsigned i;
3603
3604   /* Only add candidate for the first use in group.  */
3605   for (i = 0; i < data->vgroups.length (); i++)
3606     {
3607       struct iv_group *group = data->vgroups[i];
3608
3609       gcc_assert (group->vuses[0] != NULL);
3610       add_iv_candidate_for_use (data, group->vuses[0]);
3611     }
3612   add_iv_candidate_derived_from_uses (data);
3613 }
3614
3615 /* Record important candidates and add them to related_cands bitmaps.  */
3616
3617 static void
3618 record_important_candidates (struct ivopts_data *data)
3619 {
3620   unsigned i;
3621   struct iv_group *group;
3622
3623   for (i = 0; i < data->vcands.length (); i++)
3624     {
3625       struct iv_cand *cand = data->vcands[i];
3626
3627       if (cand->important)
3628         bitmap_set_bit (data->important_candidates, i);
3629     }
3630
3631   data->consider_all_candidates = (data->vcands.length ()
3632                                    <= CONSIDER_ALL_CANDIDATES_BOUND);
3633
3634   /* Add important candidates to groups' related_cands bitmaps.  */
3635   for (i = 0; i < data->vgroups.length (); i++)
3636     {
3637       group = data->vgroups[i];
3638       bitmap_ior_into (group->related_cands, data->important_candidates);
3639     }
3640 }
3641
3642 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3643    If consider_all_candidates is true, we use a two-dimensional array, otherwise
3644    we allocate a simple list to every use.  */
3645
3646 static void
3647 alloc_use_cost_map (struct ivopts_data *data)
3648 {
3649   unsigned i, size, s;
3650
3651   for (i = 0; i < data->vgroups.length (); i++)
3652     {
3653       struct iv_group *group = data->vgroups[i];
3654
3655       if (data->consider_all_candidates)
3656         size = data->vcands.length ();
3657       else
3658         {
3659           s = bitmap_count_bits (group->related_cands);
3660
3661           /* Round up to the power of two, so that moduling by it is fast.  */
3662           size = s ? (1 << ceil_log2 (s)) : 1;
3663         }
3664
3665       group->n_map_members = size;
3666       group->cost_map = XCNEWVEC (class cost_pair, size);
3667     }
3668 }
3669
3670 /* Sets cost of (GROUP, CAND) pair to COST and record that it depends
3671    on invariants INV_VARS and that the value used in expressing it is
3672    VALUE, and in case of iv elimination the comparison operator is COMP.  */
3673
3674 static void
3675 set_group_iv_cost (struct ivopts_data *data,
3676                    struct iv_group *group, struct iv_cand *cand,
3677                    comp_cost cost, bitmap inv_vars, tree value,
3678                    enum tree_code comp, bitmap inv_exprs)
3679 {
3680   unsigned i, s;
3681
3682   if (cost.infinite_cost_p ())
3683     {
3684       BITMAP_FREE (inv_vars);
3685       BITMAP_FREE (inv_exprs);
3686       return;
3687     }
3688
3689   if (data->consider_all_candidates)
3690     {
3691       group->cost_map[cand->id].cand = cand;
3692       group->cost_map[cand->id].cost = cost;
3693       group->cost_map[cand->id].inv_vars = inv_vars;
3694       group->cost_map[cand->id].inv_exprs = inv_exprs;
3695       group->cost_map[cand->id].value = value;
3696       group->cost_map[cand->id].comp = comp;
3697       return;
3698     }
3699
3700   /* n_map_members is a power of two, so this computes modulo.  */
3701   s = cand->id & (group->n_map_members - 1);
3702   for (i = s; i < group->n_map_members; i++)
3703     if (!group->cost_map[i].cand)
3704       goto found;
3705   for (i = 0; i < s; i++)
3706     if (!group->cost_map[i].cand)
3707       goto found;
3708
3709   gcc_unreachable ();
3710
3711 found:
3712   group->cost_map[i].cand = cand;
3713   group->cost_map[i].cost = cost;
3714   group->cost_map[i].inv_vars = inv_vars;
3715   group->cost_map[i].inv_exprs = inv_exprs;
3716   group->cost_map[i].value = value;
3717   group->cost_map[i].comp = comp;
3718 }
3719
3720 /* Gets cost of (GROUP, CAND) pair.  */
3721
3722 static class cost_pair *
3723 get_group_iv_cost (struct ivopts_data *data, struct iv_group *group,
3724                    struct iv_cand *cand)
3725 {
3726   unsigned i, s;
3727   class cost_pair *ret;
3728
3729   if (!cand)
3730     return NULL;
3731
3732   if (data->consider_all_candidates)
3733     {
3734       ret = group->cost_map + cand->id;
3735       if (!ret->cand)
3736         return NULL;
3737
3738       return ret;
3739     }
3740
3741   /* n_map_members is a power of two, so this computes modulo.  */
3742   s = cand->id & (group->n_map_members - 1);
3743   for (i = s; i < group->n_map_members; i++)
3744     if (group->cost_map[i].cand == cand)
3745       return group->cost_map + i;
3746     else if (group->cost_map[i].cand == NULL)
3747       return NULL;
3748   for (i = 0; i < s; i++)
3749     if (group->cost_map[i].cand == cand)
3750       return group->cost_map + i;
3751     else if (group->cost_map[i].cand == NULL)
3752       return NULL;
3753
3754   return NULL;
3755 }
3756
3757 /* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
3758 static rtx
3759 produce_memory_decl_rtl (tree obj, int *regno)
3760 {
3761   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3762   machine_mode address_mode = targetm.addr_space.address_mode (as);
3763   rtx x;
3764
3765   gcc_assert (obj);
3766   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3767     {
3768       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3769       x = gen_rtx_SYMBOL_REF (address_mode, name);
3770       SET_SYMBOL_REF_DECL (x, obj);
3771       x = gen_rtx_MEM (DECL_MODE (obj), x);
3772       set_mem_addr_space (x, as);
3773       targetm.encode_section_info (obj, x, true);
3774     }
3775   else
3776     {
3777       x = gen_raw_REG (address_mode, (*regno)++);
3778       x = gen_rtx_MEM (DECL_MODE (obj), x);
3779       set_mem_addr_space (x, as);
3780     }
3781
3782   return x;
3783 }
3784
3785 /* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
3786    walk_tree.  DATA contains the actual fake register number.  */
3787
3788 static tree
3789 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3790 {
3791   tree obj = NULL_TREE;
3792   rtx x = NULL_RTX;
3793   int *regno = (int *) data;
3794
3795   switch (TREE_CODE (*expr_p))
3796     {
3797     case ADDR_EXPR:
3798       for (expr_p = &TREE_OPERAND (*expr_p, 0);
3799            handled_component_p (*expr_p);
3800            expr_p = &TREE_OPERAND (*expr_p, 0))
3801         continue;
3802       obj = *expr_p;
3803       if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3804         x = produce_memory_decl_rtl (obj, regno);
3805       break;
3806
3807     case SSA_NAME:
3808       *ws = 0;
3809       obj = SSA_NAME_VAR (*expr_p);
3810       /* Defer handling of anonymous SSA_NAMEs to the expander.  */
3811       if (!obj)
3812         return NULL_TREE;
3813       if (!DECL_RTL_SET_P (obj))
3814         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3815       break;
3816
3817     case VAR_DECL:
3818     case PARM_DECL:
3819     case RESULT_DECL:
3820       *ws = 0;
3821       obj = *expr_p;
3822
3823       if (DECL_RTL_SET_P (obj))
3824         break;
3825
3826       if (DECL_MODE (obj) == BLKmode)
3827         x = produce_memory_decl_rtl (obj, regno);
3828       else
3829         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3830
3831       break;
3832
3833     default:
3834       break;
3835     }
3836
3837   if (x)
3838     {
3839       decl_rtl_to_reset.safe_push (obj);
3840       SET_DECL_RTL (obj, x);
3841     }
3842
3843   return NULL_TREE;
3844 }
3845
3846 /* Predict whether the given loop will be transformed in the RTL
3847    doloop_optimize pass.  Attempt to duplicate some doloop_optimize checks.
3848    This is only for target independent checks, see targetm.predict_doloop_p
3849    for the target dependent ones.
3850
3851    Note that according to some initial investigation, some checks like costly
3852    niter check and invalid stmt scanning don't have much gains among general
3853    cases, so keep this as simple as possible first.
3854
3855    Some RTL specific checks seems unable to be checked in gimple, if any new
3856    checks or easy checks _are_ missing here, please add them.  */
3857
3858 static bool
3859 generic_predict_doloop_p (struct ivopts_data *data)
3860 {
3861   class loop *loop = data->current_loop;
3862
3863   /* Call target hook for target dependent checks.  */
3864   if (!targetm.predict_doloop_p (loop))
3865     {
3866       if (dump_file && (dump_flags & TDF_DETAILS))
3867         fprintf (dump_file, "Predict doloop failure due to"
3868                             " target specific checks.\n");
3869       return false;
3870     }
3871
3872   /* Similar to doloop_optimize, check iteration description to know it's
3873      suitable or not.  Keep it as simple as possible, feel free to extend it
3874      if you find any multiple exits cases matter.  */
3875   edge exit = single_dom_exit (loop);
3876   class tree_niter_desc *niter_desc;
3877   if (!exit || !(niter_desc = niter_for_exit (data, exit)))
3878     {
3879       if (dump_file && (dump_flags & TDF_DETAILS))
3880         fprintf (dump_file, "Predict doloop failure due to"
3881                             " unexpected niters.\n");
3882       return false;
3883     }
3884
3885   /* Similar to doloop_optimize, check whether iteration count too small
3886      and not profitable.  */
3887   HOST_WIDE_INT est_niter = get_estimated_loop_iterations_int (loop);
3888   if (est_niter == -1)
3889     est_niter = get_likely_max_loop_iterations_int (loop);
3890   if (est_niter >= 0 && est_niter < 3)
3891     {
3892       if (dump_file && (dump_flags & TDF_DETAILS))
3893         fprintf (dump_file,
3894                  "Predict doloop failure due to"
3895                  " too few iterations (%u).\n",
3896                  (unsigned int) est_niter);
3897       return false;
3898     }
3899
3900   return true;
3901 }
3902
3903 /* Determines cost of the computation of EXPR.  */
3904
3905 static unsigned
3906 computation_cost (tree expr, bool speed)
3907 {
3908   rtx_insn *seq;
3909   rtx rslt;
3910   tree type = TREE_TYPE (expr);
3911   unsigned cost;
3912   /* Avoid using hard regs in ways which may be unsupported.  */
3913   int regno = LAST_VIRTUAL_REGISTER + 1;
3914   struct cgraph_node *node = cgraph_node::get (current_function_decl);
3915   enum node_frequency real_frequency = node->frequency;
3916
3917   node->frequency = NODE_FREQUENCY_NORMAL;
3918   crtl->maybe_hot_insn_p = speed;
3919   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3920   start_sequence ();
3921   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3922   seq = get_insns ();
3923   end_sequence ();
3924   default_rtl_profile ();
3925   node->frequency = real_frequency;
3926
3927   cost = seq_cost (seq, speed);
3928   if (MEM_P (rslt))
3929     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3930                           TYPE_ADDR_SPACE (type), speed);
3931   else if (!REG_P (rslt))
3932     cost += set_src_cost (rslt, TYPE_MODE (type), speed);
3933
3934   return cost;
3935 }
3936
3937 /* Returns variable containing the value of candidate CAND at statement AT.  */
3938
3939 static tree
3940 var_at_stmt (class loop *loop, struct iv_cand *cand, gimple *stmt)
3941 {
3942   if (stmt_after_increment (loop, cand, stmt))
3943     return cand->var_after;
3944   else
3945     return cand->var_before;
3946 }
3947
3948 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3949    same precision that is at least as wide as the precision of TYPE, stores
3950    BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
3951    type of A and B.  */
3952
3953 static tree
3954 determine_common_wider_type (tree *a, tree *b)
3955 {
3956   tree wider_type = NULL;
3957   tree suba, subb;
3958   tree atype = TREE_TYPE (*a);
3959
3960   if (CONVERT_EXPR_P (*a))
3961     {
3962       suba = TREE_OPERAND (*a, 0);
3963       wider_type = TREE_TYPE (suba);
3964       if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3965         return atype;
3966     }
3967   else
3968     return atype;
3969
3970   if (CONVERT_EXPR_P (*b))
3971     {
3972       subb = TREE_OPERAND (*b, 0);
3973       if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3974         return atype;
3975     }
3976   else
3977     return atype;
3978
3979   *a = suba;
3980   *b = subb;
3981   return wider_type;
3982 }
3983
3984 /* Determines the expression by that USE is expressed from induction variable
3985    CAND at statement AT in LOOP.  The expression is stored in two parts in a
3986    decomposed form.  The invariant part is stored in AFF_INV; while variant
3987    part in AFF_VAR.  Store ratio of CAND.step over USE.step in PRAT if it's
3988    non-null.  Returns false if USE cannot be expressed using CAND.  */
3989
3990 static bool
3991 get_computation_aff_1 (class loop *loop, gimple *at, struct iv_use *use,
3992                        struct iv_cand *cand, class aff_tree *aff_inv,
3993                        class aff_tree *aff_var, widest_int *prat = NULL)
3994 {
3995   tree ubase = use->iv->base, ustep = use->iv->step;
3996   tree cbase = cand->iv->base, cstep = cand->iv->step;
3997   tree common_type, uutype, var, cstep_common;
3998   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3999   aff_tree aff_cbase;
4000   widest_int rat;
4001
4002   /* We must have a precision to express the values of use.  */
4003   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4004     return false;
4005
4006   var = var_at_stmt (loop, cand, at);
4007   uutype = unsigned_type_for (utype);
4008
4009   /* If the conversion is not noop, perform it.  */
4010   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4011     {
4012       if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
4013           && (CONVERT_EXPR_P (cstep) || poly_int_tree_p (cstep)))
4014         {
4015           tree inner_base, inner_step, inner_type;
4016           inner_base = TREE_OPERAND (cbase, 0);
4017           if (CONVERT_EXPR_P (cstep))
4018             inner_step = TREE_OPERAND (cstep, 0);
4019           else
4020             inner_step = cstep;
4021
4022           inner_type = TREE_TYPE (inner_base);
4023           /* If candidate is added from a biv whose type is smaller than
4024              ctype, we know both candidate and the biv won't overflow.
4025              In this case, it's safe to skip the convertion in candidate.
4026              As an example, (unsigned short)((unsigned long)A) equals to
4027              (unsigned short)A, if A has a type no larger than short.  */
4028           if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
4029             {
4030               cbase = inner_base;
4031               cstep = inner_step;
4032             }
4033         }
4034       cbase = fold_convert (uutype, cbase);
4035       cstep = fold_convert (uutype, cstep);
4036       var = fold_convert (uutype, var);
4037     }
4038
4039   /* Ratio is 1 when computing the value of biv cand by itself.
4040      We can't rely on constant_multiple_of in this case because the
4041      use is created after the original biv is selected.  The call
4042      could fail because of inconsistent fold behavior.  See PR68021
4043      for more information.  */
4044   if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
4045     {
4046       gcc_assert (is_gimple_assign (use->stmt));
4047       gcc_assert (use->iv->ssa_name == cand->var_after);
4048       gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
4049       rat = 1;
4050     }
4051   else if (!constant_multiple_of (ustep, cstep, &rat))
4052     return false;
4053
4054   if (prat)
4055     *prat = rat;
4056
4057   /* In case both UBASE and CBASE are shortened to UUTYPE from some common
4058      type, we achieve better folding by computing their difference in this
4059      wider type, and cast the result to UUTYPE.  We do not need to worry about
4060      overflows, as all the arithmetics will in the end be performed in UUTYPE
4061      anyway.  */
4062   common_type = determine_common_wider_type (&ubase, &cbase);
4063
4064   /* use = ubase - ratio * cbase + ratio * var.  */
4065   tree_to_aff_combination (ubase, common_type, aff_inv);
4066   tree_to_aff_combination (cbase, common_type, &aff_cbase);
4067   tree_to_aff_combination (var, uutype, aff_var);
4068
4069   /* We need to shift the value if we are after the increment.  */
4070   if (stmt_after_increment (loop, cand, at))
4071     {
4072       aff_tree cstep_aff;
4073
4074       if (common_type != uutype)
4075         cstep_common = fold_convert (common_type, cstep);
4076       else
4077         cstep_common = cstep;
4078
4079       tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
4080       aff_combination_add (&aff_cbase, &cstep_aff);
4081     }
4082
4083   aff_combination_scale (&aff_cbase, -rat);
4084   aff_combination_add (aff_inv, &aff_cbase);
4085   if (common_type != uutype)
4086     aff_combination_convert (aff_inv, uutype);
4087
4088   aff_combination_scale (aff_var, rat);
4089   return true;
4090 }
4091
4092 /* Determines the expression by that USE is expressed from induction variable
4093    CAND at statement AT in LOOP.  The expression is stored in a decomposed
4094    form into AFF.  Returns false if USE cannot be expressed using CAND.  */
4095
4096 static bool
4097 get_computation_aff (class loop *loop, gimple *at, struct iv_use *use,
4098                      struct iv_cand *cand, class aff_tree *aff)
4099 {
4100   aff_tree aff_var;
4101
4102   if (!get_computation_aff_1 (loop, at, use, cand, aff, &aff_var))
4103     return false;
4104
4105   aff_combination_add (aff, &aff_var);
4106   return true;
4107 }
4108
4109 /* Return the type of USE.  */
4110
4111 static tree
4112 get_use_type (struct iv_use *use)
4113 {
4114   tree base_type = TREE_TYPE (use->iv->base);
4115   tree type;
4116
4117   if (use->type == USE_REF_ADDRESS)
4118     {
4119       /* The base_type may be a void pointer.  Create a pointer type based on
4120          the mem_ref instead.  */
4121       type = build_pointer_type (TREE_TYPE (*use->op_p));
4122       gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
4123                   == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
4124     }
4125   else
4126     type = base_type;
4127
4128   return type;
4129 }
4130
4131 /* Determines the expression by that USE is expressed from induction variable
4132    CAND at statement AT in LOOP.  The computation is unshared.  */
4133
4134 static tree
4135 get_computation_at (class loop *loop, gimple *at,
4136                     struct iv_use *use, struct iv_cand *cand)
4137 {
4138   aff_tree aff;
4139   tree type = get_use_type (use);
4140
4141   if (!get_computation_aff (loop, at, use, cand, &aff))
4142     return NULL_TREE;
4143   unshare_aff_combination (&aff);
4144   return fold_convert (type, aff_combination_to_tree (&aff));
4145 }
4146
4147 /* Like get_computation_at, but try harder, even if the computation
4148    is more expensive.  Intended for debug stmts.  */
4149
4150 static tree
4151 get_debug_computation_at (class loop *loop, gimple *at,
4152                           struct iv_use *use, struct iv_cand *cand)
4153 {
4154   if (tree ret = get_computation_at (loop, at, use, cand))
4155     return ret;
4156
4157   tree ubase = use->iv->base, ustep = use->iv->step;
4158   tree cbase = cand->iv->base, cstep = cand->iv->step;
4159   tree var;
4160   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
4161   widest_int rat;
4162
4163   /* We must have a precision to express the values of use.  */
4164   if (TYPE_PRECISION (utype) >= TYPE_PRECISION (ctype))
4165     return NULL_TREE;
4166
4167   /* Try to handle the case that get_computation_at doesn't,
4168      try to express
4169      use = ubase + (var - cbase) / ratio.  */
4170   if (!constant_multiple_of (cstep, fold_convert (TREE_TYPE (cstep), ustep),
4171                              &rat))
4172     return NULL_TREE;
4173
4174   bool neg_p = false;
4175   if (wi::neg_p (rat))
4176     {
4177       if (TYPE_UNSIGNED (ctype))
4178         return NULL_TREE;
4179       neg_p = true;
4180       rat = wi::neg (rat);
4181     }
4182
4183   /* If both IVs can wrap around and CAND doesn't have a power of two step,
4184      it is unsafe.  Consider uint16_t CAND with step 9, when wrapping around,
4185      the values will be ... 0xfff0, 0xfff9, 2, 11 ... and when use is say
4186      uint8_t with step 3, those values divided by 3 cast to uint8_t will be
4187      ... 0x50, 0x53, 0, 3 ... rather than expected 0x50, 0x53, 0x56, 0x59.  */
4188   if (!use->iv->no_overflow
4189       && !cand->iv->no_overflow
4190       && !integer_pow2p (cstep))
4191     return NULL_TREE;
4192
4193   int bits = wi::exact_log2 (rat);
4194   if (bits == -1)
4195     bits = wi::floor_log2 (rat) + 1;
4196   if (!cand->iv->no_overflow
4197       && TYPE_PRECISION (utype) + bits > TYPE_PRECISION (ctype))
4198     return NULL_TREE;
4199
4200   var = var_at_stmt (loop, cand, at);
4201
4202   if (POINTER_TYPE_P (ctype))
4203     {
4204       ctype = unsigned_type_for (ctype);
4205       cbase = fold_convert (ctype, cbase);
4206       cstep = fold_convert (ctype, cstep);
4207       var = fold_convert (ctype, var);
4208     }
4209
4210   if (stmt_after_increment (loop, cand, at))
4211     var = fold_build2 (MINUS_EXPR, TREE_TYPE (var), var,
4212                        unshare_expr (cstep));
4213
4214   var = fold_build2 (MINUS_EXPR, TREE_TYPE (var), var, cbase);
4215   var = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (var), var,
4216                      wide_int_to_tree (TREE_TYPE (var), rat));
4217   if (POINTER_TYPE_P (utype))
4218     {
4219       var = fold_convert (sizetype, var);
4220       if (neg_p)
4221         var = fold_build1 (NEGATE_EXPR, sizetype, var);
4222       var = fold_build2 (POINTER_PLUS_EXPR, utype, ubase, var);
4223     }
4224   else
4225     {
4226       var = fold_convert (utype, var);
4227       var = fold_build2 (neg_p ? MINUS_EXPR : PLUS_EXPR, utype,
4228                          ubase, var);
4229     }
4230   return var;
4231 }
4232
4233 /* Adjust the cost COST for being in loop setup rather than loop body.
4234    If we're optimizing for space, the loop setup overhead is constant;
4235    if we're optimizing for speed, amortize it over the per-iteration cost.
4236    If ROUND_UP_P is true, the result is round up rather than to zero when
4237    optimizing for speed.  */
4238 static int64_t
4239 adjust_setup_cost (struct ivopts_data *data, int64_t cost,
4240                    bool round_up_p = false)
4241 {
4242   if (cost == INFTY)
4243     return cost;
4244   else if (optimize_loop_for_speed_p (data->current_loop))
4245     {
4246       int64_t niters = (int64_t) avg_loop_niter (data->current_loop);
4247       return (cost + (round_up_p ? niters - 1 : 0)) / niters;
4248     }
4249   else
4250     return cost;
4251 }
4252
4253 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE.  MULT is the
4254    EXPR operand holding the shift.  COST0 and COST1 are the costs for
4255    calculating the operands of EXPR.  Returns true if successful, and returns
4256    the cost in COST.  */
4257
4258 static bool
4259 get_shiftadd_cost (tree expr, scalar_int_mode mode, comp_cost cost0,
4260                    comp_cost cost1, tree mult, bool speed, comp_cost *cost)
4261 {
4262   comp_cost res;
4263   tree op1 = TREE_OPERAND (expr, 1);
4264   tree cst = TREE_OPERAND (mult, 1);
4265   tree multop = TREE_OPERAND (mult, 0);
4266   int m = exact_log2 (int_cst_value (cst));
4267   int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
4268   int as_cost, sa_cost;
4269   bool mult_in_op1;
4270
4271   if (!(m >= 0 && m < maxm))
4272     return false;
4273
4274   STRIP_NOPS (op1);
4275   mult_in_op1 = operand_equal_p (op1, mult, 0);
4276
4277   as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
4278
4279   /* If the target has a cheap shift-and-add or shift-and-sub instruction,
4280      use that in preference to a shift insn followed by an add insn.  */
4281   sa_cost = (TREE_CODE (expr) != MINUS_EXPR
4282              ? shiftadd_cost (speed, mode, m)
4283              : (mult_in_op1
4284                 ? shiftsub1_cost (speed, mode, m)
4285                 : shiftsub0_cost (speed, mode, m)));
4286
4287   res = comp_cost (MIN (as_cost, sa_cost), 0);
4288   res += (mult_in_op1 ? cost0 : cost1);
4289
4290   STRIP_NOPS (multop);
4291   if (!is_gimple_val (multop))
4292     res += force_expr_to_var_cost (multop, speed);
4293
4294   *cost = res;
4295   return true;
4296 }
4297
4298 /* Estimates cost of forcing expression EXPR into a variable.  */
4299
4300 static comp_cost
4301 force_expr_to_var_cost (tree expr, bool speed)
4302 {
4303   static bool costs_initialized = false;
4304   static unsigned integer_cost [2];
4305   static unsigned symbol_cost [2];
4306   static unsigned address_cost [2];
4307   tree op0, op1;
4308   comp_cost cost0, cost1, cost;
4309   machine_mode mode;
4310   scalar_int_mode int_mode;
4311
4312   if (!costs_initialized)
4313     {
4314       tree type = build_pointer_type (integer_type_node);
4315       tree var, addr;
4316       rtx x;
4317       int i;
4318
4319       var = create_tmp_var_raw (integer_type_node, "test_var");
4320       TREE_STATIC (var) = 1;
4321       x = produce_memory_decl_rtl (var, NULL);
4322       SET_DECL_RTL (var, x);
4323
4324       addr = build1 (ADDR_EXPR, type, var);
4325
4326
4327       for (i = 0; i < 2; i++)
4328         {
4329           integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
4330                                                              2000), i);
4331
4332           symbol_cost[i] = computation_cost (addr, i) + 1;
4333
4334           address_cost[i]
4335             = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
4336           if (dump_file && (dump_flags & TDF_DETAILS))
4337             {
4338               fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
4339               fprintf (dump_file, "  integer %d\n", (int) integer_cost[i]);
4340               fprintf (dump_file, "  symbol %d\n", (int) symbol_cost[i]);
4341               fprintf (dump_file, "  address %d\n", (int) address_cost[i]);
4342               fprintf (dump_file, "  other %d\n", (int) target_spill_cost[i]);
4343               fprintf (dump_file, "\n");
4344             }
4345         }
4346
4347       costs_initialized = true;
4348     }
4349
4350   STRIP_NOPS (expr);
4351
4352   if (SSA_VAR_P (expr))
4353     return no_cost;
4354
4355   if (is_gimple_min_invariant (expr))
4356     {
4357       if (poly_int_tree_p (expr))
4358         return comp_cost (integer_cost [speed], 0);
4359
4360       if (TREE_CODE (expr) == ADDR_EXPR)
4361         {
4362           tree obj = TREE_OPERAND (expr, 0);
4363
4364           if (VAR_P (obj)
4365               || TREE_CODE (obj) == PARM_DECL
4366               || TREE_CODE (obj) == RESULT_DECL)
4367             return comp_cost (symbol_cost [speed], 0);
4368         }
4369
4370       return comp_cost (address_cost [speed], 0);
4371     }
4372
4373   switch (TREE_CODE (expr))
4374     {
4375     case POINTER_PLUS_EXPR:
4376     case PLUS_EXPR:
4377     case MINUS_EXPR:
4378     case MULT_EXPR:
4379     case TRUNC_DIV_EXPR:
4380     case BIT_AND_EXPR:
4381     case BIT_IOR_EXPR:
4382     case LSHIFT_EXPR:
4383     case RSHIFT_EXPR:
4384       op0 = TREE_OPERAND (expr, 0);
4385       op1 = TREE_OPERAND (expr, 1);
4386       STRIP_NOPS (op0);
4387       STRIP_NOPS (op1);
4388       break;
4389
4390     CASE_CONVERT:
4391     case NEGATE_EXPR:
4392     case BIT_NOT_EXPR:
4393       op0 = TREE_OPERAND (expr, 0);
4394       STRIP_NOPS (op0);
4395       op1 = NULL_TREE;
4396       break;
4397     /* See add_iv_candidate_for_doloop, for doloop may_be_zero case, we
4398        introduce COND_EXPR for IV base, need to support better cost estimation
4399        for this COND_EXPR and tcc_comparison.  */
4400     case COND_EXPR:
4401       op0 = TREE_OPERAND (expr, 1);
4402       STRIP_NOPS (op0);
4403       op1 = TREE_OPERAND (expr, 2);
4404       STRIP_NOPS (op1);
4405       break;
4406     case LT_EXPR:
4407     case LE_EXPR:
4408     case GT_EXPR:
4409     case GE_EXPR:
4410     case EQ_EXPR:
4411     case NE_EXPR:
4412     case UNORDERED_EXPR:
4413     case ORDERED_EXPR:
4414     case UNLT_EXPR:
4415     case UNLE_EXPR:
4416     case UNGT_EXPR:
4417     case UNGE_EXPR:
4418     case UNEQ_EXPR:
4419     case LTGT_EXPR:
4420     case MAX_EXPR:
4421     case MIN_EXPR:
4422       op0 = TREE_OPERAND (expr, 0);
4423       STRIP_NOPS (op0);
4424       op1 = TREE_OPERAND (expr, 1);
4425       STRIP_NOPS (op1);
4426       break;
4427
4428     default:
4429       /* Just an arbitrary value, FIXME.  */
4430       return comp_cost (target_spill_cost[speed], 0);
4431     }
4432
4433   if (op0 == NULL_TREE
4434       || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4435     cost0 = no_cost;
4436   else
4437     cost0 = force_expr_to_var_cost (op0, speed);
4438
4439   if (op1 == NULL_TREE
4440       || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4441     cost1 = no_cost;
4442   else
4443     cost1 = force_expr_to_var_cost (op1, speed);
4444
4445   mode = TYPE_MODE (TREE_TYPE (expr));
4446   switch (TREE_CODE (expr))
4447     {
4448     case POINTER_PLUS_EXPR:
4449     case PLUS_EXPR:
4450     case MINUS_EXPR:
4451     case NEGATE_EXPR:
4452       cost = comp_cost (add_cost (speed, mode), 0);
4453       if (TREE_CODE (expr) != NEGATE_EXPR)
4454         {
4455           tree mult = NULL_TREE;
4456           comp_cost sa_cost;
4457           if (TREE_CODE (op1) == MULT_EXPR)
4458             mult = op1;
4459           else if (TREE_CODE (op0) == MULT_EXPR)
4460             mult = op0;
4461
4462           if (mult != NULL_TREE
4463               && is_a <scalar_int_mode> (mode, &int_mode)
4464               && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4465               && get_shiftadd_cost (expr, int_mode, cost0, cost1, mult,
4466                                     speed, &sa_cost))
4467             return sa_cost;
4468         }
4469       break;
4470
4471     CASE_CONVERT:
4472       {
4473         tree inner_mode, outer_mode;
4474         outer_mode = TREE_TYPE (expr);
4475         inner_mode = TREE_TYPE (op0);
4476         cost = comp_cost (convert_cost (TYPE_MODE (outer_mode),
4477                                        TYPE_MODE (inner_mode), speed), 0);
4478       }
4479       break;
4480
4481     case MULT_EXPR:
4482       if (cst_and_fits_in_hwi (op0))
4483         cost = comp_cost (mult_by_coeff_cost (int_cst_value (op0),
4484                                              mode, speed), 0);
4485       else if (cst_and_fits_in_hwi (op1))
4486         cost = comp_cost (mult_by_coeff_cost (int_cst_value (op1),
4487                                              mode, speed), 0);
4488       else
4489         return comp_cost (target_spill_cost [speed], 0);
4490       break;
4491
4492     case TRUNC_DIV_EXPR:
4493       /* Division by power of two is usually cheap, so we allow it.  Forbid
4494          anything else.  */
4495       if (integer_pow2p (TREE_OPERAND (expr, 1)))
4496         cost = comp_cost (add_cost (speed, mode), 0);
4497       else
4498         cost = comp_cost (target_spill_cost[speed], 0);
4499       break;
4500
4501     case BIT_AND_EXPR:
4502     case BIT_IOR_EXPR:
4503     case BIT_NOT_EXPR:
4504     case LSHIFT_EXPR:
4505     case RSHIFT_EXPR:
4506       cost = comp_cost (add_cost (speed, mode), 0);
4507       break;
4508     case COND_EXPR:
4509       op0 = TREE_OPERAND (expr, 0);
4510       STRIP_NOPS (op0);
4511       if (op0 == NULL_TREE || TREE_CODE (op0) == SSA_NAME
4512           || CONSTANT_CLASS_P (op0))
4513         cost = no_cost;
4514       else
4515         cost = force_expr_to_var_cost (op0, speed);
4516       break;
4517     case LT_EXPR:
4518     case LE_EXPR:
4519     case GT_EXPR:
4520     case GE_EXPR:
4521     case EQ_EXPR:
4522     case NE_EXPR:
4523     case UNORDERED_EXPR:
4524     case ORDERED_EXPR:
4525     case UNLT_EXPR:
4526     case UNLE_EXPR:
4527     case UNGT_EXPR:
4528     case UNGE_EXPR:
4529     case UNEQ_EXPR:
4530     case LTGT_EXPR:
4531     case MAX_EXPR:
4532     case MIN_EXPR:
4533       /* Simply use add cost for now, FIXME if there is some more accurate cost
4534          evaluation way.  */
4535       cost = comp_cost (add_cost (speed, mode), 0);
4536       break;
4537
4538     default:
4539       gcc_unreachable ();
4540     }
4541
4542   cost += cost0;
4543   cost += cost1;
4544   return cost;
4545 }
4546
4547 /* Estimates cost of forcing EXPR into a variable.  INV_VARS is a set of the
4548    invariants the computation depends on.  */
4549
4550 static comp_cost
4551 force_var_cost (struct ivopts_data *data, tree expr, bitmap *inv_vars)
4552 {
4553   if (!expr)
4554     return no_cost;
4555
4556   find_inv_vars (data, &expr, inv_vars);
4557   return force_expr_to_var_cost (expr, data->speed);
4558 }
4559
4560 /* Returns cost of auto-modifying address expression in shape base + offset.
4561    AINC_STEP is step size of the address IV.  AINC_OFFSET is offset of the
4562    address expression.  The address expression has ADDR_MODE in addr space
4563    AS.  The memory access has MEM_MODE.  SPEED means we are optimizing for
4564    speed or size.  */
4565
4566 enum ainc_type
4567 {
4568   AINC_PRE_INC,         /* Pre increment.  */
4569   AINC_PRE_DEC,         /* Pre decrement.  */
4570   AINC_POST_INC,        /* Post increment.  */
4571   AINC_POST_DEC,        /* Post decrement.  */
4572   AINC_NONE             /* Also the number of auto increment types.  */
4573 };
4574
4575 struct ainc_cost_data
4576 {
4577   int64_t costs[AINC_NONE];
4578 };
4579
4580 static comp_cost
4581 get_address_cost_ainc (poly_int64 ainc_step, poly_int64 ainc_offset,
4582                        machine_mode addr_mode, machine_mode mem_mode,
4583                        addr_space_t as, bool speed)
4584 {
4585   if (!USE_LOAD_PRE_DECREMENT (mem_mode)
4586       && !USE_STORE_PRE_DECREMENT (mem_mode)
4587       && !USE_LOAD_POST_DECREMENT (mem_mode)
4588       && !USE_STORE_POST_DECREMENT (mem_mode)
4589       && !USE_LOAD_PRE_INCREMENT (mem_mode)
4590       && !USE_STORE_PRE_INCREMENT (mem_mode)
4591       && !USE_LOAD_POST_INCREMENT (mem_mode)
4592       && !USE_STORE_POST_INCREMENT (mem_mode))
4593     return infinite_cost;
4594
4595   static vec<ainc_cost_data *> ainc_cost_data_list;
4596   unsigned idx = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
4597   if (idx >= ainc_cost_data_list.length ())
4598     {
4599       unsigned nsize = ((unsigned) as + 1) *MAX_MACHINE_MODE;
4600
4601       gcc_assert (nsize > idx);
4602       ainc_cost_data_list.safe_grow_cleared (nsize, true);
4603     }
4604
4605   ainc_cost_data *data = ainc_cost_data_list[idx];
4606   if (data == NULL)
4607     {
4608       rtx reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
4609
4610       data = (ainc_cost_data *) xcalloc (1, sizeof (*data));
4611       data->costs[AINC_PRE_DEC] = INFTY;
4612       data->costs[AINC_POST_DEC] = INFTY;
4613       data->costs[AINC_PRE_INC] = INFTY;
4614       data->costs[AINC_POST_INC] = INFTY;
4615       if (USE_LOAD_PRE_DECREMENT (mem_mode)
4616           || USE_STORE_PRE_DECREMENT (mem_mode))
4617         {
4618           rtx addr = gen_rtx_PRE_DEC (addr_mode, reg);
4619
4620           if (memory_address_addr_space_p (mem_mode, addr, as))
4621             data->costs[AINC_PRE_DEC]
4622               = address_cost (addr, mem_mode, as, speed);
4623         }
4624       if (USE_LOAD_POST_DECREMENT (mem_mode)
4625           || USE_STORE_POST_DECREMENT (mem_mode))
4626         {
4627           rtx addr = gen_rtx_POST_DEC (addr_mode, reg);
4628
4629           if (memory_address_addr_space_p (mem_mode, addr, as))
4630             data->costs[AINC_POST_DEC]
4631               = address_cost (addr, mem_mode, as, speed);
4632         }
4633       if (USE_LOAD_PRE_INCREMENT (mem_mode)
4634           || USE_STORE_PRE_INCREMENT (mem_mode))
4635         {
4636           rtx addr = gen_rtx_PRE_INC (addr_mode, reg);
4637
4638           if (memory_address_addr_space_p (mem_mode, addr, as))
4639             data->costs[AINC_PRE_INC]
4640               = address_cost (addr, mem_mode, as, speed);
4641         }
4642       if (USE_LOAD_POST_INCREMENT (mem_mode)
4643           || USE_STORE_POST_INCREMENT (mem_mode))
4644         {
4645           rtx addr = gen_rtx_POST_INC (addr_mode, reg);
4646
4647           if (memory_address_addr_space_p (mem_mode, addr, as))
4648             data->costs[AINC_POST_INC]
4649               = address_cost (addr, mem_mode, as, speed);
4650         }
4651       ainc_cost_data_list[idx] = data;
4652     }
4653
4654   poly_int64 msize = GET_MODE_SIZE (mem_mode);
4655   if (known_eq (ainc_offset, 0) && known_eq (msize, ainc_step))
4656     return comp_cost (data->costs[AINC_POST_INC], 0);
4657   if (known_eq (ainc_offset, 0) && known_eq (msize, -ainc_step))
4658     return comp_cost (data->costs[AINC_POST_DEC], 0);
4659   if (known_eq (ainc_offset, msize) && known_eq (msize, ainc_step))
4660     return comp_cost (data->costs[AINC_PRE_INC], 0);
4661   if (known_eq (ainc_offset, -msize) && known_eq (msize, -ainc_step))
4662     return comp_cost (data->costs[AINC_PRE_DEC], 0);
4663
4664   return infinite_cost;
4665 }
4666
4667 /* Return cost of computing USE's address expression by using CAND.
4668    AFF_INV and AFF_VAR represent invariant and variant parts of the
4669    address expression, respectively.  If AFF_INV is simple, store
4670    the loop invariant variables which are depended by it in INV_VARS;
4671    if AFF_INV is complicated, handle it as a new invariant expression
4672    and record it in INV_EXPR.  RATIO indicates multiple times between
4673    steps of USE and CAND.  If CAN_AUTOINC is nonNULL, store boolean
4674    value to it indicating if this is an auto-increment address.  */
4675
4676 static comp_cost
4677 get_address_cost (struct ivopts_data *data, struct iv_use *use,
4678                   struct iv_cand *cand, aff_tree *aff_inv,
4679                   aff_tree *aff_var, HOST_WIDE_INT ratio,
4680                   bitmap *inv_vars, iv_inv_expr_ent **inv_expr,
4681                   bool *can_autoinc, bool speed)
4682 {
4683   rtx addr;
4684   bool simple_inv = true;
4685   tree comp_inv = NULL_TREE, type = aff_var->type;
4686   comp_cost var_cost = no_cost, cost = no_cost;
4687   struct mem_address parts = {NULL_TREE, integer_one_node,
4688                               NULL_TREE, NULL_TREE, NULL_TREE};
4689   machine_mode addr_mode = TYPE_MODE (type);
4690   machine_mode mem_mode = TYPE_MODE (use->mem_type);
4691   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
4692   /* Only true if ratio != 1.  */
4693   bool ok_with_ratio_p = false;
4694   bool ok_without_ratio_p = false;
4695
4696   if (!aff_combination_const_p (aff_inv))
4697     {
4698       parts.index = integer_one_node;
4699       /* Addressing mode "base + index".  */
4700       ok_without_ratio_p = valid_mem_ref_p (mem_mode, as, &parts);
4701       if (ratio != 1)
4702         {
4703           parts.step = wide_int_to_tree (type, ratio);
4704           /* Addressing mode "base + index << scale".  */
4705           ok_with_ratio_p = valid_mem_ref_p (mem_mode, as, &parts);
4706           if (!ok_with_ratio_p)
4707             parts.step = NULL_TREE;
4708         }
4709       if (ok_with_ratio_p || ok_without_ratio_p)
4710         {
4711           if (maybe_ne (aff_inv->offset, 0))
4712             {
4713               parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4714               /* Addressing mode "base + index [<< scale] + offset".  */
4715               if (!valid_mem_ref_p (mem_mode, as, &parts))
4716                 parts.offset = NULL_TREE;
4717               else
4718                 aff_inv->offset = 0;
4719             }
4720
4721           move_fixed_address_to_symbol (&parts, aff_inv);
4722           /* Base is fixed address and is moved to symbol part.  */
4723           if (parts.symbol != NULL_TREE && aff_combination_zero_p (aff_inv))
4724             parts.base = NULL_TREE;
4725
4726           /* Addressing mode "symbol + base + index [<< scale] [+ offset]".  */
4727           if (parts.symbol != NULL_TREE
4728               && !valid_mem_ref_p (mem_mode, as, &parts))
4729             {
4730               aff_combination_add_elt (aff_inv, parts.symbol, 1);
4731               parts.symbol = NULL_TREE;
4732               /* Reset SIMPLE_INV since symbol address needs to be computed
4733                  outside of address expression in this case.  */
4734               simple_inv = false;
4735               /* Symbol part is moved back to base part, it can't be NULL.  */
4736               parts.base = integer_one_node;
4737             }
4738         }
4739       else
4740         parts.index = NULL_TREE;
4741     }
4742   else
4743     {
4744       poly_int64 ainc_step;
4745       if (can_autoinc
4746           && ratio == 1
4747           && ptrdiff_tree_p (cand->iv->step, &ainc_step))
4748         {
4749           poly_int64 ainc_offset = (aff_inv->offset).force_shwi ();
4750
4751           if (stmt_after_increment (data->current_loop, cand, use->stmt))
4752             ainc_offset += ainc_step;
4753           cost = get_address_cost_ainc (ainc_step, ainc_offset,
4754                                         addr_mode, mem_mode, as, speed);
4755           if (!cost.infinite_cost_p ())
4756             {
4757               *can_autoinc = true;
4758               return cost;
4759             }
4760           cost = no_cost;
4761         }
4762       if (!aff_combination_zero_p (aff_inv))
4763         {
4764           parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4765           /* Addressing mode "base + offset".  */
4766           if (!valid_mem_ref_p (mem_mode, as, &parts))
4767             parts.offset = NULL_TREE;
4768           else
4769             aff_inv->offset = 0;
4770         }
4771     }
4772
4773   if (simple_inv)
4774     simple_inv = (aff_inv == NULL
4775                   || aff_combination_const_p (aff_inv)
4776                   || aff_combination_singleton_var_p (aff_inv));
4777   if (!aff_combination_zero_p (aff_inv))
4778     comp_inv = aff_combination_to_tree (aff_inv);
4779   if (comp_inv != NULL_TREE)
4780     cost = force_var_cost (data, comp_inv, inv_vars);
4781   if (ratio != 1 && parts.step == NULL_TREE)
4782     var_cost += mult_by_coeff_cost (ratio, addr_mode, speed);
4783   if (comp_inv != NULL_TREE && parts.index == NULL_TREE)
4784     var_cost += add_cost (speed, addr_mode);
4785
4786   if (comp_inv && inv_expr && !simple_inv)
4787     {
4788       *inv_expr = get_loop_invariant_expr (data, comp_inv);
4789       /* Clear depends on.  */
4790       if (*inv_expr != NULL && inv_vars && *inv_vars)
4791         bitmap_clear (*inv_vars);
4792
4793       /* Cost of small invariant expression adjusted against loop niters
4794          is usually zero, which makes it difficult to be differentiated
4795          from candidate based on loop invariant variables.  Secondly, the
4796          generated invariant expression may not be hoisted out of loop by
4797          following pass.  We penalize the cost by rounding up in order to
4798          neutralize such effects.  */
4799       cost.cost = adjust_setup_cost (data, cost.cost, true);
4800       cost.scratch = cost.cost;
4801     }
4802
4803   cost += var_cost;
4804   addr = addr_for_mem_ref (&parts, as, false);
4805   gcc_assert (memory_address_addr_space_p (mem_mode, addr, as));
4806   cost += address_cost (addr, mem_mode, as, speed);
4807
4808   if (parts.symbol != NULL_TREE)
4809     cost.complexity += 1;
4810   /* Don't increase the complexity of adding a scaled index if it's
4811      the only kind of index that the target allows.  */
4812   if (parts.step != NULL_TREE && ok_without_ratio_p)
4813     cost.complexity += 1;
4814   if (parts.base != NULL_TREE && parts.index != NULL_TREE)
4815     cost.complexity += 1;
4816   if (parts.offset != NULL_TREE && !integer_zerop (parts.offset))
4817     cost.complexity += 1;
4818
4819   return cost;
4820 }
4821
4822 /* Scale (multiply) the computed COST (except scratch part that should be
4823    hoisted out a loop) by header->frequency / AT->frequency, which makes
4824    expected cost more accurate.  */
4825
4826 static comp_cost
4827 get_scaled_computation_cost_at (ivopts_data *data, gimple *at, comp_cost cost)
4828 {
4829   if (data->speed
4830       && data->current_loop->header->count.to_frequency (cfun) > 0)
4831     {
4832       basic_block bb = gimple_bb (at);
4833       gcc_assert (cost.scratch <= cost.cost);
4834       int scale_factor = (int)(intptr_t) bb->aux;
4835       if (scale_factor == 1)
4836         return cost;
4837
4838       int64_t scaled_cost
4839         = cost.scratch + (cost.cost - cost.scratch) * scale_factor;
4840
4841       if (dump_file && (dump_flags & TDF_DETAILS))
4842         fprintf (dump_file, "Scaling cost based on bb prob by %2.2f: "
4843                  "%" PRId64 " (scratch: %" PRId64 ") -> %" PRId64 "\n",
4844                  1.0f * scale_factor, cost.cost, cost.scratch, scaled_cost);
4845
4846       cost.cost = scaled_cost;
4847     }
4848
4849   return cost;
4850 }
4851
4852 /* Determines the cost of the computation by that USE is expressed
4853    from induction variable CAND.  If ADDRESS_P is true, we just need
4854    to create an address from it, otherwise we want to get it into
4855    register.  A set of invariants we depend on is stored in INV_VARS.
4856    If CAN_AUTOINC is nonnull, use it to record whether autoinc
4857    addressing is likely.  If INV_EXPR is nonnull, record invariant
4858    expr entry in it.  */
4859
4860 static comp_cost
4861 get_computation_cost (struct ivopts_data *data, struct iv_use *use,
4862                       struct iv_cand *cand, bool address_p, bitmap *inv_vars,
4863                       bool *can_autoinc, iv_inv_expr_ent **inv_expr)
4864 {
4865   gimple *at = use->stmt;
4866   tree ubase = use->iv->base, cbase = cand->iv->base;
4867   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
4868   tree comp_inv = NULL_TREE;
4869   HOST_WIDE_INT ratio, aratio;
4870   comp_cost cost;
4871   widest_int rat;
4872   aff_tree aff_inv, aff_var;
4873   bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4874
4875   if (inv_vars)
4876     *inv_vars = NULL;
4877   if (can_autoinc)
4878     *can_autoinc = false;
4879   if (inv_expr)
4880     *inv_expr = NULL;
4881
4882   /* Check if we have enough precision to express the values of use.  */
4883   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4884     return infinite_cost;
4885
4886   if (address_p
4887       || (use->iv->base_object
4888           && cand->iv->base_object
4889           && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4890           && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4891     {
4892       /* Do not try to express address of an object with computation based
4893          on address of a different object.  This may cause problems in rtl
4894          level alias analysis (that does not expect this to be happening,
4895          as this is illegal in C), and would be unlikely to be useful
4896          anyway.  */
4897       if (use->iv->base_object
4898           && cand->iv->base_object
4899           && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4900         return infinite_cost;
4901     }
4902
4903   if (!get_computation_aff_1 (data->current_loop, at, use,
4904                               cand, &aff_inv, &aff_var, &rat)
4905       || !wi::fits_shwi_p (rat))
4906     return infinite_cost;
4907
4908   ratio = rat.to_shwi ();
4909   if (address_p)
4910     {
4911       cost = get_address_cost (data, use, cand, &aff_inv, &aff_var, ratio,
4912                                inv_vars, inv_expr, can_autoinc, speed);
4913       cost = get_scaled_computation_cost_at (data, at, cost);
4914       /* For doloop IV cand, add on the extra cost.  */
4915       cost += cand->doloop_p ? targetm.doloop_cost_for_address : 0;
4916       return cost;
4917     }
4918
4919   bool simple_inv = (aff_combination_const_p (&aff_inv)
4920                      || aff_combination_singleton_var_p (&aff_inv));
4921   tree signed_type = signed_type_for (aff_combination_type (&aff_inv));
4922   aff_combination_convert (&aff_inv, signed_type);
4923   if (!aff_combination_zero_p (&aff_inv))
4924     comp_inv = aff_combination_to_tree (&aff_inv);
4925
4926   cost = force_var_cost (data, comp_inv, inv_vars);
4927   if (comp_inv && inv_expr && !simple_inv)
4928     {
4929       *inv_expr = get_loop_invariant_expr (data, comp_inv);
4930       /* Clear depends on.  */
4931       if (*inv_expr != NULL && inv_vars && *inv_vars)
4932         bitmap_clear (*inv_vars);
4933
4934       cost.cost = adjust_setup_cost (data, cost.cost);
4935       /* Record setup cost in scratch field.  */
4936       cost.scratch = cost.cost;
4937     }
4938   /* Cost of constant integer can be covered when adding invariant part to
4939      variant part.  */
4940   else if (comp_inv && CONSTANT_CLASS_P (comp_inv))
4941     cost = no_cost;
4942
4943   /* Need type narrowing to represent use with cand.  */
4944   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4945     {
4946       machine_mode outer_mode = TYPE_MODE (utype);
4947       machine_mode inner_mode = TYPE_MODE (ctype);
4948       cost += comp_cost (convert_cost (outer_mode, inner_mode, speed), 0);
4949     }
4950
4951   /* Turn a + i * (-c) into a - i * c.  */
4952   if (ratio < 0 && comp_inv && !integer_zerop (comp_inv))
4953     aratio = -ratio;
4954   else
4955     aratio = ratio;
4956
4957   if (ratio != 1)
4958     cost += mult_by_coeff_cost (aratio, TYPE_MODE (utype), speed);
4959
4960   /* TODO: We may also need to check if we can compute  a + i * 4 in one
4961      instruction.  */
4962   /* Need to add up the invariant and variant parts.  */
4963   if (comp_inv && !integer_zerop (comp_inv))
4964     cost += add_cost (speed, TYPE_MODE (utype));
4965
4966   cost = get_scaled_computation_cost_at (data, at, cost);
4967
4968   /* For doloop IV cand, add on the extra cost.  */
4969   if (cand->doloop_p && use->type == USE_NONLINEAR_EXPR)
4970     cost += targetm.doloop_cost_for_generic;
4971
4972   return cost;
4973 }
4974
4975 /* Determines cost of computing the use in GROUP with CAND in a generic
4976    expression.  */
4977
4978 static bool
4979 determine_group_iv_cost_generic (struct ivopts_data *data,
4980                                  struct iv_group *group, struct iv_cand *cand)
4981 {
4982   comp_cost cost;
4983   iv_inv_expr_ent *inv_expr = NULL;
4984   bitmap inv_vars = NULL, inv_exprs = NULL;
4985   struct iv_use *use = group->vuses[0];
4986
4987   /* The simple case first -- if we need to express value of the preserved
4988      original biv, the cost is 0.  This also prevents us from counting the
4989      cost of increment twice -- once at this use and once in the cost of
4990      the candidate.  */
4991   if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
4992     cost = no_cost;
4993   /* If the IV candidate involves undefined SSA values and is not the
4994      same IV as on the USE avoid using that candidate here.  */
4995   else if (cand->involves_undefs
4996            && (!use->iv || !operand_equal_p (cand->iv->base, use->iv->base, 0)))
4997     return false;
4998   else
4999     cost = get_computation_cost (data, use, cand, false,
5000                                  &inv_vars, NULL, &inv_expr);
5001
5002   if (inv_expr)
5003     {
5004       inv_exprs = BITMAP_ALLOC (NULL);
5005       bitmap_set_bit (inv_exprs, inv_expr->id);
5006     }
5007   set_group_iv_cost (data, group, cand, cost, inv_vars,
5008                      NULL_TREE, ERROR_MARK, inv_exprs);
5009   return !cost.infinite_cost_p ();
5010 }
5011
5012 /* Determines cost of computing uses in GROUP with CAND in addresses.  */
5013
5014 static bool
5015 determine_group_iv_cost_address (struct ivopts_data *data,
5016                                  struct iv_group *group, struct iv_cand *cand)
5017 {
5018   unsigned i;
5019   bitmap inv_vars = NULL, inv_exprs = NULL;
5020   bool can_autoinc;
5021   iv_inv_expr_ent *inv_expr = NULL;
5022   struct iv_use *use = group->vuses[0];
5023   comp_cost sum_cost = no_cost, cost;
5024
5025   cost = get_computation_cost (data, use, cand, true,
5026                                &inv_vars, &can_autoinc, &inv_expr);
5027
5028   if (inv_expr)
5029     {
5030       inv_exprs = BITMAP_ALLOC (NULL);
5031       bitmap_set_bit (inv_exprs, inv_expr->id);
5032     }
5033   sum_cost = cost;
5034   if (!sum_cost.infinite_cost_p () && cand->ainc_use == use)
5035     {
5036       if (can_autoinc)
5037         sum_cost -= cand->cost_step;
5038       /* If we generated the candidate solely for exploiting autoincrement
5039          opportunities, and it turns out it can't be used, set the cost to
5040          infinity to make sure we ignore it.  */
5041       else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
5042         sum_cost = infinite_cost;
5043     }
5044
5045   /* Uses in a group can share setup code, so only add setup cost once.  */
5046   cost -= cost.scratch;
5047   /* Compute and add costs for rest uses of this group.  */
5048   for (i = 1; i < group->vuses.length () && !sum_cost.infinite_cost_p (); i++)
5049     {
5050       struct iv_use *next = group->vuses[i];
5051
5052       /* TODO: We could skip computing cost for sub iv_use when it has the
5053          same cost as the first iv_use, but the cost really depends on the
5054          offset and where the iv_use is.  */
5055         cost = get_computation_cost (data, next, cand, true,
5056                                      NULL, &can_autoinc, &inv_expr);
5057         if (inv_expr)
5058           {
5059             if (!inv_exprs)
5060               inv_exprs = BITMAP_ALLOC (NULL);
5061
5062             bitmap_set_bit (inv_exprs, inv_expr->id);
5063           }
5064       sum_cost += cost;
5065     }
5066   set_group_iv_cost (data, group, cand, sum_cost, inv_vars,
5067                      NULL_TREE, ERROR_MARK, inv_exprs);
5068
5069   return !sum_cost.infinite_cost_p ();
5070 }
5071
5072 /* Computes value of candidate CAND at position AT in iteration DESC->NITER,
5073    and stores it to VAL.  */
5074
5075 static void
5076 cand_value_at (class loop *loop, struct iv_cand *cand, gimple *at,
5077                class tree_niter_desc *desc, aff_tree *val)
5078 {
5079   aff_tree step, delta, nit;
5080   struct iv *iv = cand->iv;
5081   tree type = TREE_TYPE (iv->base);
5082   tree niter = desc->niter;
5083   bool after_adjust = stmt_after_increment (loop, cand, at);
5084   tree steptype;
5085
5086   if (POINTER_TYPE_P (type))
5087     steptype = sizetype;
5088   else
5089     steptype = unsigned_type_for (type);
5090
5091   /* If AFTER_ADJUST is required, the code below generates the equivalent
5092      of BASE + NITER * STEP + STEP, when ideally we'd prefer the expression
5093      BASE + (NITER + 1) * STEP, especially when NITER is often of the form
5094      SSA_NAME - 1.  Unfortunately, guaranteeing that adding 1 to NITER
5095      doesn't overflow is tricky, so we peek inside the TREE_NITER_DESC
5096      class for common idioms that we know are safe.  */
5097   if (after_adjust
5098       && desc->control.no_overflow
5099       && integer_onep (desc->control.step)
5100       && (desc->cmp == LT_EXPR
5101           || desc->cmp == NE_EXPR)
5102       && TREE_CODE (desc->bound) == SSA_NAME)
5103     {
5104       if (integer_onep (desc->control.base))
5105         {
5106           niter = desc->bound;
5107           after_adjust = false;
5108         }
5109       else if (TREE_CODE (niter) == MINUS_EXPR
5110                && integer_onep (TREE_OPERAND (niter, 1)))
5111         {
5112           niter = TREE_OPERAND (niter, 0);
5113           after_adjust = false;
5114         }
5115     }
5116
5117   tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
5118   aff_combination_convert (&step, steptype);
5119   tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
5120   aff_combination_convert (&nit, steptype);
5121   aff_combination_mult (&nit, &step, &delta);
5122   if (after_adjust)
5123     aff_combination_add (&delta, &step);
5124
5125   tree_to_aff_combination (iv->base, type, val);
5126   if (!POINTER_TYPE_P (type))
5127     aff_combination_convert (val, steptype);
5128   aff_combination_add (val, &delta);
5129 }
5130
5131 /* Returns period of induction variable iv.  */
5132
5133 static tree
5134 iv_period (struct iv *iv)
5135 {
5136   tree step = iv->step, period, type;
5137   tree pow2div;
5138
5139   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
5140
5141   type = unsigned_type_for (TREE_TYPE (step));
5142   /* Period of the iv is lcm (step, type_range)/step -1,
5143      i.e., N*type_range/step - 1. Since type range is power
5144      of two, N == (step >> num_of_ending_zeros_binary (step),
5145      so the final result is
5146
5147        (type_range >> num_of_ending_zeros_binary (step)) - 1
5148
5149   */
5150   pow2div = num_ending_zeros (step);
5151
5152   period = build_low_bits_mask (type,
5153                                 (TYPE_PRECISION (type)
5154                                  - tree_to_uhwi (pow2div)));
5155
5156   return period;
5157 }
5158
5159 /* Returns the comparison operator used when eliminating the iv USE.  */
5160
5161 static enum tree_code
5162 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
5163 {
5164   class loop *loop = data->current_loop;
5165   basic_block ex_bb;
5166   edge exit;
5167
5168   ex_bb = gimple_bb (use->stmt);
5169   exit = EDGE_SUCC (ex_bb, 0);
5170   if (flow_bb_inside_loop_p (loop, exit->dest))
5171     exit = EDGE_SUCC (ex_bb, 1);
5172
5173   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
5174 }
5175
5176 /* Returns true if we can prove that BASE - OFFSET does not overflow.  For now,
5177    we only detect the situation that BASE = SOMETHING + OFFSET, where the
5178    calculation is performed in non-wrapping type.
5179
5180    TODO: More generally, we could test for the situation that
5181          BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
5182          This would require knowing the sign of OFFSET.  */
5183
5184 static bool
5185 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
5186 {
5187   enum tree_code code;
5188   tree e1, e2;
5189   aff_tree aff_e1, aff_e2, aff_offset;
5190
5191   if (!nowrap_type_p (TREE_TYPE (base)))
5192     return false;
5193
5194   base = expand_simple_operations (base);
5195
5196   if (TREE_CODE (base) == SSA_NAME)
5197     {
5198       gimple *stmt = SSA_NAME_DEF_STMT (base);
5199
5200       if (gimple_code (stmt) != GIMPLE_ASSIGN)
5201         return false;
5202
5203       code = gimple_assign_rhs_code (stmt);
5204       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5205         return false;
5206
5207       e1 = gimple_assign_rhs1 (stmt);
5208       e2 = gimple_assign_rhs2 (stmt);
5209     }
5210   else
5211     {
5212       code = TREE_CODE (base);
5213       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5214         return false;
5215       e1 = TREE_OPERAND (base, 0);
5216       e2 = TREE_OPERAND (base, 1);
5217     }
5218
5219   /* Use affine expansion as deeper inspection to prove the equality.  */
5220   tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
5221                                   &aff_e2, &data->name_expansion_cache);
5222   tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
5223                                   &aff_offset, &data->name_expansion_cache);
5224   aff_combination_scale (&aff_offset, -1);
5225   switch (code)
5226     {
5227     case PLUS_EXPR:
5228       aff_combination_add (&aff_e2, &aff_offset);
5229       if (aff_combination_zero_p (&aff_e2))
5230         return true;
5231
5232       tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
5233                                       &aff_e1, &data->name_expansion_cache);
5234       aff_combination_add (&aff_e1, &aff_offset);
5235       return aff_combination_zero_p (&aff_e1);
5236
5237     case POINTER_PLUS_EXPR:
5238       aff_combination_add (&aff_e2, &aff_offset);
5239       return aff_combination_zero_p (&aff_e2);
5240
5241     default:
5242       return false;
5243     }
5244 }
5245
5246 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
5247    comparison with CAND.  NITER describes the number of iterations of
5248    the loops.  If successful, the comparison in COMP_P is altered accordingly.
5249
5250    We aim to handle the following situation:
5251
5252    sometype *base, *p;
5253    int a, b, i;
5254
5255    i = a;
5256    p = p_0 = base + a;
5257
5258    do
5259      {
5260        bla (*p);
5261        p++;
5262        i++;
5263      }
5264    while (i < b);
5265
5266    Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
5267    We aim to optimize this to
5268
5269    p = p_0 = base + a;
5270    do
5271      {
5272        bla (*p);
5273        p++;
5274      }
5275    while (p < p_0 - a + b);
5276
5277    This preserves the correctness, since the pointer arithmetics does not
5278    overflow.  More precisely:
5279
5280    1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
5281       overflow in computing it or the values of p.
5282    2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
5283       overflow.  To prove this, we use the fact that p_0 = base + a.  */
5284
5285 static bool
5286 iv_elimination_compare_lt (struct ivopts_data *data,
5287                            struct iv_cand *cand, enum tree_code *comp_p,
5288                            class tree_niter_desc *niter)
5289 {
5290   tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
5291   class aff_tree nit, tmpa, tmpb;
5292   enum tree_code comp;
5293   HOST_WIDE_INT step;
5294
5295   /* We need to know that the candidate induction variable does not overflow.
5296      While more complex analysis may be used to prove this, for now just
5297      check that the variable appears in the original program and that it
5298      is computed in a type that guarantees no overflows.  */
5299   cand_type = TREE_TYPE (cand->iv->base);
5300   if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
5301     return false;
5302
5303   /* Make sure that the loop iterates till the loop bound is hit, as otherwise
5304      the calculation of the BOUND could overflow, making the comparison
5305      invalid.  */
5306   if (!data->loop_single_exit_p)
5307     return false;
5308
5309   /* We need to be able to decide whether candidate is increasing or decreasing
5310      in order to choose the right comparison operator.  */
5311   if (!cst_and_fits_in_hwi (cand->iv->step))
5312     return false;
5313   step = int_cst_value (cand->iv->step);
5314
5315   /* Check that the number of iterations matches the expected pattern:
5316      a + 1 > b ? 0 : b - a - 1.  */
5317   mbz = niter->may_be_zero;
5318   if (TREE_CODE (mbz) == GT_EXPR)
5319     {
5320       /* Handle a + 1 > b.  */
5321       tree op0 = TREE_OPERAND (mbz, 0);
5322       if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
5323         {
5324           a = TREE_OPERAND (op0, 0);
5325           b = TREE_OPERAND (mbz, 1);
5326         }
5327       else
5328         return false;
5329     }
5330   else if (TREE_CODE (mbz) == LT_EXPR)
5331     {
5332       tree op1 = TREE_OPERAND (mbz, 1);
5333
5334       /* Handle b < a + 1.  */
5335       if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
5336         {
5337           a = TREE_OPERAND (op1, 0);
5338           b = TREE_OPERAND (mbz, 0);
5339         }
5340       else
5341         return false;
5342     }
5343   else
5344     return false;
5345
5346   /* Expected number of iterations is B - A - 1.  Check that it matches
5347      the actual number, i.e., that B - A - NITER = 1.  */
5348   tree_to_aff_combination (niter->niter, nit_type, &nit);
5349   tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
5350   tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
5351   aff_combination_scale (&nit, -1);
5352   aff_combination_scale (&tmpa, -1);
5353   aff_combination_add (&tmpb, &tmpa);
5354   aff_combination_add (&tmpb, &nit);
5355   if (tmpb.n != 0 || maybe_ne (tmpb.offset, 1))
5356     return false;
5357
5358   /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
5359      overflow.  */
5360   offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
5361                         cand->iv->step,
5362                         fold_convert (TREE_TYPE (cand->iv->step), a));
5363   if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
5364     return false;
5365
5366   /* Determine the new comparison operator.  */
5367   comp = step < 0 ? GT_EXPR : LT_EXPR;
5368   if (*comp_p == NE_EXPR)
5369     *comp_p = comp;
5370   else if (*comp_p == EQ_EXPR)
5371     *comp_p = invert_tree_comparison (comp, false);
5372   else
5373     gcc_unreachable ();
5374
5375   return true;
5376 }
5377
5378 /* Check whether it is possible to express the condition in USE by comparison
5379    of candidate CAND.  If so, store the value compared with to BOUND, and the
5380    comparison operator to COMP.  */
5381
5382 static bool
5383 may_eliminate_iv (struct ivopts_data *data,
5384                   struct iv_use *use, struct iv_cand *cand, tree *bound,
5385                   enum tree_code *comp)
5386 {
5387   basic_block ex_bb;
5388   edge exit;
5389   tree period;
5390   class loop *loop = data->current_loop;
5391   aff_tree bnd;
5392   class tree_niter_desc *desc = NULL;
5393
5394   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5395     return false;
5396
5397   /* For now works only for exits that dominate the loop latch.
5398      TODO: extend to other conditions inside loop body.  */
5399   ex_bb = gimple_bb (use->stmt);
5400   if (use->stmt != last_stmt (ex_bb)
5401       || gimple_code (use->stmt) != GIMPLE_COND
5402       || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5403     return false;
5404
5405   exit = EDGE_SUCC (ex_bb, 0);
5406   if (flow_bb_inside_loop_p (loop, exit->dest))
5407     exit = EDGE_SUCC (ex_bb, 1);
5408   if (flow_bb_inside_loop_p (loop, exit->dest))
5409     return false;
5410
5411   desc = niter_for_exit (data, exit);
5412   if (!desc)
5413     return false;
5414
5415   /* Determine whether we can use the variable to test the exit condition.
5416      This is the case iff the period of the induction variable is greater
5417      than the number of iterations for which the exit condition is true.  */
5418   period = iv_period (cand->iv);
5419
5420   /* If the number of iterations is constant, compare against it directly.  */
5421   if (TREE_CODE (desc->niter) == INTEGER_CST)
5422     {
5423       /* See cand_value_at.  */
5424       if (stmt_after_increment (loop, cand, use->stmt))
5425         {
5426           if (!tree_int_cst_lt (desc->niter, period))
5427             return false;
5428         }
5429       else
5430         {
5431           if (tree_int_cst_lt (period, desc->niter))
5432             return false;
5433         }
5434     }
5435
5436   /* If not, and if this is the only possible exit of the loop, see whether
5437      we can get a conservative estimate on the number of iterations of the
5438      entire loop and compare against that instead.  */
5439   else
5440     {
5441       widest_int period_value, max_niter;
5442
5443       max_niter = desc->max;
5444       if (stmt_after_increment (loop, cand, use->stmt))
5445         max_niter += 1;
5446       period_value = wi::to_widest (period);
5447       if (wi::gtu_p (max_niter, period_value))
5448         {
5449           /* See if we can take advantage of inferred loop bound
5450              information.  */
5451           if (data->loop_single_exit_p)
5452             {
5453               if (!max_loop_iterations (loop, &max_niter))
5454                 return false;
5455               /* The loop bound is already adjusted by adding 1.  */
5456               if (wi::gtu_p (max_niter, period_value))
5457                 return false;
5458             }
5459           else
5460             return false;
5461         }
5462     }
5463
5464   /* For doloop IV cand, the bound would be zero.  It's safe whether
5465      may_be_zero set or not.  */
5466   if (cand->doloop_p)
5467     {
5468       *bound = build_int_cst (TREE_TYPE (cand->iv->base), 0);
5469       *comp = iv_elimination_compare (data, use);
5470       return true;
5471     }
5472
5473   cand_value_at (loop, cand, use->stmt, desc, &bnd);
5474
5475   *bound = fold_convert (TREE_TYPE (cand->iv->base),
5476                          aff_combination_to_tree (&bnd));
5477   *comp = iv_elimination_compare (data, use);
5478
5479   /* It is unlikely that computing the number of iterations using division
5480      would be more profitable than keeping the original induction variable.  */
5481   if (expression_expensive_p (*bound))
5482     return false;
5483
5484   /* Sometimes, it is possible to handle the situation that the number of
5485      iterations may be zero unless additional assumptions by using <
5486      instead of != in the exit condition.
5487
5488      TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5489            base the exit condition on it.  However, that is often too
5490            expensive.  */
5491   if (!integer_zerop (desc->may_be_zero))
5492     return iv_elimination_compare_lt (data, cand, comp, desc);
5493
5494   return true;
5495 }
5496
5497  /* Calculates the cost of BOUND, if it is a PARM_DECL.  A PARM_DECL must
5498     be copied, if it is used in the loop body and DATA->body_includes_call.  */
5499
5500 static int
5501 parm_decl_cost (struct ivopts_data *data, tree bound)
5502 {
5503   tree sbound = bound;
5504   STRIP_NOPS (sbound);
5505
5506   if (TREE_CODE (sbound) == SSA_NAME
5507       && SSA_NAME_IS_DEFAULT_DEF (sbound)
5508       && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5509       && data->body_includes_call)
5510     return COSTS_N_INSNS (1);
5511
5512   return 0;
5513 }
5514
5515 /* Determines cost of computing the use in GROUP with CAND in a condition.  */
5516
5517 static bool
5518 determine_group_iv_cost_cond (struct ivopts_data *data,
5519                               struct iv_group *group, struct iv_cand *cand)
5520 {
5521   tree bound = NULL_TREE;
5522   struct iv *cmp_iv;
5523   bitmap inv_exprs = NULL;
5524   bitmap inv_vars_elim = NULL, inv_vars_express = NULL, inv_vars;
5525   comp_cost elim_cost = infinite_cost, express_cost, cost, bound_cost;
5526   enum comp_iv_rewrite rewrite_type;
5527   iv_inv_expr_ent *inv_expr_elim = NULL, *inv_expr_express = NULL, *inv_expr;
5528   tree *control_var, *bound_cst;
5529   enum tree_code comp = ERROR_MARK;
5530   struct iv_use *use = group->vuses[0];
5531
5532   /* Extract condition operands.  */
5533   rewrite_type = extract_cond_operands (data, use->stmt, &control_var,
5534                                         &bound_cst, NULL, &cmp_iv);
5535   gcc_assert (rewrite_type != COMP_IV_NA);
5536
5537   /* Try iv elimination.  */
5538   if (rewrite_type == COMP_IV_ELIM
5539       && may_eliminate_iv (data, use, cand, &bound, &comp))
5540     {
5541       elim_cost = force_var_cost (data, bound, &inv_vars_elim);
5542       if (elim_cost.cost == 0)
5543         elim_cost.cost = parm_decl_cost (data, bound);
5544       else if (TREE_CODE (bound) == INTEGER_CST)
5545         elim_cost.cost = 0;
5546       /* If we replace a loop condition 'i < n' with 'p < base + n',
5547          inv_vars_elim will have 'base' and 'n' set, which implies that both
5548          'base' and 'n' will be live during the loop.    More likely,
5549          'base + n' will be loop invariant, resulting in only one live value
5550          during the loop.  So in that case we clear inv_vars_elim and set
5551          inv_expr_elim instead.  */
5552       if (inv_vars_elim && bitmap_count_bits (inv_vars_elim) > 1)
5553         {
5554           inv_expr_elim = get_loop_invariant_expr (data, bound);
5555           bitmap_clear (inv_vars_elim);
5556         }
5557       /* The bound is a loop invariant, so it will be only computed
5558          once.  */
5559       elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5560     }
5561
5562   /* When the condition is a comparison of the candidate IV against
5563      zero, prefer this IV.
5564
5565      TODO: The constant that we're subtracting from the cost should
5566      be target-dependent.  This information should be added to the
5567      target costs for each backend.  */
5568   if (!elim_cost.infinite_cost_p () /* Do not try to decrease infinite! */
5569       && integer_zerop (*bound_cst)
5570       && (operand_equal_p (*control_var, cand->var_after, 0)
5571           || operand_equal_p (*control_var, cand->var_before, 0)))
5572     elim_cost -= 1;
5573
5574   express_cost = get_computation_cost (data, use, cand, false,
5575                                        &inv_vars_express, NULL,
5576                                        &inv_expr_express);
5577   if (cmp_iv != NULL)
5578     find_inv_vars (data, &cmp_iv->base, &inv_vars_express);
5579
5580   /* Count the cost of the original bound as well.  */
5581   bound_cost = force_var_cost (data, *bound_cst, NULL);
5582   if (bound_cost.cost == 0)
5583     bound_cost.cost = parm_decl_cost (data, *bound_cst);
5584   else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5585     bound_cost.cost = 0;
5586   express_cost += bound_cost;
5587
5588   /* Choose the better approach, preferring the eliminated IV. */
5589   if (elim_cost <= express_cost)
5590     {
5591       cost = elim_cost;
5592       inv_vars = inv_vars_elim;
5593       inv_vars_elim = NULL;
5594       inv_expr = inv_expr_elim;
5595       /* For doloop candidate/use pair, adjust to zero cost.  */
5596       if (group->doloop_p && cand->doloop_p && elim_cost.cost > no_cost.cost)
5597         cost = no_cost;
5598     }
5599   else
5600     {
5601       cost = express_cost;
5602       inv_vars = inv_vars_express;
5603       inv_vars_express = NULL;
5604       bound = NULL_TREE;
5605       comp = ERROR_MARK;
5606       inv_expr = inv_expr_express;
5607     }
5608
5609   if (inv_expr)
5610     {
5611       inv_exprs = BITMAP_ALLOC (NULL);
5612       bitmap_set_bit (inv_exprs, inv_expr->id);
5613     }
5614   set_group_iv_cost (data, group, cand, cost,
5615                      inv_vars, bound, comp, inv_exprs);
5616
5617   if (inv_vars_elim)
5618     BITMAP_FREE (inv_vars_elim);
5619   if (inv_vars_express)
5620     BITMAP_FREE (inv_vars_express);
5621
5622   return !cost.infinite_cost_p ();
5623 }
5624
5625 /* Determines cost of computing uses in GROUP with CAND.  Returns false
5626    if USE cannot be represented with CAND.  */
5627
5628 static bool
5629 determine_group_iv_cost (struct ivopts_data *data,
5630                          struct iv_group *group, struct iv_cand *cand)
5631 {
5632   switch (group->type)
5633     {
5634     case USE_NONLINEAR_EXPR:
5635       return determine_group_iv_cost_generic (data, group, cand);
5636
5637     case USE_REF_ADDRESS:
5638     case USE_PTR_ADDRESS:
5639       return determine_group_iv_cost_address (data, group, cand);
5640
5641     case USE_COMPARE:
5642       return determine_group_iv_cost_cond (data, group, cand);
5643
5644     default:
5645       gcc_unreachable ();
5646     }
5647 }
5648
5649 /* Return true if get_computation_cost indicates that autoincrement is
5650    a possibility for the pair of USE and CAND, false otherwise.  */
5651
5652 static bool
5653 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5654                            struct iv_cand *cand)
5655 {
5656   if (!address_p (use->type))
5657     return false;
5658
5659   bool can_autoinc = false;
5660   get_computation_cost (data, use, cand, true, NULL, &can_autoinc, NULL);
5661   return can_autoinc;
5662 }
5663
5664 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5665    use that allows autoincrement, and set their AINC_USE if possible.  */
5666
5667 static void
5668 set_autoinc_for_original_candidates (struct ivopts_data *data)
5669 {
5670   unsigned i, j;
5671
5672   for (i = 0; i < data->vcands.length (); i++)
5673     {
5674       struct iv_cand *cand = data->vcands[i];
5675       struct iv_use *closest_before = NULL;
5676       struct iv_use *closest_after = NULL;
5677       if (cand->pos != IP_ORIGINAL)
5678         continue;
5679
5680       for (j = 0; j < data->vgroups.length (); j++)
5681         {
5682           struct iv_group *group = data->vgroups[j];
5683           struct iv_use *use = group->vuses[0];
5684           unsigned uid = gimple_uid (use->stmt);
5685
5686           if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5687             continue;
5688
5689           if (uid < gimple_uid (cand->incremented_at)
5690               && (closest_before == NULL
5691                   || uid > gimple_uid (closest_before->stmt)))
5692             closest_before = use;
5693
5694           if (uid > gimple_uid (cand->incremented_at)
5695               && (closest_after == NULL
5696                   || uid < gimple_uid (closest_after->stmt)))
5697             closest_after = use;
5698         }
5699
5700       if (closest_before != NULL
5701           && autoinc_possible_for_pair (data, closest_before, cand))
5702         cand->ainc_use = closest_before;
5703       else if (closest_after != NULL
5704                && autoinc_possible_for_pair (data, closest_after, cand))
5705         cand->ainc_use = closest_after;
5706     }
5707 }
5708
5709 /* Relate compare use with all candidates.  */
5710
5711 static void
5712 relate_compare_use_with_all_cands (struct ivopts_data *data)
5713 {
5714   unsigned i, count = data->vcands.length ();
5715   for (i = 0; i < data->vgroups.length (); i++)
5716     {
5717       struct iv_group *group = data->vgroups[i];
5718
5719       if (group->type == USE_COMPARE)
5720         bitmap_set_range (group->related_cands, 0, count);
5721     }
5722 }
5723
5724 /* If PREFERRED_MODE is suitable and profitable, use the preferred
5725    PREFERRED_MODE to compute doloop iv base from niter: base = niter + 1.  */
5726
5727 static tree
5728 compute_doloop_base_on_mode (machine_mode preferred_mode, tree niter,
5729                              const widest_int &iterations_max)
5730 {
5731   tree ntype = TREE_TYPE (niter);
5732   tree pref_type = lang_hooks.types.type_for_mode (preferred_mode, 1);
5733   if (!pref_type)
5734     return fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
5735                         build_int_cst (ntype, 1));
5736
5737   gcc_assert (TREE_CODE (pref_type) == INTEGER_TYPE);
5738
5739   int prec = TYPE_PRECISION (ntype);
5740   int pref_prec = TYPE_PRECISION (pref_type);
5741
5742   tree base;
5743
5744   /* Check if the PREFERRED_MODED is able to present niter.  */
5745   if (pref_prec > prec
5746       || wi::ltu_p (iterations_max,
5747                     widest_int::from (wi::max_value (pref_prec, UNSIGNED),
5748                                       UNSIGNED)))
5749     {
5750       /* No wrap, it is safe to use preferred type after niter + 1.  */
5751       if (wi::ltu_p (iterations_max,
5752                      widest_int::from (wi::max_value (prec, UNSIGNED),
5753                                        UNSIGNED)))
5754         {
5755           /* This could help to optimize "-1 +1" pair when niter looks
5756              like "n-1": n is in original mode.  "base = (n - 1) + 1"
5757              in PREFERRED_MODED: it could be base = (PREFERRED_TYPE)n.  */
5758           base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
5759                               build_int_cst (ntype, 1));
5760           base = fold_convert (pref_type, base);
5761         }
5762
5763       /* To avoid wrap, convert niter to preferred type before plus 1.  */
5764       else
5765         {
5766           niter = fold_convert (pref_type, niter);
5767           base = fold_build2 (PLUS_EXPR, pref_type, unshare_expr (niter),
5768                               build_int_cst (pref_type, 1));
5769         }
5770     }
5771   else
5772     base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
5773                         build_int_cst (ntype, 1));
5774   return base;
5775 }
5776
5777 /* Add one doloop dedicated IV candidate:
5778      - Base is (may_be_zero ? 1 : (niter + 1)).
5779      - Step is -1.  */
5780
5781 static void
5782 add_iv_candidate_for_doloop (struct ivopts_data *data)
5783 {
5784   tree_niter_desc *niter_desc = niter_for_single_dom_exit (data);
5785   gcc_assert (niter_desc && niter_desc->assumptions);
5786
5787   tree niter = niter_desc->niter;
5788   tree ntype = TREE_TYPE (niter);
5789   gcc_assert (TREE_CODE (ntype) == INTEGER_TYPE);
5790
5791   tree may_be_zero = niter_desc->may_be_zero;
5792   if (may_be_zero && integer_zerop (may_be_zero))
5793     may_be_zero = NULL_TREE;
5794   if (may_be_zero)
5795     {
5796       if (COMPARISON_CLASS_P (may_be_zero))
5797         {
5798           niter = fold_build3 (COND_EXPR, ntype, may_be_zero,
5799                                build_int_cst (ntype, 0),
5800                                rewrite_to_non_trapping_overflow (niter));
5801         }
5802       /* Don't try to obtain the iteration count expression when may_be_zero is
5803          integer_nonzerop (actually iteration count is one) or else.  */
5804       else
5805         return;
5806     }
5807
5808   machine_mode mode = TYPE_MODE (ntype);
5809   machine_mode pref_mode = targetm.preferred_doloop_mode (mode);
5810
5811   tree base;
5812   if (mode != pref_mode)
5813     {
5814       base = compute_doloop_base_on_mode (pref_mode, niter, niter_desc->max);
5815       ntype = TREE_TYPE (base);
5816     }
5817   else
5818     base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
5819                         build_int_cst (ntype, 1));
5820
5821
5822   add_candidate (data, base, build_int_cst (ntype, -1), true, NULL, NULL, true);
5823 }
5824
5825 /* Finds the candidates for the induction variables.  */
5826
5827 static void
5828 find_iv_candidates (struct ivopts_data *data)
5829 {
5830   /* Add commonly used ivs.  */
5831   add_standard_iv_candidates (data);
5832
5833   /* Add doloop dedicated ivs.  */
5834   if (data->doloop_use_p)
5835     add_iv_candidate_for_doloop (data);
5836
5837   /* Add old induction variables.  */
5838   add_iv_candidate_for_bivs (data);
5839
5840   /* Add induction variables derived from uses.  */
5841   add_iv_candidate_for_groups (data);
5842
5843   set_autoinc_for_original_candidates (data);
5844
5845   /* Record the important candidates.  */
5846   record_important_candidates (data);
5847
5848   /* Relate compare iv_use with all candidates.  */
5849   if (!data->consider_all_candidates)
5850     relate_compare_use_with_all_cands (data);
5851
5852   if (dump_file && (dump_flags & TDF_DETAILS))
5853     {
5854       unsigned i;
5855
5856       fprintf (dump_file, "\n<Important Candidates>:\t");
5857       for (i = 0; i < data->vcands.length (); i++)
5858         if (data->vcands[i]->important)
5859           fprintf (dump_file, " %d,", data->vcands[i]->id);
5860       fprintf (dump_file, "\n");
5861
5862       fprintf (dump_file, "\n<Group, Cand> Related:\n");
5863       for (i = 0; i < data->vgroups.length (); i++)
5864         {
5865           struct iv_group *group = data->vgroups[i];
5866
5867           if (group->related_cands)
5868             {
5869               fprintf (dump_file, "  Group %d:\t", group->id);
5870               dump_bitmap (dump_file, group->related_cands);
5871             }
5872         }
5873       fprintf (dump_file, "\n");
5874     }
5875 }
5876
5877 /* Determines costs of computing use of iv with an iv candidate.  */
5878
5879 static void
5880 determine_group_iv_costs (struct ivopts_data *data)
5881 {
5882   unsigned i, j;
5883   struct iv_cand *cand;
5884   struct iv_group *group;
5885   bitmap to_clear = BITMAP_ALLOC (NULL);
5886
5887   alloc_use_cost_map (data);
5888
5889   for (i = 0; i < data->vgroups.length (); i++)
5890     {
5891       group = data->vgroups[i];
5892
5893       if (data->consider_all_candidates)
5894         {
5895           for (j = 0; j < data->vcands.length (); j++)
5896             {
5897               cand = data->vcands[j];
5898               determine_group_iv_cost (data, group, cand);
5899             }
5900         }
5901       else
5902         {
5903           bitmap_iterator bi;
5904
5905           EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, j, bi)
5906             {
5907               cand = data->vcands[j];
5908               if (!determine_group_iv_cost (data, group, cand))
5909                 bitmap_set_bit (to_clear, j);
5910             }
5911
5912           /* Remove the candidates for that the cost is infinite from
5913              the list of related candidates.  */
5914           bitmap_and_compl_into (group->related_cands, to_clear);
5915           bitmap_clear (to_clear);
5916         }
5917     }
5918
5919   BITMAP_FREE (to_clear);
5920
5921   if (dump_file && (dump_flags & TDF_DETAILS))
5922     {
5923       bitmap_iterator bi;
5924
5925       /* Dump invariant variables.  */
5926       fprintf (dump_file, "\n<Invariant Vars>:\n");
5927       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5928         {
5929           struct version_info *info = ver_info (data, i);
5930           if (info->inv_id)
5931             {
5932               fprintf (dump_file, "Inv %d:\t", info->inv_id);
5933               print_generic_expr (dump_file, info->name, TDF_SLIM);
5934               fprintf (dump_file, "%s\n",
5935                        info->has_nonlin_use ? "" : "\t(eliminable)");
5936             }
5937         }
5938
5939       /* Dump invariant expressions.  */
5940       fprintf (dump_file, "\n<Invariant Expressions>:\n");
5941       auto_vec <iv_inv_expr_ent *> list (data->inv_expr_tab->elements ());
5942
5943       for (hash_table<iv_inv_expr_hasher>::iterator it
5944            = data->inv_expr_tab->begin (); it != data->inv_expr_tab->end ();
5945            ++it)
5946         list.safe_push (*it);
5947
5948       list.qsort (sort_iv_inv_expr_ent);
5949
5950       for (i = 0; i < list.length (); ++i)
5951         {
5952           fprintf (dump_file, "inv_expr %d: \t", list[i]->id);
5953           print_generic_expr (dump_file, list[i]->expr, TDF_SLIM);
5954           fprintf (dump_file, "\n");
5955         }
5956
5957       fprintf (dump_file, "\n<Group-candidate Costs>:\n");
5958
5959       for (i = 0; i < data->vgroups.length (); i++)
5960         {
5961           group = data->vgroups[i];
5962
5963           fprintf (dump_file, "Group %d:\n", i);
5964           fprintf (dump_file, "  cand\tcost\tcompl.\tinv.expr.\tinv.vars\n");
5965           for (j = 0; j < group->n_map_members; j++)
5966             {
5967               if (!group->cost_map[j].cand
5968                   || group->cost_map[j].cost.infinite_cost_p ())
5969                 continue;
5970
5971               fprintf (dump_file, "  %d\t%" PRId64 "\t%d\t",
5972                        group->cost_map[j].cand->id,
5973                        group->cost_map[j].cost.cost,
5974                        group->cost_map[j].cost.complexity);
5975               if (!group->cost_map[j].inv_exprs
5976                   || bitmap_empty_p (group->cost_map[j].inv_exprs))
5977                 fprintf (dump_file, "NIL;\t");
5978               else
5979                 bitmap_print (dump_file,
5980                               group->cost_map[j].inv_exprs, "", ";\t");
5981               if (!group->cost_map[j].inv_vars
5982                   || bitmap_empty_p (group->cost_map[j].inv_vars))
5983                 fprintf (dump_file, "NIL;\n");
5984               else
5985                 bitmap_print (dump_file,
5986                               group->cost_map[j].inv_vars, "", "\n");
5987             }
5988
5989           fprintf (dump_file, "\n");
5990         }
5991       fprintf (dump_file, "\n");
5992     }
5993 }
5994
5995 /* Determines cost of the candidate CAND.  */
5996
5997 static void
5998 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5999 {
6000   comp_cost cost_base;
6001   int64_t cost, cost_step;
6002   tree base;
6003
6004   gcc_assert (cand->iv != NULL);
6005
6006   /* There are two costs associated with the candidate -- its increment
6007      and its initialization.  The second is almost negligible for any loop
6008      that rolls enough, so we take it just very little into account.  */
6009
6010   base = cand->iv->base;
6011   cost_base = force_var_cost (data, base, NULL);
6012   /* It will be exceptional that the iv register happens to be initialized with
6013      the proper value at no cost.  In general, there will at least be a regcopy
6014      or a const set.  */
6015   if (cost_base.cost == 0)
6016     cost_base.cost = COSTS_N_INSNS (1);
6017   /* Doloop decrement should be considered as zero cost.  */
6018   if (cand->doloop_p)
6019     cost_step = 0;
6020   else
6021     cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
6022   cost = cost_step + adjust_setup_cost (data, cost_base.cost);
6023
6024   /* Prefer the original ivs unless we may gain something by replacing it.
6025      The reason is to make debugging simpler; so this is not relevant for
6026      artificial ivs created by other optimization passes.  */
6027   if ((cand->pos != IP_ORIGINAL
6028        || !SSA_NAME_VAR (cand->var_before)
6029        || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
6030       /* Prefer doloop as well.  */
6031       && !cand->doloop_p)
6032     cost++;
6033
6034   /* Prefer not to insert statements into latch unless there are some
6035      already (so that we do not create unnecessary jumps).  */
6036   if (cand->pos == IP_END
6037       && empty_block_p (ip_end_pos (data->current_loop)))
6038     cost++;
6039
6040   cand->cost = cost;
6041   cand->cost_step = cost_step;
6042 }
6043
6044 /* Determines costs of computation of the candidates.  */
6045
6046 static void
6047 determine_iv_costs (struct ivopts_data *data)
6048 {
6049   unsigned i;
6050
6051   if (dump_file && (dump_flags & TDF_DETAILS))
6052     {
6053       fprintf (dump_file, "<Candidate Costs>:\n");
6054       fprintf (dump_file, "  cand\tcost\n");
6055     }
6056
6057   for (i = 0; i < data->vcands.length (); i++)
6058     {
6059       struct iv_cand *cand = data->vcands[i];
6060
6061       determine_iv_cost (data, cand);
6062
6063       if (dump_file && (dump_flags & TDF_DETAILS))
6064         fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
6065     }
6066
6067   if (dump_file && (dump_flags & TDF_DETAILS))
6068     fprintf (dump_file, "\n");
6069 }
6070
6071 /* Estimate register pressure for loop having N_INVS invariants and N_CANDS
6072    induction variables.  Note N_INVS includes both invariant variables and
6073    invariant expressions.  */
6074
6075 static unsigned
6076 ivopts_estimate_reg_pressure (struct ivopts_data *data, unsigned n_invs,
6077                               unsigned n_cands)
6078 {
6079   unsigned cost;
6080   unsigned n_old = data->regs_used, n_new = n_invs + n_cands;
6081   unsigned regs_needed = n_new + n_old, available_regs = target_avail_regs;
6082   bool speed = data->speed;
6083
6084   /* If there is a call in the loop body, the call-clobbered registers
6085      are not available for loop invariants.  */
6086   if (data->body_includes_call)
6087     available_regs = available_regs - target_clobbered_regs;
6088
6089   /* If we have enough registers.  */
6090   if (regs_needed + target_res_regs < available_regs)
6091     cost = n_new;
6092   /* If close to running out of registers, try to preserve them.  */
6093   else if (regs_needed <= available_regs)
6094     cost = target_reg_cost [speed] * regs_needed;
6095   /* If we run out of available registers but the number of candidates
6096      does not, we penalize extra registers using target_spill_cost.  */
6097   else if (n_cands <= available_regs)
6098     cost = target_reg_cost [speed] * available_regs
6099            + target_spill_cost [speed] * (regs_needed - available_regs);
6100   /* If the number of candidates runs out available registers, we penalize
6101      extra candidate registers using target_spill_cost * 2.  Because it is
6102      more expensive to spill induction variable than invariant.  */
6103   else
6104     cost = target_reg_cost [speed] * available_regs
6105            + target_spill_cost [speed] * (n_cands - available_regs) * 2
6106            + target_spill_cost [speed] * (regs_needed - n_cands);
6107
6108   /* Finally, add the number of candidates, so that we prefer eliminating
6109      induction variables if possible.  */
6110   return cost + n_cands;
6111 }
6112
6113 /* For each size of the induction variable set determine the penalty.  */
6114
6115 static void
6116 determine_set_costs (struct ivopts_data *data)
6117 {
6118   unsigned j, n;
6119   gphi *phi;
6120   gphi_iterator psi;
6121   tree op;
6122   class loop *loop = data->current_loop;
6123   bitmap_iterator bi;
6124
6125   if (dump_file && (dump_flags & TDF_DETAILS))
6126     {
6127       fprintf (dump_file, "<Global Costs>:\n");
6128       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
6129       fprintf (dump_file, "  target_clobbered_regs %d\n", target_clobbered_regs);
6130       fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost[data->speed]);
6131       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost[data->speed]);
6132     }
6133
6134   n = 0;
6135   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
6136     {
6137       phi = psi.phi ();
6138       op = PHI_RESULT (phi);
6139
6140       if (virtual_operand_p (op))
6141         continue;
6142
6143       if (get_iv (data, op))
6144         continue;
6145
6146       if (!POINTER_TYPE_P (TREE_TYPE (op))
6147           && !INTEGRAL_TYPE_P (TREE_TYPE (op)))
6148         continue;
6149
6150       n++;
6151     }
6152
6153   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6154     {
6155       struct version_info *info = ver_info (data, j);
6156
6157       if (info->inv_id && info->has_nonlin_use)
6158         n++;
6159     }
6160
6161   data->regs_used = n;
6162   if (dump_file && (dump_flags & TDF_DETAILS))
6163     fprintf (dump_file, "  regs_used %d\n", n);
6164
6165   if (dump_file && (dump_flags & TDF_DETAILS))
6166     {
6167       fprintf (dump_file, "  cost for size:\n");
6168       fprintf (dump_file, "  ivs\tcost\n");
6169       for (j = 0; j <= 2 * target_avail_regs; j++)
6170         fprintf (dump_file, "  %d\t%d\n", j,
6171                  ivopts_estimate_reg_pressure (data, 0, j));
6172       fprintf (dump_file, "\n");
6173     }
6174 }
6175
6176 /* Returns true if A is a cheaper cost pair than B.  */
6177
6178 static bool
6179 cheaper_cost_pair (class cost_pair *a, class cost_pair *b)
6180 {
6181   if (!a)
6182     return false;
6183
6184   if (!b)
6185     return true;
6186
6187   if (a->cost < b->cost)
6188     return true;
6189
6190   if (b->cost < a->cost)
6191     return false;
6192
6193   /* In case the costs are the same, prefer the cheaper candidate.  */
6194   if (a->cand->cost < b->cand->cost)
6195     return true;
6196
6197   return false;
6198 }
6199
6200 /* Compare if A is a more expensive cost pair than B.  Return 1, 0 and -1
6201    for more expensive, equal and cheaper respectively.  */
6202
6203 static int
6204 compare_cost_pair (class cost_pair *a, class cost_pair *b)
6205 {
6206   if (cheaper_cost_pair (a, b))
6207     return -1;
6208   if (cheaper_cost_pair (b, a))
6209     return 1;
6210
6211   return 0;
6212 }
6213
6214 /* Returns candidate by that USE is expressed in IVS.  */
6215
6216 static class cost_pair *
6217 iv_ca_cand_for_group (class iv_ca *ivs, struct iv_group *group)
6218 {
6219   return ivs->cand_for_group[group->id];
6220 }
6221
6222 /* Computes the cost field of IVS structure.  */
6223
6224 static void
6225 iv_ca_recount_cost (struct ivopts_data *data, class iv_ca *ivs)
6226 {
6227   comp_cost cost = ivs->cand_use_cost;
6228
6229   cost += ivs->cand_cost;
6230   cost += ivopts_estimate_reg_pressure (data, ivs->n_invs, ivs->n_cands);
6231   ivs->cost = cost;
6232 }
6233
6234 /* Remove use of invariants in set INVS by decreasing counter in N_INV_USES
6235    and IVS.  */
6236
6237 static void
6238 iv_ca_set_remove_invs (class iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
6239 {
6240   bitmap_iterator bi;
6241   unsigned iid;
6242
6243   if (!invs)
6244     return;
6245
6246   gcc_assert (n_inv_uses != NULL);
6247   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6248     {
6249       n_inv_uses[iid]--;
6250       if (n_inv_uses[iid] == 0)
6251         ivs->n_invs--;
6252     }
6253 }
6254
6255 /* Set USE not to be expressed by any candidate in IVS.  */
6256
6257 static void
6258 iv_ca_set_no_cp (struct ivopts_data *data, class iv_ca *ivs,
6259                  struct iv_group *group)
6260 {
6261   unsigned gid = group->id, cid;
6262   class cost_pair *cp;
6263
6264   cp = ivs->cand_for_group[gid];
6265   if (!cp)
6266     return;
6267   cid = cp->cand->id;
6268
6269   ivs->bad_groups++;
6270   ivs->cand_for_group[gid] = NULL;
6271   ivs->n_cand_uses[cid]--;
6272
6273   if (ivs->n_cand_uses[cid] == 0)
6274     {
6275       bitmap_clear_bit (ivs->cands, cid);
6276       if (!cp->cand->doloop_p || !targetm.have_count_reg_decr_p)
6277         ivs->n_cands--;
6278       ivs->cand_cost -= cp->cand->cost;
6279       iv_ca_set_remove_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
6280       iv_ca_set_remove_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
6281     }
6282
6283   ivs->cand_use_cost -= cp->cost;
6284   iv_ca_set_remove_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
6285   iv_ca_set_remove_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
6286   iv_ca_recount_cost (data, ivs);
6287 }
6288
6289 /* Add use of invariants in set INVS by increasing counter in N_INV_USES and
6290    IVS.  */
6291
6292 static void
6293 iv_ca_set_add_invs (class iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
6294 {
6295   bitmap_iterator bi;
6296   unsigned iid;
6297
6298   if (!invs)
6299     return;
6300
6301   gcc_assert (n_inv_uses != NULL);
6302   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6303     {
6304       n_inv_uses[iid]++;
6305       if (n_inv_uses[iid] == 1)
6306         ivs->n_invs++;
6307     }
6308 }
6309
6310 /* Set cost pair for GROUP in set IVS to CP.  */
6311
6312 static void
6313 iv_ca_set_cp (struct ivopts_data *data, class iv_ca *ivs,
6314               struct iv_group *group, class cost_pair *cp)
6315 {
6316   unsigned gid = group->id, cid;
6317
6318   if (ivs->cand_for_group[gid] == cp)
6319     return;
6320
6321   if (ivs->cand_for_group[gid])
6322     iv_ca_set_no_cp (data, ivs, group);
6323
6324   if (cp)
6325     {
6326       cid = cp->cand->id;
6327
6328       ivs->bad_groups--;
6329       ivs->cand_for_group[gid] = cp;
6330       ivs->n_cand_uses[cid]++;
6331       if (ivs->n_cand_uses[cid] == 1)
6332         {
6333           bitmap_set_bit (ivs->cands, cid);
6334           if (!cp->cand->doloop_p || !targetm.have_count_reg_decr_p)
6335             ivs->n_cands++;
6336           ivs->cand_cost += cp->cand->cost;
6337           iv_ca_set_add_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
6338           iv_ca_set_add_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
6339         }
6340
6341       ivs->cand_use_cost += cp->cost;
6342       iv_ca_set_add_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
6343       iv_ca_set_add_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
6344       iv_ca_recount_cost (data, ivs);
6345     }
6346 }
6347
6348 /* Extend set IVS by expressing USE by some of the candidates in it
6349    if possible.  Consider all important candidates if candidates in
6350    set IVS don't give any result.  */
6351
6352 static void
6353 iv_ca_add_group (struct ivopts_data *data, class iv_ca *ivs,
6354                struct iv_group *group)
6355 {
6356   class cost_pair *best_cp = NULL, *cp;
6357   bitmap_iterator bi;
6358   unsigned i;
6359   struct iv_cand *cand;
6360
6361   gcc_assert (ivs->upto >= group->id);
6362   ivs->upto++;
6363   ivs->bad_groups++;
6364
6365   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6366     {
6367       cand = data->vcands[i];
6368       cp = get_group_iv_cost (data, group, cand);
6369       if (cheaper_cost_pair (cp, best_cp))
6370         best_cp = cp;
6371     }
6372
6373   if (best_cp == NULL)
6374     {
6375       EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6376         {
6377           cand = data->vcands[i];
6378           cp = get_group_iv_cost (data, group, cand);
6379           if (cheaper_cost_pair (cp, best_cp))
6380             best_cp = cp;
6381         }
6382     }
6383
6384   iv_ca_set_cp (data, ivs, group, best_cp);
6385 }
6386
6387 /* Get cost for assignment IVS.  */
6388
6389 static comp_cost
6390 iv_ca_cost (class iv_ca *ivs)
6391 {
6392   /* This was a conditional expression but it triggered a bug in
6393      Sun C 5.5.  */
6394   if (ivs->bad_groups)
6395     return infinite_cost;
6396   else
6397     return ivs->cost;
6398 }
6399
6400 /* Compare if applying NEW_CP to GROUP for IVS introduces more invariants
6401    than OLD_CP.  Return 1, 0 and -1 for more, equal and fewer invariants
6402    respectively.  */
6403
6404 static int
6405 iv_ca_compare_deps (struct ivopts_data *data, class iv_ca *ivs,
6406                     struct iv_group *group, class cost_pair *old_cp,
6407                     class cost_pair *new_cp)
6408 {
6409   gcc_assert (old_cp && new_cp && old_cp != new_cp);
6410   unsigned old_n_invs = ivs->n_invs;
6411   iv_ca_set_cp (data, ivs, group, new_cp);
6412   unsigned new_n_invs = ivs->n_invs;
6413   iv_ca_set_cp (data, ivs, group, old_cp);
6414
6415   return new_n_invs > old_n_invs ? 1 : (new_n_invs < old_n_invs ? -1 : 0);
6416 }
6417
6418 /* Creates change of expressing GROUP by NEW_CP instead of OLD_CP and chains
6419    it before NEXT.  */
6420
6421 static struct iv_ca_delta *
6422 iv_ca_delta_add (struct iv_group *group, class cost_pair *old_cp,
6423                  class cost_pair *new_cp, struct iv_ca_delta *next)
6424 {
6425   struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
6426
6427   change->group = group;
6428   change->old_cp = old_cp;
6429   change->new_cp = new_cp;
6430   change->next = next;
6431
6432   return change;
6433 }
6434
6435 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
6436    are rewritten.  */
6437
6438 static struct iv_ca_delta *
6439 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
6440 {
6441   struct iv_ca_delta *last;
6442
6443   if (!l2)
6444     return l1;
6445
6446   if (!l1)
6447     return l2;
6448
6449   for (last = l1; last->next; last = last->next)
6450     continue;
6451   last->next = l2;
6452
6453   return l1;
6454 }
6455
6456 /* Reverse the list of changes DELTA, forming the inverse to it.  */
6457
6458 static struct iv_ca_delta *
6459 iv_ca_delta_reverse (struct iv_ca_delta *delta)
6460 {
6461   struct iv_ca_delta *act, *next, *prev = NULL;
6462
6463   for (act = delta; act; act = next)
6464     {
6465       next = act->next;
6466       act->next = prev;
6467       prev = act;
6468
6469       std::swap (act->old_cp, act->new_cp);
6470     }
6471
6472   return prev;
6473 }
6474
6475 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
6476    reverted instead.  */
6477
6478 static void
6479 iv_ca_delta_commit (struct ivopts_data *data, class iv_ca *ivs,
6480                     struct iv_ca_delta *delta, bool forward)
6481 {
6482   class cost_pair *from, *to;
6483   struct iv_ca_delta *act;
6484
6485   if (!forward)
6486     delta = iv_ca_delta_reverse (delta);
6487
6488   for (act = delta; act; act = act->next)
6489     {
6490       from = act->old_cp;
6491       to = act->new_cp;
6492       gcc_assert (iv_ca_cand_for_group (ivs, act->group) == from);
6493       iv_ca_set_cp (data, ivs, act->group, to);
6494     }
6495
6496   if (!forward)
6497     iv_ca_delta_reverse (delta);
6498 }
6499
6500 /* Returns true if CAND is used in IVS.  */
6501
6502 static bool
6503 iv_ca_cand_used_p (class iv_ca *ivs, struct iv_cand *cand)
6504 {
6505   return ivs->n_cand_uses[cand->id] > 0;
6506 }
6507
6508 /* Returns number of induction variable candidates in the set IVS.  */
6509
6510 static unsigned
6511 iv_ca_n_cands (class iv_ca *ivs)
6512 {
6513   return ivs->n_cands;
6514 }
6515
6516 /* Free the list of changes DELTA.  */
6517
6518 static void
6519 iv_ca_delta_free (struct iv_ca_delta **delta)
6520 {
6521   struct iv_ca_delta *act, *next;
6522
6523   for (act = *delta; act; act = next)
6524     {
6525       next = act->next;
6526       free (act);
6527     }
6528
6529   *delta = NULL;
6530 }
6531
6532 /* Allocates new iv candidates assignment.  */
6533
6534 static class iv_ca *
6535 iv_ca_new (struct ivopts_data *data)
6536 {
6537   class iv_ca *nw = XNEW (class iv_ca);
6538
6539   nw->upto = 0;
6540   nw->bad_groups = 0;
6541   nw->cand_for_group = XCNEWVEC (class cost_pair *,
6542                                  data->vgroups.length ());
6543   nw->n_cand_uses = XCNEWVEC (unsigned, data->vcands.length ());
6544   nw->cands = BITMAP_ALLOC (NULL);
6545   nw->n_cands = 0;
6546   nw->n_invs = 0;
6547   nw->cand_use_cost = no_cost;
6548   nw->cand_cost = 0;
6549   nw->n_inv_var_uses = XCNEWVEC (unsigned, data->max_inv_var_id + 1);
6550   nw->n_inv_expr_uses = XCNEWVEC (unsigned, data->max_inv_expr_id + 1);
6551   nw->cost = no_cost;
6552
6553   return nw;
6554 }
6555
6556 /* Free memory occupied by the set IVS.  */
6557
6558 static void
6559 iv_ca_free (class iv_ca **ivs)
6560 {
6561   free ((*ivs)->cand_for_group);
6562   free ((*ivs)->n_cand_uses);
6563   BITMAP_FREE ((*ivs)->cands);
6564   free ((*ivs)->n_inv_var_uses);
6565   free ((*ivs)->n_inv_expr_uses);
6566   free (*ivs);
6567   *ivs = NULL;
6568 }
6569
6570 /* Dumps IVS to FILE.  */
6571
6572 static void
6573 iv_ca_dump (struct ivopts_data *data, FILE *file, class iv_ca *ivs)
6574 {
6575   unsigned i;
6576   comp_cost cost = iv_ca_cost (ivs);
6577
6578   fprintf (file, "  cost: %" PRId64 " (complexity %d)\n", cost.cost,
6579            cost.complexity);
6580   fprintf (file, "  reg_cost: %d\n",
6581            ivopts_estimate_reg_pressure (data, ivs->n_invs, ivs->n_cands));
6582   fprintf (file, "  cand_cost: %" PRId64 "\n  cand_group_cost: "
6583            "%" PRId64 " (complexity %d)\n", ivs->cand_cost,
6584            ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
6585   bitmap_print (file, ivs->cands, "  candidates: ","\n");
6586
6587   for (i = 0; i < ivs->upto; i++)
6588     {
6589       struct iv_group *group = data->vgroups[i];
6590       class cost_pair *cp = iv_ca_cand_for_group (ivs, group);
6591       if (cp)
6592         fprintf (file, "   group:%d --> iv_cand:%d, cost=("
6593                  "%" PRId64 ",%d)\n", group->id, cp->cand->id,
6594                  cp->cost.cost, cp->cost.complexity);
6595       else
6596         fprintf (file, "   group:%d --> ??\n", group->id);
6597     }
6598
6599   const char *pref = "";
6600   fprintf (file, "  invariant variables: ");
6601   for (i = 1; i <= data->max_inv_var_id; i++)
6602     if (ivs->n_inv_var_uses[i])
6603       {
6604         fprintf (file, "%s%d", pref, i);
6605         pref = ", ";
6606       }
6607
6608   pref = "";
6609   fprintf (file, "\n  invariant expressions: ");
6610   for (i = 1; i <= data->max_inv_expr_id; i++)
6611     if (ivs->n_inv_expr_uses[i])
6612       {
6613         fprintf (file, "%s%d", pref, i);
6614         pref = ", ";
6615       }
6616
6617   fprintf (file, "\n\n");
6618 }
6619
6620 /* Try changing candidate in IVS to CAND for each use.  Return cost of the
6621    new set, and store differences in DELTA.  Number of induction variables
6622    in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
6623    the function will try to find a solution with mimimal iv candidates.  */
6624
6625 static comp_cost
6626 iv_ca_extend (struct ivopts_data *data, class iv_ca *ivs,
6627               struct iv_cand *cand, struct iv_ca_delta **delta,
6628               unsigned *n_ivs, bool min_ncand)
6629 {
6630   unsigned i;
6631   comp_cost cost;
6632   struct iv_group *group;
6633   class cost_pair *old_cp, *new_cp;
6634
6635   *delta = NULL;
6636   for (i = 0; i < ivs->upto; i++)
6637     {
6638       group = data->vgroups[i];
6639       old_cp = iv_ca_cand_for_group (ivs, group);
6640
6641       if (old_cp
6642           && old_cp->cand == cand)
6643         continue;
6644
6645       new_cp = get_group_iv_cost (data, group, cand);
6646       if (!new_cp)
6647         continue;
6648
6649       if (!min_ncand)
6650         {
6651           int cmp_invs = iv_ca_compare_deps (data, ivs, group, old_cp, new_cp);
6652           /* Skip if new_cp depends on more invariants.  */
6653           if (cmp_invs > 0)
6654             continue;
6655
6656           int cmp_cost = compare_cost_pair (new_cp, old_cp);
6657           /* Skip if new_cp is not cheaper.  */
6658           if (cmp_cost > 0 || (cmp_cost == 0 && cmp_invs == 0))
6659             continue;
6660         }
6661
6662       *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6663     }
6664
6665   iv_ca_delta_commit (data, ivs, *delta, true);
6666   cost = iv_ca_cost (ivs);
6667   if (n_ivs)
6668     *n_ivs = iv_ca_n_cands (ivs);
6669   iv_ca_delta_commit (data, ivs, *delta, false);
6670
6671   return cost;
6672 }
6673
6674 /* Try narrowing set IVS by removing CAND.  Return the cost of
6675    the new set and store the differences in DELTA.  START is
6676    the candidate with which we start narrowing.  */
6677
6678 static comp_cost
6679 iv_ca_narrow (struct ivopts_data *data, class iv_ca *ivs,
6680               struct iv_cand *cand, struct iv_cand *start,
6681               struct iv_ca_delta **delta)
6682 {
6683   unsigned i, ci;
6684   struct iv_group *group;
6685   class cost_pair *old_cp, *new_cp, *cp;
6686   bitmap_iterator bi;
6687   struct iv_cand *cnd;
6688   comp_cost cost, best_cost, acost;
6689
6690   *delta = NULL;
6691   for (i = 0; i < data->vgroups.length (); i++)
6692     {
6693       group = data->vgroups[i];
6694
6695       old_cp = iv_ca_cand_for_group (ivs, group);
6696       if (old_cp->cand != cand)
6697         continue;
6698
6699       best_cost = iv_ca_cost (ivs);
6700       /* Start narrowing with START.  */
6701       new_cp = get_group_iv_cost (data, group, start);
6702
6703       if (data->consider_all_candidates)
6704         {
6705           EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6706             {
6707               if (ci == cand->id || (start && ci == start->id))
6708                 continue;
6709
6710               cnd = data->vcands[ci];
6711
6712               cp = get_group_iv_cost (data, group, cnd);
6713               if (!cp)
6714                 continue;
6715
6716               iv_ca_set_cp (data, ivs, group, cp);
6717               acost = iv_ca_cost (ivs);
6718
6719               if (acost < best_cost)
6720                 {
6721                   best_cost = acost;
6722                   new_cp = cp;
6723                 }
6724             }
6725         }
6726       else
6727         {
6728           EXECUTE_IF_AND_IN_BITMAP (group->related_cands, ivs->cands, 0, ci, bi)
6729             {
6730               if (ci == cand->id || (start && ci == start->id))
6731                 continue;
6732
6733               cnd = data->vcands[ci];
6734
6735               cp = get_group_iv_cost (data, group, cnd);
6736               if (!cp)
6737                 continue;
6738
6739               iv_ca_set_cp (data, ivs, group, cp);
6740               acost = iv_ca_cost (ivs);
6741
6742               if (acost < best_cost)
6743                 {
6744                   best_cost = acost;
6745                   new_cp = cp;
6746                 }
6747             }
6748         }
6749       /* Restore to old cp for use.  */
6750       iv_ca_set_cp (data, ivs, group, old_cp);
6751
6752       if (!new_cp)
6753         {
6754           iv_ca_delta_free (delta);
6755           return infinite_cost;
6756         }
6757
6758       *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6759     }
6760
6761   iv_ca_delta_commit (data, ivs, *delta, true);
6762   cost = iv_ca_cost (ivs);
6763   iv_ca_delta_commit (data, ivs, *delta, false);
6764
6765   return cost;
6766 }
6767
6768 /* Try optimizing the set of candidates IVS by removing candidates different
6769    from to EXCEPT_CAND from it.  Return cost of the new set, and store
6770    differences in DELTA.  */
6771
6772 static comp_cost
6773 iv_ca_prune (struct ivopts_data *data, class iv_ca *ivs,
6774              struct iv_cand *except_cand, struct iv_ca_delta **delta)
6775 {
6776   bitmap_iterator bi;
6777   struct iv_ca_delta *act_delta, *best_delta;
6778   unsigned i;
6779   comp_cost best_cost, acost;
6780   struct iv_cand *cand;
6781
6782   best_delta = NULL;
6783   best_cost = iv_ca_cost (ivs);
6784
6785   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6786     {
6787       cand = data->vcands[i];
6788
6789       if (cand == except_cand)
6790         continue;
6791
6792       acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6793
6794       if (acost < best_cost)
6795         {
6796           best_cost = acost;
6797           iv_ca_delta_free (&best_delta);
6798           best_delta = act_delta;
6799         }
6800       else
6801         iv_ca_delta_free (&act_delta);
6802     }
6803
6804   if (!best_delta)
6805     {
6806       *delta = NULL;
6807       return best_cost;
6808     }
6809
6810   /* Recurse to possibly remove other unnecessary ivs.  */
6811   iv_ca_delta_commit (data, ivs, best_delta, true);
6812   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6813   iv_ca_delta_commit (data, ivs, best_delta, false);
6814   *delta = iv_ca_delta_join (best_delta, *delta);
6815   return best_cost;
6816 }
6817
6818 /* Check if CAND_IDX is a candidate other than OLD_CAND and has
6819    cheaper local cost for GROUP than BEST_CP.  Return pointer to
6820    the corresponding cost_pair, otherwise just return BEST_CP.  */
6821
6822 static class cost_pair*
6823 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_group *group,
6824                         unsigned int cand_idx, struct iv_cand *old_cand,
6825                         class cost_pair *best_cp)
6826 {
6827   struct iv_cand *cand;
6828   class cost_pair *cp;
6829
6830   gcc_assert (old_cand != NULL && best_cp != NULL);
6831   if (cand_idx == old_cand->id)
6832     return best_cp;
6833
6834   cand = data->vcands[cand_idx];
6835   cp = get_group_iv_cost (data, group, cand);
6836   if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6837     return cp;
6838
6839   return best_cp;
6840 }
6841
6842 /* Try breaking local optimal fixed-point for IVS by replacing candidates
6843    which are used by more than one iv uses.  For each of those candidates,
6844    this function tries to represent iv uses under that candidate using
6845    other ones with lower local cost, then tries to prune the new set.
6846    If the new set has lower cost, It returns the new cost after recording
6847    candidate replacement in list DELTA.  */
6848
6849 static comp_cost
6850 iv_ca_replace (struct ivopts_data *data, class iv_ca *ivs,
6851                struct iv_ca_delta **delta)
6852 {
6853   bitmap_iterator bi, bj;
6854   unsigned int i, j, k;
6855   struct iv_cand *cand;
6856   comp_cost orig_cost, acost;
6857   struct iv_ca_delta *act_delta, *tmp_delta;
6858   class cost_pair *old_cp, *best_cp = NULL;
6859
6860   *delta = NULL;
6861   orig_cost = iv_ca_cost (ivs);
6862
6863   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6864     {
6865       if (ivs->n_cand_uses[i] == 1
6866           || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6867         continue;
6868
6869       cand = data->vcands[i];
6870
6871       act_delta = NULL;
6872       /*  Represent uses under current candidate using other ones with
6873           lower local cost.  */
6874       for (j = 0; j < ivs->upto; j++)
6875         {
6876           struct iv_group *group = data->vgroups[j];
6877           old_cp = iv_ca_cand_for_group (ivs, group);
6878
6879           if (old_cp->cand != cand)
6880             continue;
6881
6882           best_cp = old_cp;
6883           if (data->consider_all_candidates)
6884             for (k = 0; k < data->vcands.length (); k++)
6885               best_cp = cheaper_cost_with_cand (data, group, k,
6886                                                 old_cp->cand, best_cp);
6887           else
6888             EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, k, bj)
6889               best_cp = cheaper_cost_with_cand (data, group, k,
6890                                                 old_cp->cand, best_cp);
6891
6892           if (best_cp == old_cp)
6893             continue;
6894
6895           act_delta = iv_ca_delta_add (group, old_cp, best_cp, act_delta);
6896         }
6897       /* No need for further prune.  */
6898       if (!act_delta)
6899         continue;
6900
6901       /* Prune the new candidate set.  */
6902       iv_ca_delta_commit (data, ivs, act_delta, true);
6903       acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6904       iv_ca_delta_commit (data, ivs, act_delta, false);
6905       act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6906
6907       if (acost < orig_cost)
6908         {
6909           *delta = act_delta;
6910           return acost;
6911         }
6912       else
6913         iv_ca_delta_free (&act_delta);
6914     }
6915
6916   return orig_cost;
6917 }
6918
6919 /* Tries to extend the sets IVS in the best possible way in order to
6920    express the GROUP.  If ORIGINALP is true, prefer candidates from
6921    the original set of IVs, otherwise favor important candidates not
6922    based on any memory object.  */
6923
6924 static bool
6925 try_add_cand_for (struct ivopts_data *data, class iv_ca *ivs,
6926                   struct iv_group *group, bool originalp)
6927 {
6928   comp_cost best_cost, act_cost;
6929   unsigned i;
6930   bitmap_iterator bi;
6931   struct iv_cand *cand;
6932   struct iv_ca_delta *best_delta = NULL, *act_delta;
6933   class cost_pair *cp;
6934
6935   iv_ca_add_group (data, ivs, group);
6936   best_cost = iv_ca_cost (ivs);
6937   cp = iv_ca_cand_for_group (ivs, group);
6938   if (cp)
6939     {
6940       best_delta = iv_ca_delta_add (group, NULL, cp, NULL);
6941       iv_ca_set_no_cp (data, ivs, group);
6942     }
6943
6944   /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
6945      first try important candidates not based on any memory object.  Only if
6946      this fails, try the specific ones.  Rationale -- in loops with many
6947      variables the best choice often is to use just one generic biv.  If we
6948      added here many ivs specific to the uses, the optimization algorithm later
6949      would be likely to get stuck in a local minimum, thus causing us to create
6950      too many ivs.  The approach from few ivs to more seems more likely to be
6951      successful -- starting from few ivs, replacing an expensive use by a
6952      specific iv should always be a win.  */
6953   EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, i, bi)
6954     {
6955       cand = data->vcands[i];
6956
6957       if (originalp && cand->pos !=IP_ORIGINAL)
6958         continue;
6959
6960       if (!originalp && cand->iv->base_object != NULL_TREE)
6961         continue;
6962
6963       if (iv_ca_cand_used_p (ivs, cand))
6964         continue;
6965
6966       cp = get_group_iv_cost (data, group, cand);
6967       if (!cp)
6968         continue;
6969
6970       iv_ca_set_cp (data, ivs, group, cp);
6971       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6972                                true);
6973       iv_ca_set_no_cp (data, ivs, group);
6974       act_delta = iv_ca_delta_add (group, NULL, cp, act_delta);
6975
6976       if (act_cost < best_cost)
6977         {
6978           best_cost = act_cost;
6979
6980           iv_ca_delta_free (&best_delta);
6981           best_delta = act_delta;
6982         }
6983       else
6984         iv_ca_delta_free (&act_delta);
6985     }
6986
6987   if (best_cost.infinite_cost_p ())
6988     {
6989       for (i = 0; i < group->n_map_members; i++)
6990         {
6991           cp = group->cost_map + i;
6992           cand = cp->cand;
6993           if (!cand)
6994             continue;
6995
6996           /* Already tried this.  */
6997           if (cand->important)
6998             {
6999               if (originalp && cand->pos == IP_ORIGINAL)
7000                 continue;
7001               if (!originalp && cand->iv->base_object == NULL_TREE)
7002                 continue;
7003             }
7004
7005           if (iv_ca_cand_used_p (ivs, cand))
7006             continue;
7007
7008           act_delta = NULL;
7009           iv_ca_set_cp (data, ivs, group, cp);
7010           act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
7011           iv_ca_set_no_cp (data, ivs, group);
7012           act_delta = iv_ca_delta_add (group,
7013                                        iv_ca_cand_for_group (ivs, group),
7014                                        cp, act_delta);
7015
7016           if (act_cost < best_cost)
7017             {
7018               best_cost = act_cost;
7019
7020               if (best_delta)
7021                 iv_ca_delta_free (&best_delta);
7022               best_delta = act_delta;
7023             }
7024           else
7025             iv_ca_delta_free (&act_delta);
7026         }
7027     }
7028
7029   iv_ca_delta_commit (data, ivs, best_delta, true);
7030   iv_ca_delta_free (&best_delta);
7031
7032   return !best_cost.infinite_cost_p ();
7033 }
7034
7035 /* Finds an initial assignment of candidates to uses.  */
7036
7037 static class iv_ca *
7038 get_initial_solution (struct ivopts_data *data, bool originalp)
7039 {
7040   unsigned i;
7041   class iv_ca *ivs = iv_ca_new (data);
7042
7043   for (i = 0; i < data->vgroups.length (); i++)
7044     if (!try_add_cand_for (data, ivs, data->vgroups[i], originalp))
7045       {
7046         iv_ca_free (&ivs);
7047         return NULL;
7048       }
7049
7050   return ivs;
7051 }
7052
7053 /* Tries to improve set of induction variables IVS.  TRY_REPLACE_P
7054    points to a bool variable, this function tries to break local
7055    optimal fixed-point by replacing candidates in IVS if it's true.  */
7056
7057 static bool
7058 try_improve_iv_set (struct ivopts_data *data,
7059                     class iv_ca *ivs, bool *try_replace_p)
7060 {
7061   unsigned i, n_ivs;
7062   comp_cost acost, best_cost = iv_ca_cost (ivs);
7063   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
7064   struct iv_cand *cand;
7065
7066   /* Try extending the set of induction variables by one.  */
7067   for (i = 0; i < data->vcands.length (); i++)
7068     {
7069       cand = data->vcands[i];
7070
7071       if (iv_ca_cand_used_p (ivs, cand))
7072         continue;
7073
7074       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
7075       if (!act_delta)
7076         continue;
7077
7078       /* If we successfully added the candidate and the set is small enough,
7079          try optimizing it by removing other candidates.  */
7080       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
7081         {
7082           iv_ca_delta_commit (data, ivs, act_delta, true);
7083           acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
7084           iv_ca_delta_commit (data, ivs, act_delta, false);
7085           act_delta = iv_ca_delta_join (act_delta, tmp_delta);
7086         }
7087
7088       if (acost < best_cost)
7089         {
7090           best_cost = acost;
7091           iv_ca_delta_free (&best_delta);
7092           best_delta = act_delta;
7093         }
7094       else
7095         iv_ca_delta_free (&act_delta);
7096     }
7097
7098   if (!best_delta)
7099     {
7100       /* Try removing the candidates from the set instead.  */
7101       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
7102
7103       if (!best_delta && *try_replace_p)
7104         {
7105           *try_replace_p = false;
7106           /* So far candidate selecting algorithm tends to choose fewer IVs
7107              so that it can handle cases in which loops have many variables
7108              but the best choice is often to use only one general biv.  One
7109              weakness is it can't handle opposite cases, in which different
7110              candidates should be chosen with respect to each use.  To solve
7111              the problem, we replace candidates in a manner described by the
7112              comments of iv_ca_replace, thus give general algorithm a chance
7113              to break local optimal fixed-point in these cases.  */
7114           best_cost = iv_ca_replace (data, ivs, &best_delta);
7115         }
7116
7117       if (!best_delta)
7118         return false;
7119     }
7120
7121   iv_ca_delta_commit (data, ivs, best_delta, true);
7122   iv_ca_delta_free (&best_delta);
7123   return best_cost == iv_ca_cost (ivs);
7124 }
7125
7126 /* Attempts to find the optimal set of induction variables.  We do simple
7127    greedy heuristic -- we try to replace at most one candidate in the selected
7128    solution and remove the unused ivs while this improves the cost.  */
7129
7130 static class iv_ca *
7131 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
7132 {
7133   class iv_ca *set;
7134   bool try_replace_p = true;
7135
7136   /* Get the initial solution.  */
7137   set = get_initial_solution (data, originalp);
7138   if (!set)
7139     {
7140       if (dump_file && (dump_flags & TDF_DETAILS))
7141         fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
7142       return NULL;
7143     }
7144
7145   if (dump_file && (dump_flags & TDF_DETAILS))
7146     {
7147       fprintf (dump_file, "Initial set of candidates:\n");
7148       iv_ca_dump (data, dump_file, set);
7149     }
7150
7151   while (try_improve_iv_set (data, set, &try_replace_p))
7152     {
7153       if (dump_file && (dump_flags & TDF_DETAILS))
7154         {
7155           fprintf (dump_file, "Improved to:\n");
7156           iv_ca_dump (data, dump_file, set);
7157         }
7158     }
7159
7160   /* If the set has infinite_cost, it can't be optimal.  */
7161   if (iv_ca_cost (set).infinite_cost_p ())
7162     {
7163       if (dump_file && (dump_flags & TDF_DETAILS))
7164         fprintf (dump_file,
7165                  "Overflow to infinite cost in try_improve_iv_set.\n");
7166       iv_ca_free (&set);
7167     }
7168   return set;
7169 }
7170
7171 static class iv_ca *
7172 find_optimal_iv_set (struct ivopts_data *data)
7173 {
7174   unsigned i;
7175   comp_cost cost, origcost;
7176   class iv_ca *set, *origset;
7177
7178   /* Determine the cost based on a strategy that starts with original IVs,
7179      and try again using a strategy that prefers candidates not based
7180      on any IVs.  */
7181   origset = find_optimal_iv_set_1 (data, true);
7182   set = find_optimal_iv_set_1 (data, false);
7183
7184   if (!origset && !set)
7185     return NULL;
7186
7187   origcost = origset ? iv_ca_cost (origset) : infinite_cost;
7188   cost = set ? iv_ca_cost (set) : infinite_cost;
7189
7190   if (dump_file && (dump_flags & TDF_DETAILS))
7191     {
7192       fprintf (dump_file, "Original cost %" PRId64 " (complexity %d)\n\n",
7193                origcost.cost, origcost.complexity);
7194       fprintf (dump_file, "Final cost %" PRId64 " (complexity %d)\n\n",
7195                cost.cost, cost.complexity);
7196     }
7197
7198   /* Choose the one with the best cost.  */
7199   if (origcost <= cost)
7200     {
7201       if (set)
7202         iv_ca_free (&set);
7203       set = origset;
7204     }
7205   else if (origset)
7206     iv_ca_free (&origset);
7207
7208   for (i = 0; i < data->vgroups.length (); i++)
7209     {
7210       struct iv_group *group = data->vgroups[i];
7211       group->selected = iv_ca_cand_for_group (set, group)->cand;
7212     }
7213
7214   return set;
7215 }
7216
7217 /* Creates a new induction variable corresponding to CAND.  */
7218
7219 static void
7220 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
7221 {
7222   gimple_stmt_iterator incr_pos;
7223   tree base;
7224   struct iv_use *use;
7225   struct iv_group *group;
7226   bool after = false;
7227
7228   gcc_assert (cand->iv != NULL);
7229
7230   switch (cand->pos)
7231     {
7232     case IP_NORMAL:
7233       incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
7234       break;
7235
7236     case IP_END:
7237       incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
7238       after = true;
7239       if (!gsi_end_p (incr_pos) && stmt_ends_bb_p (gsi_stmt (incr_pos)))
7240         {
7241           edge e = find_edge (gsi_bb (incr_pos), data->current_loop->header);
7242           incr_pos = gsi_after_labels (split_edge (e));
7243           after = false;
7244         }
7245       break;
7246
7247     case IP_AFTER_USE:
7248       after = true;
7249       /* fall through */
7250     case IP_BEFORE_USE:
7251       incr_pos = gsi_for_stmt (cand->incremented_at);
7252       break;
7253
7254     case IP_ORIGINAL:
7255       /* Mark that the iv is preserved.  */
7256       name_info (data, cand->var_before)->preserve_biv = true;
7257       name_info (data, cand->var_after)->preserve_biv = true;
7258
7259       /* Rewrite the increment so that it uses var_before directly.  */
7260       use = find_interesting_uses_op (data, cand->var_after);
7261       group = data->vgroups[use->group_id];
7262       group->selected = cand;
7263       return;
7264     }
7265
7266   gimple_add_tmp_var (cand->var_before);
7267
7268   base = unshare_expr (cand->iv->base);
7269
7270   create_iv (base, unshare_expr (cand->iv->step),
7271              cand->var_before, data->current_loop,
7272              &incr_pos, after, &cand->var_before, &cand->var_after);
7273 }
7274
7275 /* Creates new induction variables described in SET.  */
7276
7277 static void
7278 create_new_ivs (struct ivopts_data *data, class iv_ca *set)
7279 {
7280   unsigned i;
7281   struct iv_cand *cand;
7282   bitmap_iterator bi;
7283
7284   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7285     {
7286       cand = data->vcands[i];
7287       create_new_iv (data, cand);
7288     }
7289
7290   if (dump_file && (dump_flags & TDF_DETAILS))
7291     {
7292       fprintf (dump_file, "Selected IV set for loop %d",
7293                data->current_loop->num);
7294       if (data->loop_loc != UNKNOWN_LOCATION)
7295         fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7296                  LOCATION_LINE (data->loop_loc));
7297       fprintf (dump_file, ", " HOST_WIDE_INT_PRINT_DEC " avg niters",
7298                avg_loop_niter (data->current_loop));
7299       fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
7300       EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7301         {
7302           cand = data->vcands[i];
7303           dump_cand (dump_file, cand);
7304         }
7305       fprintf (dump_file, "\n");
7306     }
7307 }
7308
7309 /* Rewrites USE (definition of iv used in a nonlinear expression)
7310    using candidate CAND.  */
7311
7312 static void
7313 rewrite_use_nonlinear_expr (struct ivopts_data *data,
7314                             struct iv_use *use, struct iv_cand *cand)
7315 {
7316   gassign *ass;
7317   gimple_stmt_iterator bsi;
7318   tree comp, type = get_use_type (use), tgt;
7319
7320   /* An important special case -- if we are asked to express value of
7321      the original iv by itself, just exit; there is no need to
7322      introduce a new computation (that might also need casting the
7323      variable to unsigned and back).  */
7324   if (cand->pos == IP_ORIGINAL
7325       && cand->incremented_at == use->stmt)
7326     {
7327       tree op = NULL_TREE;
7328       enum tree_code stmt_code;
7329
7330       gcc_assert (is_gimple_assign (use->stmt));
7331       gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
7332
7333       /* Check whether we may leave the computation unchanged.
7334          This is the case only if it does not rely on other
7335          computations in the loop -- otherwise, the computation
7336          we rely upon may be removed in remove_unused_ivs,
7337          thus leading to ICE.  */
7338       stmt_code = gimple_assign_rhs_code (use->stmt);
7339       if (stmt_code == PLUS_EXPR
7340           || stmt_code == MINUS_EXPR
7341           || stmt_code == POINTER_PLUS_EXPR)
7342         {
7343           if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
7344             op = gimple_assign_rhs2 (use->stmt);
7345           else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
7346             op = gimple_assign_rhs1 (use->stmt);
7347         }
7348
7349       if (op != NULL_TREE)
7350         {
7351           if (expr_invariant_in_loop_p (data->current_loop, op))
7352             return;
7353           if (TREE_CODE (op) == SSA_NAME)
7354             {
7355               struct iv *iv = get_iv (data, op);
7356               if (iv != NULL && integer_zerop (iv->step))
7357                 return;
7358             }
7359         }
7360     }
7361
7362   switch (gimple_code (use->stmt))
7363     {
7364     case GIMPLE_PHI:
7365       tgt = PHI_RESULT (use->stmt);
7366
7367       /* If we should keep the biv, do not replace it.  */
7368       if (name_info (data, tgt)->preserve_biv)
7369         return;
7370
7371       bsi = gsi_after_labels (gimple_bb (use->stmt));
7372       break;
7373
7374     case GIMPLE_ASSIGN:
7375       tgt = gimple_assign_lhs (use->stmt);
7376       bsi = gsi_for_stmt (use->stmt);
7377       break;
7378
7379     default:
7380       gcc_unreachable ();
7381     }
7382
7383   aff_tree aff_inv, aff_var;
7384   if (!get_computation_aff_1 (data->current_loop, use->stmt,
7385                               use, cand, &aff_inv, &aff_var))
7386     gcc_unreachable ();
7387
7388   unshare_aff_combination (&aff_inv);
7389   unshare_aff_combination (&aff_var);
7390   /* Prefer CSE opportunity than loop invariant by adding offset at last
7391      so that iv_uses have different offsets can be CSEed.  */
7392   poly_widest_int offset = aff_inv.offset;
7393   aff_inv.offset = 0;
7394
7395   gimple_seq stmt_list = NULL, seq = NULL;
7396   tree comp_op1 = aff_combination_to_tree (&aff_inv);
7397   tree comp_op2 = aff_combination_to_tree (&aff_var);
7398   gcc_assert (comp_op1 && comp_op2);
7399
7400   comp_op1 = force_gimple_operand (comp_op1, &seq, true, NULL);
7401   gimple_seq_add_seq (&stmt_list, seq);
7402   comp_op2 = force_gimple_operand (comp_op2, &seq, true, NULL);
7403   gimple_seq_add_seq (&stmt_list, seq);
7404
7405   if (POINTER_TYPE_P (TREE_TYPE (comp_op2)))
7406     std::swap (comp_op1, comp_op2);
7407
7408   if (POINTER_TYPE_P (TREE_TYPE (comp_op1)))
7409     {
7410       comp = fold_build_pointer_plus (comp_op1,
7411                                       fold_convert (sizetype, comp_op2));
7412       comp = fold_build_pointer_plus (comp,
7413                                       wide_int_to_tree (sizetype, offset));
7414     }
7415   else
7416     {
7417       comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp_op1,
7418                           fold_convert (TREE_TYPE (comp_op1), comp_op2));
7419       comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp,
7420                           wide_int_to_tree (TREE_TYPE (comp_op1), offset));
7421     }
7422
7423   comp = fold_convert (type, comp);
7424   comp = force_gimple_operand (comp, &seq, false, NULL);
7425   gimple_seq_add_seq (&stmt_list, seq);
7426   if (gimple_code (use->stmt) != GIMPLE_PHI
7427       /* We can't allow re-allocating the stmt as it might be pointed
7428          to still.  */
7429       && (get_gimple_rhs_num_ops (TREE_CODE (comp))
7430           >= gimple_num_ops (gsi_stmt (bsi))))
7431     {
7432       comp = force_gimple_operand (comp, &seq, true, NULL);
7433       gimple_seq_add_seq (&stmt_list, seq);
7434       if (POINTER_TYPE_P (TREE_TYPE (tgt)))
7435         {
7436           duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
7437           /* As this isn't a plain copy we have to reset alignment
7438              information.  */
7439           if (SSA_NAME_PTR_INFO (comp))
7440             mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
7441         }
7442     }
7443
7444   gsi_insert_seq_before (&bsi, stmt_list, GSI_SAME_STMT);
7445   if (gimple_code (use->stmt) == GIMPLE_PHI)
7446     {
7447       ass = gimple_build_assign (tgt, comp);
7448       gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
7449
7450       bsi = gsi_for_stmt (use->stmt);
7451       remove_phi_node (&bsi, false);
7452     }
7453   else
7454     {
7455       gimple_assign_set_rhs_from_tree (&bsi, comp);
7456       use->stmt = gsi_stmt (bsi);
7457     }
7458 }
7459
7460 /* Performs a peephole optimization to reorder the iv update statement with
7461    a mem ref to enable instruction combining in later phases. The mem ref uses
7462    the iv value before the update, so the reordering transformation requires
7463    adjustment of the offset. CAND is the selected IV_CAND.
7464
7465    Example:
7466
7467    t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
7468    iv2 = iv1 + 1;
7469
7470    if (t < val)      (1)
7471      goto L;
7472    goto Head;
7473
7474
7475    directly propagating t over to (1) will introduce overlapping live range
7476    thus increase register pressure. This peephole transform it into:
7477
7478
7479    iv2 = iv1 + 1;
7480    t = MEM_REF (base, iv2, 8, 8);
7481    if (t < val)
7482      goto L;
7483    goto Head;
7484 */
7485
7486 static void
7487 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
7488 {
7489   tree var_after;
7490   gimple *iv_update, *stmt;
7491   basic_block bb;
7492   gimple_stmt_iterator gsi, gsi_iv;
7493
7494   if (cand->pos != IP_NORMAL)
7495     return;
7496
7497   var_after = cand->var_after;
7498   iv_update = SSA_NAME_DEF_STMT (var_after);
7499
7500   bb = gimple_bb (iv_update);
7501   gsi = gsi_last_nondebug_bb (bb);
7502   stmt = gsi_stmt (gsi);
7503
7504   /* Only handle conditional statement for now.  */
7505   if (gimple_code (stmt) != GIMPLE_COND)
7506     return;
7507
7508   gsi_prev_nondebug (&gsi);
7509   stmt = gsi_stmt (gsi);
7510   if (stmt != iv_update)
7511     return;
7512
7513   gsi_prev_nondebug (&gsi);
7514   if (gsi_end_p (gsi))
7515     return;
7516
7517   stmt = gsi_stmt (gsi);
7518   if (gimple_code (stmt) != GIMPLE_ASSIGN)
7519     return;
7520
7521   if (stmt != use->stmt)
7522     return;
7523
7524   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
7525     return;
7526
7527   if (dump_file && (dump_flags & TDF_DETAILS))
7528     {
7529       fprintf (dump_file, "Reordering \n");
7530       print_gimple_stmt (dump_file, iv_update, 0);
7531       print_gimple_stmt (dump_file, use->stmt, 0);
7532       fprintf (dump_file, "\n");
7533     }
7534
7535   gsi = gsi_for_stmt (use->stmt);
7536   gsi_iv = gsi_for_stmt (iv_update);
7537   gsi_move_before (&gsi_iv, &gsi);
7538
7539   cand->pos = IP_BEFORE_USE;
7540   cand->incremented_at = use->stmt;
7541 }
7542
7543 /* Return the alias pointer type that should be used for a MEM_REF
7544    associated with USE, which has type USE_PTR_ADDRESS.  */
7545
7546 static tree
7547 get_alias_ptr_type_for_ptr_address (iv_use *use)
7548 {
7549   gcall *call = as_a <gcall *> (use->stmt);
7550   switch (gimple_call_internal_fn (call))
7551     {
7552     case IFN_MASK_LOAD:
7553     case IFN_MASK_STORE:
7554     case IFN_MASK_LOAD_LANES:
7555     case IFN_MASK_STORE_LANES:
7556     case IFN_LEN_LOAD:
7557     case IFN_LEN_STORE:
7558       /* The second argument contains the correct alias type.  */
7559       gcc_assert (use->op_p = gimple_call_arg_ptr (call, 0));
7560       return TREE_TYPE (gimple_call_arg (call, 1));
7561
7562     default:
7563       gcc_unreachable ();
7564     }
7565 }
7566
7567
7568 /* Rewrites USE (address that is an iv) using candidate CAND.  */
7569
7570 static void
7571 rewrite_use_address (struct ivopts_data *data,
7572                      struct iv_use *use, struct iv_cand *cand)
7573 {
7574   aff_tree aff;
7575   bool ok;
7576
7577   adjust_iv_update_pos (cand, use);
7578   ok = get_computation_aff (data->current_loop, use->stmt, use, cand, &aff);
7579   gcc_assert (ok);
7580   unshare_aff_combination (&aff);
7581
7582   /* To avoid undefined overflow problems, all IV candidates use unsigned
7583      integer types.  The drawback is that this makes it impossible for
7584      create_mem_ref to distinguish an IV that is based on a memory object
7585      from one that represents simply an offset.
7586
7587      To work around this problem, we pass a hint to create_mem_ref that
7588      indicates which variable (if any) in aff is an IV based on a memory
7589      object.  Note that we only consider the candidate.  If this is not
7590      based on an object, the base of the reference is in some subexpression
7591      of the use -- but these will use pointer types, so they are recognized
7592      by the create_mem_ref heuristics anyway.  */
7593   tree iv = var_at_stmt (data->current_loop, cand, use->stmt);
7594   tree base_hint = (cand->iv->base_object) ? iv : NULL_TREE;
7595   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7596   tree type = use->mem_type;
7597   tree alias_ptr_type;
7598   if (use->type == USE_PTR_ADDRESS)
7599     alias_ptr_type = get_alias_ptr_type_for_ptr_address (use);
7600   else
7601     {
7602       gcc_assert (type == TREE_TYPE (*use->op_p));
7603       unsigned int align = get_object_alignment (*use->op_p);
7604       if (align != TYPE_ALIGN (type))
7605         type = build_aligned_type (type, align);
7606       alias_ptr_type = reference_alias_ptr_type (*use->op_p);
7607     }
7608   tree ref = create_mem_ref (&bsi, type, &aff, alias_ptr_type,
7609                              iv, base_hint, data->speed);
7610
7611   if (use->type == USE_PTR_ADDRESS)
7612     {
7613       ref = fold_build1 (ADDR_EXPR, build_pointer_type (use->mem_type), ref);
7614       ref = fold_convert (get_use_type (use), ref);
7615       ref = force_gimple_operand_gsi (&bsi, ref, true, NULL_TREE,
7616                                       true, GSI_SAME_STMT);
7617     }
7618   else
7619     copy_ref_info (ref, *use->op_p);
7620
7621   *use->op_p = ref;
7622 }
7623
7624 /* Rewrites USE (the condition such that one of the arguments is an iv) using
7625    candidate CAND.  */
7626
7627 static void
7628 rewrite_use_compare (struct ivopts_data *data,
7629                      struct iv_use *use, struct iv_cand *cand)
7630 {
7631   tree comp, op, bound;
7632   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7633   enum tree_code compare;
7634   struct iv_group *group = data->vgroups[use->group_id];
7635   class cost_pair *cp = get_group_iv_cost (data, group, cand);
7636
7637   bound = cp->value;
7638   if (bound)
7639     {
7640       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
7641       tree var_type = TREE_TYPE (var);
7642       gimple_seq stmts;
7643
7644       if (dump_file && (dump_flags & TDF_DETAILS))
7645         {
7646           fprintf (dump_file, "Replacing exit test: ");
7647           print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
7648         }
7649       compare = cp->comp;
7650       bound = unshare_expr (fold_convert (var_type, bound));
7651       op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
7652       if (stmts)
7653         gsi_insert_seq_on_edge_immediate (
7654                 loop_preheader_edge (data->current_loop),
7655                 stmts);
7656
7657       gcond *cond_stmt = as_a <gcond *> (use->stmt);
7658       gimple_cond_set_lhs (cond_stmt, var);
7659       gimple_cond_set_code (cond_stmt, compare);
7660       gimple_cond_set_rhs (cond_stmt, op);
7661       return;
7662     }
7663
7664   /* The induction variable elimination failed; just express the original
7665      giv.  */
7666   comp = get_computation_at (data->current_loop, use->stmt, use, cand);
7667   gcc_assert (comp != NULL_TREE);
7668   gcc_assert (use->op_p != NULL);
7669   *use->op_p = force_gimple_operand_gsi (&bsi, comp, true,
7670                                          SSA_NAME_VAR (*use->op_p),
7671                                          true, GSI_SAME_STMT);
7672 }
7673
7674 /* Rewrite the groups using the selected induction variables.  */
7675
7676 static void
7677 rewrite_groups (struct ivopts_data *data)
7678 {
7679   unsigned i, j;
7680
7681   for (i = 0; i < data->vgroups.length (); i++)
7682     {
7683       struct iv_group *group = data->vgroups[i];
7684       struct iv_cand *cand = group->selected;
7685
7686       gcc_assert (cand);
7687
7688       if (group->type == USE_NONLINEAR_EXPR)
7689         {
7690           for (j = 0; j < group->vuses.length (); j++)
7691             {
7692               rewrite_use_nonlinear_expr (data, group->vuses[j], cand);
7693               update_stmt (group->vuses[j]->stmt);
7694             }
7695         }
7696       else if (address_p (group->type))
7697         {
7698           for (j = 0; j < group->vuses.length (); j++)
7699             {
7700               rewrite_use_address (data, group->vuses[j], cand);
7701               update_stmt (group->vuses[j]->stmt);
7702             }
7703         }
7704       else
7705         {
7706           gcc_assert (group->type == USE_COMPARE);
7707
7708           for (j = 0; j < group->vuses.length (); j++)
7709             {
7710               rewrite_use_compare (data, group->vuses[j], cand);
7711               update_stmt (group->vuses[j]->stmt);
7712             }
7713         }
7714     }
7715 }
7716
7717 /* Removes the ivs that are not used after rewriting.  */
7718
7719 static void
7720 remove_unused_ivs (struct ivopts_data *data, bitmap toremove)
7721 {
7722   unsigned j;
7723   bitmap_iterator bi;
7724
7725   /* Figure out an order in which to release SSA DEFs so that we don't
7726      release something that we'd have to propagate into a debug stmt
7727      afterwards.  */
7728   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7729     {
7730       struct version_info *info;
7731
7732       info = ver_info (data, j);
7733       if (info->iv
7734           && !integer_zerop (info->iv->step)
7735           && !info->inv_id
7736           && !info->iv->nonlin_use
7737           && !info->preserve_biv)
7738         {
7739           bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7740
7741           tree def = info->iv->ssa_name;
7742
7743           if (MAY_HAVE_DEBUG_BIND_STMTS && SSA_NAME_DEF_STMT (def))
7744             {
7745               imm_use_iterator imm_iter;
7746               use_operand_p use_p;
7747               gimple *stmt;
7748               int count = 0;
7749
7750               FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7751                 {
7752                   if (!gimple_debug_bind_p (stmt))
7753                     continue;
7754
7755                   /* We just want to determine whether to do nothing
7756                      (count == 0), to substitute the computed
7757                      expression into a single use of the SSA DEF by
7758                      itself (count == 1), or to use a debug temp
7759                      because the SSA DEF is used multiple times or as
7760                      part of a larger expression (count > 1). */
7761                   count++;
7762                   if (gimple_debug_bind_get_value (stmt) != def)
7763                     count++;
7764
7765                   if (count > 1)
7766                     break;
7767                 }
7768
7769               if (!count)
7770                 continue;
7771
7772               struct iv_use dummy_use;
7773               struct iv_cand *best_cand = NULL, *cand;
7774               unsigned i, best_pref = 0, cand_pref;
7775               tree comp = NULL_TREE;
7776
7777               memset (&dummy_use, 0, sizeof (dummy_use));
7778               dummy_use.iv = info->iv;
7779               for (i = 0; i < data->vgroups.length () && i < 64; i++)
7780                 {
7781                   cand = data->vgroups[i]->selected;
7782                   if (cand == best_cand)
7783                     continue;
7784                   cand_pref = operand_equal_p (cand->iv->step,
7785                                                info->iv->step, 0)
7786                     ? 4 : 0;
7787                   cand_pref
7788                     += TYPE_MODE (TREE_TYPE (cand->iv->base))
7789                     == TYPE_MODE (TREE_TYPE (info->iv->base))
7790                     ? 2 : 0;
7791                   cand_pref
7792                     += TREE_CODE (cand->iv->base) == INTEGER_CST
7793                     ? 1 : 0;
7794                   if (best_cand == NULL || best_pref < cand_pref)
7795                     {
7796                       tree this_comp
7797                         = get_debug_computation_at (data->current_loop,
7798                                                     SSA_NAME_DEF_STMT (def),
7799                                                     &dummy_use, cand);
7800                       if (this_comp)
7801                         {
7802                           best_cand = cand;
7803                           best_pref = cand_pref;
7804                           comp = this_comp;
7805                         }
7806                     }
7807                 }
7808
7809               if (!best_cand)
7810                 continue;
7811
7812               comp = unshare_expr (comp);
7813               if (count > 1)
7814                 {
7815                   tree vexpr = build_debug_expr_decl (TREE_TYPE (comp));
7816                   /* FIXME: Is setting the mode really necessary? */
7817                   if (SSA_NAME_VAR (def))
7818                     SET_DECL_MODE (vexpr, DECL_MODE (SSA_NAME_VAR (def)));
7819                   else
7820                     SET_DECL_MODE (vexpr, TYPE_MODE (TREE_TYPE (vexpr)));
7821                   gdebug *def_temp
7822                     = gimple_build_debug_bind (vexpr, comp, NULL);
7823                   gimple_stmt_iterator gsi;
7824
7825                   if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7826                     gsi = gsi_after_labels (gimple_bb
7827                                             (SSA_NAME_DEF_STMT (def)));
7828                   else
7829                     gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7830
7831                   gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7832                   comp = vexpr;
7833                 }
7834
7835               FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7836                 {
7837                   if (!gimple_debug_bind_p (stmt))
7838                     continue;
7839
7840                   FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7841                     SET_USE (use_p, comp);
7842
7843                   update_stmt (stmt);
7844                 }
7845             }
7846         }
7847     }
7848 }
7849
7850 /* Frees memory occupied by class tree_niter_desc in *VALUE. Callback
7851    for hash_map::traverse.  */
7852
7853 bool
7854 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7855 {
7856   free (value);
7857   return true;
7858 }
7859
7860 /* Frees data allocated by the optimization of a single loop.  */
7861
7862 static void
7863 free_loop_data (struct ivopts_data *data)
7864 {
7865   unsigned i, j;
7866   bitmap_iterator bi;
7867   tree obj;
7868
7869   if (data->niters)
7870     {
7871       data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7872       delete data->niters;
7873       data->niters = NULL;
7874     }
7875
7876   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7877     {
7878       struct version_info *info;
7879
7880       info = ver_info (data, i);
7881       info->iv = NULL;
7882       info->has_nonlin_use = false;
7883       info->preserve_biv = false;
7884       info->inv_id = 0;
7885     }
7886   bitmap_clear (data->relevant);
7887   bitmap_clear (data->important_candidates);
7888
7889   for (i = 0; i < data->vgroups.length (); i++)
7890     {
7891       struct iv_group *group = data->vgroups[i];
7892
7893       for (j = 0; j < group->vuses.length (); j++)
7894         free (group->vuses[j]);
7895       group->vuses.release ();
7896
7897       BITMAP_FREE (group->related_cands);
7898       for (j = 0; j < group->n_map_members; j++)
7899         {
7900           if (group->cost_map[j].inv_vars)
7901             BITMAP_FREE (group->cost_map[j].inv_vars);
7902           if (group->cost_map[j].inv_exprs)
7903             BITMAP_FREE (group->cost_map[j].inv_exprs);
7904         }
7905
7906       free (group->cost_map);
7907       free (group);
7908     }
7909   data->vgroups.truncate (0);
7910
7911   for (i = 0; i < data->vcands.length (); i++)
7912     {
7913       struct iv_cand *cand = data->vcands[i];
7914
7915       if (cand->inv_vars)
7916         BITMAP_FREE (cand->inv_vars);
7917       if (cand->inv_exprs)
7918         BITMAP_FREE (cand->inv_exprs);
7919       free (cand);
7920     }
7921   data->vcands.truncate (0);
7922
7923   if (data->version_info_size < num_ssa_names)
7924     {
7925       data->version_info_size = 2 * num_ssa_names;
7926       free (data->version_info);
7927       data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7928     }
7929
7930   data->max_inv_var_id = 0;
7931   data->max_inv_expr_id = 0;
7932
7933   FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7934     SET_DECL_RTL (obj, NULL_RTX);
7935
7936   decl_rtl_to_reset.truncate (0);
7937
7938   data->inv_expr_tab->empty ();
7939
7940   data->iv_common_cand_tab->empty ();
7941   data->iv_common_cands.truncate (0);
7942 }
7943
7944 /* Finalizes data structures used by the iv optimization pass.  LOOPS is the
7945    loop tree.  */
7946
7947 static void
7948 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7949 {
7950   free_loop_data (data);
7951   free (data->version_info);
7952   BITMAP_FREE (data->relevant);
7953   BITMAP_FREE (data->important_candidates);
7954
7955   decl_rtl_to_reset.release ();
7956   data->vgroups.release ();
7957   data->vcands.release ();
7958   delete data->inv_expr_tab;
7959   data->inv_expr_tab = NULL;
7960   free_affine_expand_cache (&data->name_expansion_cache);
7961   if (data->base_object_map)
7962     delete data->base_object_map;
7963   delete data->iv_common_cand_tab;
7964   data->iv_common_cand_tab = NULL;
7965   data->iv_common_cands.release ();
7966   obstack_free (&data->iv_obstack, NULL);
7967 }
7968
7969 /* Returns true if the loop body BODY includes any function calls.  */
7970
7971 static bool
7972 loop_body_includes_call (basic_block *body, unsigned num_nodes)
7973 {
7974   gimple_stmt_iterator gsi;
7975   unsigned i;
7976
7977   for (i = 0; i < num_nodes; i++)
7978     for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
7979       {
7980         gimple *stmt = gsi_stmt (gsi);
7981         if (is_gimple_call (stmt)
7982             && !gimple_call_internal_p (stmt)
7983             && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
7984           return true;
7985       }
7986   return false;
7987 }
7988
7989 /* Determine cost scaling factor for basic blocks in loop.  */
7990 #define COST_SCALING_FACTOR_BOUND (20)
7991
7992 static void
7993 determine_scaling_factor (struct ivopts_data *data, basic_block *body)
7994 {
7995   int lfreq = data->current_loop->header->count.to_frequency (cfun);
7996   if (!data->speed || lfreq <= 0)
7997     return;
7998
7999   int max_freq = lfreq;
8000   for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
8001     {
8002       body[i]->aux = (void *)(intptr_t) 1;
8003       if (max_freq < body[i]->count.to_frequency (cfun))
8004         max_freq = body[i]->count.to_frequency (cfun);
8005     }
8006   if (max_freq > lfreq)
8007     {
8008       int divisor, factor;
8009       /* Check if scaling factor itself needs to be scaled by the bound.  This
8010          is to avoid overflow when scaling cost according to profile info.  */
8011       if (max_freq / lfreq > COST_SCALING_FACTOR_BOUND)
8012         {
8013           divisor = max_freq;
8014           factor = COST_SCALING_FACTOR_BOUND;
8015         }
8016       else
8017         {
8018           divisor = lfreq;
8019           factor = 1;
8020         }
8021       for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
8022         {
8023           int bfreq = body[i]->count.to_frequency (cfun);
8024           if (bfreq <= lfreq)
8025             continue;
8026
8027           body[i]->aux = (void*)(intptr_t) (factor * bfreq / divisor);
8028         }
8029     }
8030 }
8031
8032 /* Find doloop comparison use and set its doloop_p on if found.  */
8033
8034 static bool
8035 find_doloop_use (struct ivopts_data *data)
8036 {
8037   struct loop *loop = data->current_loop;
8038
8039   for (unsigned i = 0; i < data->vgroups.length (); i++)
8040     {
8041       struct iv_group *group = data->vgroups[i];
8042       if (group->type == USE_COMPARE)
8043         {
8044           gcc_assert (group->vuses.length () == 1);
8045           struct iv_use *use = group->vuses[0];
8046           gimple *stmt = use->stmt;
8047           if (gimple_code (stmt) == GIMPLE_COND)
8048             {
8049               basic_block bb = gimple_bb (stmt);
8050               edge true_edge, false_edge;
8051               extract_true_false_edges_from_block (bb, &true_edge, &false_edge);
8052               /* This comparison is used for loop latch.  Require latch is empty
8053                  for now.  */
8054               if ((loop->latch == true_edge->dest
8055                    || loop->latch == false_edge->dest)
8056                   && empty_block_p (loop->latch))
8057                 {
8058                   group->doloop_p = true;
8059                   if (dump_file && (dump_flags & TDF_DETAILS))
8060                     {
8061                       fprintf (dump_file, "Doloop cmp iv use: ");
8062                       print_gimple_stmt (dump_file, stmt, TDF_DETAILS);
8063                     }
8064                   return true;
8065                 }
8066             }
8067         }
8068     }
8069
8070   return false;
8071 }
8072
8073 /* For the targets which support doloop, to predict whether later RTL doloop
8074    transformation will perform on this loop, further detect the doloop use and
8075    mark the flag doloop_use_p if predicted.  */
8076
8077 void
8078 analyze_and_mark_doloop_use (struct ivopts_data *data)
8079 {
8080   data->doloop_use_p = false;
8081
8082   if (!flag_branch_on_count_reg)
8083     return;
8084
8085   if (data->current_loop->unroll == USHRT_MAX)
8086     return;
8087
8088   if (!generic_predict_doloop_p (data))
8089     return;
8090
8091   if (find_doloop_use (data))
8092     {
8093       data->doloop_use_p = true;
8094       if (dump_file && (dump_flags & TDF_DETAILS))
8095         {
8096           struct loop *loop = data->current_loop;
8097           fprintf (dump_file,
8098                    "Predict loop %d can perform"
8099                    " doloop optimization later.\n",
8100                    loop->num);
8101           flow_loop_dump (loop, dump_file, NULL, 1);
8102         }
8103     }
8104 }
8105
8106 /* Optimizes the LOOP.  Returns true if anything changed.  */
8107
8108 static bool
8109 tree_ssa_iv_optimize_loop (struct ivopts_data *data, class loop *loop,
8110                            bitmap toremove)
8111 {
8112   bool changed = false;
8113   class iv_ca *iv_ca;
8114   edge exit = single_dom_exit (loop);
8115   basic_block *body;
8116
8117   gcc_assert (!data->niters);
8118   data->current_loop = loop;
8119   data->loop_loc = find_loop_location (loop).get_location_t ();
8120   data->speed = optimize_loop_for_speed_p (loop);
8121
8122   if (dump_file && (dump_flags & TDF_DETAILS))
8123     {
8124       fprintf (dump_file, "Processing loop %d", loop->num);
8125       if (data->loop_loc != UNKNOWN_LOCATION)
8126         fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
8127                  LOCATION_LINE (data->loop_loc));
8128       fprintf (dump_file, "\n");
8129
8130       if (exit)
8131         {
8132           fprintf (dump_file, "  single exit %d -> %d, exit condition ",
8133                    exit->src->index, exit->dest->index);
8134           print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
8135           fprintf (dump_file, "\n");
8136         }
8137
8138       fprintf (dump_file, "\n");
8139     }
8140
8141   body = get_loop_body (loop);
8142   data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
8143   renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
8144
8145   data->loop_single_exit_p
8146     = exit != NULL && loop_only_exit_p (loop, body, exit);
8147
8148   /* For each ssa name determines whether it behaves as an induction variable
8149      in some loop.  */
8150   if (!find_induction_variables (data, body))
8151     goto finish;
8152
8153   /* Finds interesting uses (item 1).  */
8154   find_interesting_uses (data, body);
8155   if (data->vgroups.length () > MAX_CONSIDERED_GROUPS)
8156     goto finish;
8157
8158   /* Determine cost scaling factor for basic blocks in loop.  */
8159   determine_scaling_factor (data, body);
8160
8161   /* Analyze doloop possibility and mark the doloop use if predicted.  */
8162   analyze_and_mark_doloop_use (data);
8163
8164   /* Finds candidates for the induction variables (item 2).  */
8165   find_iv_candidates (data);
8166
8167   /* Calculates the costs (item 3, part 1).  */
8168   determine_iv_costs (data);
8169   determine_group_iv_costs (data);
8170   determine_set_costs (data);
8171
8172   /* Find the optimal set of induction variables (item 3, part 2).  */
8173   iv_ca = find_optimal_iv_set (data);
8174   /* Cleanup basic block aux field.  */
8175   for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
8176     body[i]->aux = NULL;
8177   if (!iv_ca)
8178     goto finish;
8179   changed = true;
8180
8181   /* Create the new induction variables (item 4, part 1).  */
8182   create_new_ivs (data, iv_ca);
8183   iv_ca_free (&iv_ca);
8184
8185   /* Rewrite the uses (item 4, part 2).  */
8186   rewrite_groups (data);
8187
8188   /* Remove the ivs that are unused after rewriting.  */
8189   remove_unused_ivs (data, toremove);
8190
8191 finish:
8192   free (body);
8193   free_loop_data (data);
8194
8195   return changed;
8196 }
8197
8198 /* Main entry point.  Optimizes induction variables in loops.  */
8199
8200 void
8201 tree_ssa_iv_optimize (void)
8202 {
8203   struct ivopts_data data;
8204   auto_bitmap toremove;
8205
8206   tree_ssa_iv_optimize_init (&data);
8207   mark_ssa_maybe_undefs ();
8208
8209   /* Optimize the loops starting with the innermost ones.  */
8210   for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
8211     {
8212       if (!dbg_cnt (ivopts_loop))
8213         continue;
8214
8215       if (dump_file && (dump_flags & TDF_DETAILS))
8216         flow_loop_dump (loop, dump_file, NULL, 1);
8217
8218       tree_ssa_iv_optimize_loop (&data, loop, toremove);
8219     }
8220
8221   /* Remove eliminated IV defs.  */
8222   release_defs_bitset (toremove);
8223
8224   /* We have changed the structure of induction variables; it might happen
8225      that definitions in the scev database refer to some of them that were
8226      eliminated.  */
8227   scev_reset_htab ();
8228   /* Likewise niter and control-IV information.  */
8229   free_numbers_of_iterations_estimates (cfun);
8230
8231   tree_ssa_iv_optimize_finalize (&data);
8232 }
8233
8234 #include "gt-tree-ssa-loop-ivopts.h"