gcc/tree-ssa-loop-ivopts.cc
1 /* Induction variable optimizations.
2 Copyright (C) 2003-2023 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* This pass tries to find the optimal set of induction variables for the loop.
21 It optimizes just the basic linear induction variables (although adding
22 support for other types should not be too hard). It includes the
23 optimizations commonly known as strength reduction, induction variable
24 coalescing and induction variable elimination. It does it in the
25 following steps:
27 1) The interesting uses of induction variables are found. This includes
29 -- uses of induction variables in non-linear expressions
30 -- addresses of arrays
31 -- comparisons of induction variables
33 Note the interesting uses are categorized and handled in groups.
34 Generally, address type uses are grouped together if their iv bases
35 differ only by a constant offset.
37 2) Candidates for the induction variables are found. This includes
39 -- old induction variables
40 -- the variables defined by expressions derived from the "interesting
41 groups/uses" above
43 3) The optimal (w.r. to a cost function) set of variables is chosen. The
44 cost function assigns a cost to sets of induction variables and consists
45 of three parts:
47 -- The group/use costs. Each of the interesting groups/uses chooses
48 the best induction variable in the set and adds its cost to the sum.
49 The cost reflects the time spent on modifying the induction variable's
50 value to be usable for the given purpose (adding base and offset for
51 arrays, etc.).
52 -- The variable costs. Each of the variables has a cost assigned that
53 reflects the costs associated with incrementing the value of the
54 variable. The original variables are somewhat preferred.
55 -- The set cost. Depending on the size of the set, extra cost may be
56 added to reflect register pressure.
58 All the costs are defined in a machine-specific way, using the target
59 hooks and machine descriptions to determine them.
61 4) The trees are transformed to use the new variables, the dead code is
62 removed.
64 All of this is done loop by loop. Doing it globally is theoretically
65 possible; it might give better performance and enable us
66 to decide costs more precisely, but getting all the interactions right
67 would be complicated.
69 For targets supporting low-overhead loops, IVOPTs has to take care of
70 the loops which will probably be transformed by the RTL doloop optimization,
71 to try to make the selected IV candidate set optimal. The process of doloop
72 support includes:
74 1) Analyze whether the current loop will be transformed into a doloop, find
75 and mark its compare type IV use as a doloop use (iv_group field doloop_p),
76 and set the flag doloop_use_p of ivopts_data to notify subsequent processing
77 about doloop. See analyze_and_mark_doloop_use and its callees for the details.
78 The target hook predict_doloop_p can be used for target specific checks.
80 2) Add one doloop dedicated IV cand {(may_be_zero ? 1 : (niter + 1)), +, -1}
81 and set the doloop_p flag of the iv_cand; its step cost is set to zero and,
82 like a biv, it incurs no extra cost. For cost determination between the
83 doloop IV cand and an IV use, the target hooks doloop_cost_for_generic and
84 doloop_cost_for_address are provided to add extra costs for generic type and
85 address type IV uses. Zero cost is assigned to the pair between the doloop
86 IV cand and the doloop IV use, and bound zero is set for IV elimination.
88 3) With the cost setting in step 2), the current cost model based IV
89 selection algorithm will proceed as usual, picking up the doloop dedicated
90 IV if profitable. */
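/* As an illustrative sketch (hypothetical example, not taken from any
   testcase), a counted loop such as

     for (i = 0; i < n; i++)
       a[i] = 0;

   may conceptually be rewritten by this pass into something like

     for (p = a; p < a + n; p++)
       *p = 0;

   i.e. the multiplication hidden in the a[i] addressing is strength-reduced
   into a pointer increment, and the original counter may be eliminated when
   it has no other uses.  The actual result depends on the target addressing
   modes and on the cost model implemented below.  */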
92 #include "config.h"
93 #include "system.h"
94 #include "coretypes.h"
95 #include "backend.h"
96 #include "rtl.h"
97 #include "tree.h"
98 #include "gimple.h"
99 #include "cfghooks.h"
100 #include "tree-pass.h"
101 #include "memmodel.h"
102 #include "tm_p.h"
103 #include "ssa.h"
104 #include "expmed.h"
105 #include "insn-config.h"
106 #include "emit-rtl.h"
107 #include "recog.h"
108 #include "cgraph.h"
109 #include "gimple-pretty-print.h"
110 #include "alias.h"
111 #include "fold-const.h"
112 #include "stor-layout.h"
113 #include "tree-eh.h"
114 #include "gimplify.h"
115 #include "gimple-iterator.h"
116 #include "gimplify-me.h"
117 #include "tree-cfg.h"
118 #include "tree-ssa-loop-ivopts.h"
119 #include "tree-ssa-loop-manip.h"
120 #include "tree-ssa-loop-niter.h"
121 #include "tree-ssa-loop.h"
122 #include "explow.h"
123 #include "expr.h"
124 #include "tree-dfa.h"
125 #include "tree-ssa.h"
126 #include "cfgloop.h"
127 #include "tree-scalar-evolution.h"
128 #include "tree-affine.h"
129 #include "tree-ssa-propagate.h"
130 #include "tree-ssa-address.h"
131 #include "builtins.h"
132 #include "tree-vectorizer.h"
133 #include "dbgcnt.h"
134 #include "cfganal.h"
136 /* For lang_hooks.types.type_for_mode. */
137 #include "langhooks.h"
139 /* FIXME: Expressions are expanded to RTL in this pass to determine the
140 cost of different addressing modes. This should be moved to a TBD
141 interface between the GIMPLE and RTL worlds. */
143 /* The infinite cost. */
144 #define INFTY 1000000000
146 /* Returns the expected number of loop iterations for LOOP.
147 The average trip count is computed from profile data if it
148 exists. */
150 static inline HOST_WIDE_INT
151 avg_loop_niter (class loop *loop)
153 HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
154 if (niter == -1)
156 niter = likely_max_stmt_executions_int (loop);
158 if (niter == -1 || niter > param_avg_loop_niter)
159 return param_avg_loop_niter;
162 return niter;
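/* E.g. (illustrative): if the profile-based estimate is 3 iterations, 3 is
   returned unchanged; without such an estimate the likely maximum is used
   instead, capped at param_avg_loop_niter.  */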
165 struct iv_use;
167 /* Representation of the induction variable. */
168 struct iv
170 tree base; /* Initial value of the iv. */
171 tree base_object; /* A memory object to which the induction variable points. */
172 tree step; /* Step of the iv (constant only). */
173 tree ssa_name; /* The ssa name with the value. */
174 struct iv_use *nonlin_use; /* The nonlinear use this iv appears in, if any. */
175 bool biv_p; /* Is it a biv? */
176 bool no_overflow; /* True if the iv doesn't overflow. */
177 bool have_address_use;/* For biv, indicate if it's used in any address
178 type use. */
181 /* Per-ssa version information (induction variable descriptions, etc.). */
182 struct version_info
184 tree name; /* The ssa name. */
185 struct iv *iv; /* Induction variable description. */
186 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
187 an expression that is not an induction variable. */
188 bool preserve_biv; /* For the original biv, whether to preserve it. */
189 unsigned inv_id; /* Id of an invariant. */
192 /* Types of uses. */
193 enum use_type
195 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
196 USE_REF_ADDRESS, /* Use is an address for an explicit memory
197 reference. */
198 USE_PTR_ADDRESS, /* Use is a pointer argument to a function in
199 cases where the expansion of the function
200 will turn the argument into a normal address. */
201 USE_COMPARE /* Use is a compare. */
204 /* Cost of a computation. */
205 class comp_cost
207 public:
208 comp_cost (): cost (0), complexity (0), scratch (0)
211 comp_cost (int64_t cost, unsigned complexity, int64_t scratch = 0)
212 : cost (cost), complexity (complexity), scratch (scratch)
215 /* Returns true if COST is infinite. */
216 bool infinite_cost_p ();
218 /* Adds costs COST1 and COST2. */
219 friend comp_cost operator+ (comp_cost cost1, comp_cost cost2);
221 /* Adds COST to the comp_cost. */
222 comp_cost operator+= (comp_cost cost);
224 /* Adds constant C to this comp_cost. */
225 comp_cost operator+= (HOST_WIDE_INT c);
227 /* Subtracts constant C from this comp_cost. */
228 comp_cost operator-= (HOST_WIDE_INT c);
230 /* Divide the comp_cost by constant C. */
231 comp_cost operator/= (HOST_WIDE_INT c);
233 /* Multiply the comp_cost by constant C. */
234 comp_cost operator*= (HOST_WIDE_INT c);
236 /* Subtracts costs COST1 and COST2. */
237 friend comp_cost operator- (comp_cost cost1, comp_cost cost2);
239 /* Subtracts COST from this comp_cost. */
240 comp_cost operator-= (comp_cost cost);
242 /* Returns true if COST1 is smaller than COST2. */
243 friend bool operator< (comp_cost cost1, comp_cost cost2);
245 /* Returns true if COST1 and COST2 are equal. */
246 friend bool operator== (comp_cost cost1, comp_cost cost2);
248 /* Returns true if COST1 is smaller than or equal to COST2. */
249 friend bool operator<= (comp_cost cost1, comp_cost cost2);
251 int64_t cost; /* The runtime cost. */
252 unsigned complexity; /* The estimate of the complexity of the code for
253 the computation (in no concrete units --
254 complexity field should be larger for more
255 complex expressions and addressing modes). */
256 int64_t scratch; /* Scratch used during cost computation. */
259 static const comp_cost no_cost;
260 static const comp_cost infinite_cost (INFTY, 0, INFTY);
262 bool
263 comp_cost::infinite_cost_p ()
265 return cost == INFTY;
268 comp_cost
269 operator+ (comp_cost cost1, comp_cost cost2)
271 if (cost1.infinite_cost_p () || cost2.infinite_cost_p ())
272 return infinite_cost;
274 gcc_assert (cost1.cost + cost2.cost < infinite_cost.cost);
275 cost1.cost += cost2.cost;
276 cost1.complexity += cost2.complexity;
278 return cost1;
281 comp_cost
282 operator- (comp_cost cost1, comp_cost cost2)
284 if (cost1.infinite_cost_p ())
285 return infinite_cost;
287 gcc_assert (!cost2.infinite_cost_p ());
288 gcc_assert (cost1.cost - cost2.cost < infinite_cost.cost);
290 cost1.cost -= cost2.cost;
291 cost1.complexity -= cost2.complexity;
293 return cost1;
296 comp_cost
297 comp_cost::operator+= (comp_cost cost)
299 *this = *this + cost;
300 return *this;
303 comp_cost
304 comp_cost::operator+= (HOST_WIDE_INT c)
306 if (c >= INFTY)
307 this->cost = INFTY;
309 if (infinite_cost_p ())
310 return *this;
312 gcc_assert (this->cost + c < infinite_cost.cost);
313 this->cost += c;
315 return *this;
318 comp_cost
319 comp_cost::operator-= (HOST_WIDE_INT c)
321 if (infinite_cost_p ())
322 return *this;
324 gcc_assert (this->cost - c < infinite_cost.cost);
325 this->cost -= c;
327 return *this;
330 comp_cost
331 comp_cost::operator/= (HOST_WIDE_INT c)
333 gcc_assert (c != 0);
334 if (infinite_cost_p ())
335 return *this;
337 this->cost /= c;
339 return *this;
342 comp_cost
343 comp_cost::operator*= (HOST_WIDE_INT c)
345 if (infinite_cost_p ())
346 return *this;
348 gcc_assert (this->cost * c < infinite_cost.cost);
349 this->cost *= c;
351 return *this;
354 comp_cost
355 comp_cost::operator-= (comp_cost cost)
357 *this = *this - cost;
358 return *this;
361 bool
362 operator< (comp_cost cost1, comp_cost cost2)
364 if (cost1.cost == cost2.cost)
365 return cost1.complexity < cost2.complexity;
367 return cost1.cost < cost2.cost;
370 bool
371 operator== (comp_cost cost1, comp_cost cost2)
373 return cost1.cost == cost2.cost
374 && cost1.complexity == cost2.complexity;
377 bool
378 operator<= (comp_cost cost1, comp_cost cost2)
380 return cost1 < cost2 || cost1 == cost2;
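/* Worked example (illustrative only): comp_cost (4, 1) < comp_cost (5, 0)
   because the runtime cost field is compared first, while
   comp_cost (4, 1) < comp_cost (4, 2) holds only because the complexity
   field breaks the tie between equal costs.  */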
383 struct iv_inv_expr_ent;
385 /* The candidate - cost pair. */
386 class cost_pair
388 public:
389 struct iv_cand *cand; /* The candidate. */
390 comp_cost cost; /* The cost. */
391 enum tree_code comp; /* For iv elimination, the comparison. */
392 bitmap inv_vars; /* The list of invariant ssa_vars that have to be
393 preserved when representing iv_use with iv_cand. */
394 bitmap inv_exprs; /* The list of newly created invariant expressions
395 when representing iv_use with iv_cand. */
396 tree value; /* For final value elimination, the expression for
397 the final value of the iv. For iv elimination,
398 the new bound to compare with. */
401 /* Use. */
402 struct iv_use
404 unsigned id; /* The id of the use. */
405 unsigned group_id; /* The group id the use belongs to. */
406 enum use_type type; /* Type of the use. */
407 tree mem_type; /* The memory type to use when testing whether an
408 address is legitimate, and what the address's
409 cost is. */
410 struct iv *iv; /* The induction variable it is based on. */
411 gimple *stmt; /* Statement in that it occurs. */
412 tree *op_p; /* The place where it occurs. */
414 tree addr_base; /* Base address with const offset stripped. */
415 poly_uint64_pod addr_offset;
416 /* Const offset stripped from base address. */
419 /* Group of uses. */
420 struct iv_group
422 /* The id of the group. */
423 unsigned id;
424 /* Uses of the group are of the same type. */
425 enum use_type type;
426 /* The set of "related" IV candidates, plus the important ones. */
427 bitmap related_cands;
428 /* Number of IV candidates in the cost_map. */
429 unsigned n_map_members;
430 /* The costs wrto the iv candidates. */
431 class cost_pair *cost_map;
432 /* The selected candidate for the group. */
433 struct iv_cand *selected;
434 /* To indicate this is a doloop use group. */
435 bool doloop_p;
436 /* Uses in the group. */
437 vec<struct iv_use *> vuses;
440 /* The position where the iv is computed. */
441 enum iv_position
443 IP_NORMAL, /* At the end, just before the exit condition. */
444 IP_END, /* At the end of the latch block. */
445 IP_BEFORE_USE, /* Immediately before a specific use. */
446 IP_AFTER_USE, /* Immediately after a specific use. */
447 IP_ORIGINAL /* The original biv. */
450 /* The induction variable candidate. */
451 struct iv_cand
453 unsigned id; /* The number of the candidate. */
454 bool important; /* Whether this is an "important" candidate, i.e. such
455 that it should be considered by all uses. */
456 bool involves_undefs; /* Whether the IV involves undefined values. */
457 ENUM_BITFIELD(iv_position) pos : 8; /* Where it is computed. */
458 gimple *incremented_at;/* For original biv, the statement where it is
459 incremented. */
460 tree var_before; /* The variable used for it before increment. */
461 tree var_after; /* The variable used for it after increment. */
462 struct iv *iv; /* The value of the candidate. NULL for
463 "pseudocandidate" used to indicate the possibility
464 to replace the final value of an iv by direct
465 computation of the value. */
466 unsigned cost; /* Cost of the candidate. */
467 unsigned cost_step; /* Cost of the candidate's increment operation. */
468 struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
469 where it is incremented. */
470 bitmap inv_vars; /* The list of invariant ssa_vars used in step of the
471 iv_cand. */
472 bitmap inv_exprs; /* If step is more complicated than a single ssa_var,
473 handle it as a new invariant expression which will
474 be hoisted out of loop. */
475 struct iv *orig_iv; /* The original iv if this cand is added from biv with
476 smaller type. */
477 bool doloop_p; /* Whether this is a doloop candidate. */
480 /* Hashtable entry for common candidate derived from iv uses. */
481 class iv_common_cand
483 public:
484 tree base;
485 tree step;
486 /* IV uses from which this common candidate is derived. */
487 auto_vec<struct iv_use *> uses;
488 hashval_t hash;
491 /* Hashtable helpers. */
493 struct iv_common_cand_hasher : delete_ptr_hash <iv_common_cand>
495 static inline hashval_t hash (const iv_common_cand *);
496 static inline bool equal (const iv_common_cand *, const iv_common_cand *);
499 /* Hash function for possible common candidates. */
501 inline hashval_t
502 iv_common_cand_hasher::hash (const iv_common_cand *ccand)
504 return ccand->hash;
507 /* Hash table equality function for common candidates. */
509 inline bool
510 iv_common_cand_hasher::equal (const iv_common_cand *ccand1,
511 const iv_common_cand *ccand2)
513 return (ccand1->hash == ccand2->hash
514 && operand_equal_p (ccand1->base, ccand2->base, 0)
515 && operand_equal_p (ccand1->step, ccand2->step, 0)
516 && (TYPE_PRECISION (TREE_TYPE (ccand1->base))
517 == TYPE_PRECISION (TREE_TYPE (ccand2->base))));
520 /* Loop invariant expression hashtable entry. */
522 struct iv_inv_expr_ent
524 /* Tree expression of the entry. */
525 tree expr;
526 /* Unique identifier. */
527 int id;
528 /* Hash value. */
529 hashval_t hash;
532 /* Sort iv_inv_expr_ent pair A and B by id field. */
534 static int
535 sort_iv_inv_expr_ent (const void *a, const void *b)
537 const iv_inv_expr_ent * const *e1 = (const iv_inv_expr_ent * const *) (a);
538 const iv_inv_expr_ent * const *e2 = (const iv_inv_expr_ent * const *) (b);
540 unsigned id1 = (*e1)->id;
541 unsigned id2 = (*e2)->id;
543 if (id1 < id2)
544 return -1;
545 else if (id1 > id2)
546 return 1;
547 else
548 return 0;
551 /* Hashtable helpers. */
553 struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
555 static inline hashval_t hash (const iv_inv_expr_ent *);
556 static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
559 /* Return true if uses of type TYPE represent some form of address. */
561 inline bool
562 address_p (use_type type)
564 return type == USE_REF_ADDRESS || type == USE_PTR_ADDRESS;
567 /* Hash function for loop invariant expressions. */
569 inline hashval_t
570 iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
572 return expr->hash;
575 /* Hash table equality function for expressions. */
577 inline bool
578 iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
579 const iv_inv_expr_ent *expr2)
581 return expr1->hash == expr2->hash
582 && operand_equal_p (expr1->expr, expr2->expr, 0);
585 struct ivopts_data
587 /* The currently optimized loop. */
588 class loop *current_loop;
589 location_t loop_loc;
591 /* Numbers of iterations for all exits of the current loop. */
592 hash_map<edge, tree_niter_desc *> *niters;
594 /* Number of registers used in it. */
595 unsigned regs_used;
597 /* The size of version_info array allocated. */
598 unsigned version_info_size;
600 /* The array of information for the ssa names. */
601 struct version_info *version_info;
603 /* The hashtable of loop invariant expressions created
604 by ivopt. */
605 hash_table<iv_inv_expr_hasher> *inv_expr_tab;
607 /* The bitmap of indices in version_info whose value was changed. */
608 bitmap relevant;
610 /* The uses of induction variables. */
611 vec<iv_group *> vgroups;
613 /* The candidates. */
614 vec<iv_cand *> vcands;
616 /* A bitmap of important candidates. */
617 bitmap important_candidates;
619 /* Cache used by tree_to_aff_combination_expand. */
620 hash_map<tree, name_expansion *> *name_expansion_cache;
622 /* The hashtable of common candidates derived from iv uses. */
623 hash_table<iv_common_cand_hasher> *iv_common_cand_tab;
625 /* The common candidates. */
626 vec<iv_common_cand *> iv_common_cands;
628 /* Hash map recording base object information of tree exp. */
629 hash_map<tree, tree> *base_object_map;
631 /* The maximum invariant variable id. */
632 unsigned max_inv_var_id;
634 /* The maximum invariant expression id. */
635 unsigned max_inv_expr_id;
637 /* Number of no_overflow BIVs which are not used in memory address. */
638 unsigned bivs_not_used_in_addr;
640 /* Obstack for iv structure. */
641 struct obstack iv_obstack;
643 /* Whether to consider just related and important candidates when replacing a
644 use. */
645 bool consider_all_candidates;
647 /* Are we optimizing for speed? */
648 bool speed;
650 /* Whether the loop body includes any function calls. */
651 bool body_includes_call;
653 /* Whether the loop body can only be exited via single exit. */
654 bool loop_single_exit_p;
656 /* Whether the loop has doloop comparison use. */
657 bool doloop_use_p;
660 /* An assignment of iv candidates to uses. */
662 class iv_ca
664 public:
665 /* The number of uses covered by the assignment. */
666 unsigned upto;
668 /* Number of uses that cannot be expressed by the candidates in the set. */
669 unsigned bad_groups;
671 /* Candidate assigned to a use, together with the related costs. */
672 class cost_pair **cand_for_group;
674 /* Number of times each candidate is used. */
675 unsigned *n_cand_uses;
677 /* The candidates used. */
678 bitmap cands;
680 /* The number of candidates in the set. */
681 unsigned n_cands;
683 /* The number of invariants needed, including both invariant variables and
684 invariant expressions. */
685 unsigned n_invs;
687 /* Total cost of expressing uses. */
688 comp_cost cand_use_cost;
690 /* Total cost of candidates. */
691 int64_t cand_cost;
693 /* Number of times each invariant variable is used. */
694 unsigned *n_inv_var_uses;
696 /* Number of times each invariant expression is used. */
697 unsigned *n_inv_expr_uses;
699 /* Total cost of the assignment. */
700 comp_cost cost;
703 /* Difference of two iv candidate assignments. */
705 struct iv_ca_delta
707 /* Changed group. */
708 struct iv_group *group;
710 /* An old assignment (for rollback purposes). */
711 class cost_pair *old_cp;
713 /* A new assignment. */
714 class cost_pair *new_cp;
716 /* Next change in the list. */
717 struct iv_ca_delta *next;
720 /* Bound on the number of candidates below which all candidates are considered. */
722 #define CONSIDER_ALL_CANDIDATES_BOUND \
723 ((unsigned) param_iv_consider_all_candidates_bound)
725 /* If there are more iv occurrences, we just give up (it is quite unlikely that
726 optimizing such a loop would help, and it would take ages). */
728 #define MAX_CONSIDERED_GROUPS \
729 ((unsigned) param_iv_max_considered_uses)
731 /* If there are at most this number of ivs in the set, always try removing
732 unnecessary ivs from the set. */
734 #define ALWAYS_PRUNE_CAND_SET_BOUND \
735 ((unsigned) param_iv_always_prune_cand_set_bound)
737 /* The list of trees for which the decl_rtl field must be reset is stored
738 here. */
740 static vec<tree> decl_rtl_to_reset;
742 static comp_cost force_expr_to_var_cost (tree, bool);
744 /* The single loop exit if it dominates the latch, NULL otherwise. */
746 edge
747 single_dom_exit (class loop *loop)
749 edge exit = single_exit (loop);
751 if (!exit)
752 return NULL;
754 if (!just_once_each_iteration_p (loop, exit->src))
755 return NULL;
757 return exit;
760 /* Dumps information about the induction variable IV to FILE. Don't dump
761 variable's name if DUMP_NAME is FALSE. The information is dumped with
762 preceding spaces indicated by INDENT_LEVEL. */
764 void
765 dump_iv (FILE *file, struct iv *iv, bool dump_name, unsigned indent_level)
767 const char *p;
768 const char spaces[9] = {' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '\0'};
770 if (indent_level > 4)
771 indent_level = 4;
772 p = spaces + 8 - (indent_level << 1);
774 fprintf (file, "%sIV struct:\n", p);
775 if (iv->ssa_name && dump_name)
777 fprintf (file, "%s SSA_NAME:\t", p);
778 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
779 fprintf (file, "\n");
782 fprintf (file, "%s Type:\t", p);
783 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
784 fprintf (file, "\n");
786 fprintf (file, "%s Base:\t", p);
787 print_generic_expr (file, iv->base, TDF_SLIM);
788 fprintf (file, "\n");
790 fprintf (file, "%s Step:\t", p);
791 print_generic_expr (file, iv->step, TDF_SLIM);
792 fprintf (file, "\n");
794 if (iv->base_object)
796 fprintf (file, "%s Object:\t", p);
797 print_generic_expr (file, iv->base_object, TDF_SLIM);
798 fprintf (file, "\n");
801 fprintf (file, "%s Biv:\t%c\n", p, iv->biv_p ? 'Y' : 'N');
803 fprintf (file, "%s Overflowness wrto loop niter:\t%s\n",
804 p, iv->no_overflow ? "No-overflow" : "Overflow");
807 /* Dumps information about the USE to FILE. */
809 void
810 dump_use (FILE *file, struct iv_use *use)
812 fprintf (file, " Use %d.%d:\n", use->group_id, use->id);
813 fprintf (file, " At stmt:\t");
814 print_gimple_stmt (file, use->stmt, 0);
815 fprintf (file, " At pos:\t");
816 if (use->op_p)
817 print_generic_expr (file, *use->op_p, TDF_SLIM);
818 fprintf (file, "\n");
819 dump_iv (file, use->iv, false, 2);
822 /* Dumps information about the uses to FILE. */
824 void
825 dump_groups (FILE *file, struct ivopts_data *data)
827 unsigned i, j;
828 struct iv_group *group;
830 for (i = 0; i < data->vgroups.length (); i++)
832 group = data->vgroups[i];
833 fprintf (file, "Group %d:\n", group->id);
834 if (group->type == USE_NONLINEAR_EXPR)
835 fprintf (file, " Type:\tGENERIC\n");
836 else if (group->type == USE_REF_ADDRESS)
837 fprintf (file, " Type:\tREFERENCE ADDRESS\n");
838 else if (group->type == USE_PTR_ADDRESS)
839 fprintf (file, " Type:\tPOINTER ARGUMENT ADDRESS\n");
840 else
842 gcc_assert (group->type == USE_COMPARE);
843 fprintf (file, " Type:\tCOMPARE\n");
845 for (j = 0; j < group->vuses.length (); j++)
846 dump_use (file, group->vuses[j]);
850 /* Dumps information about induction variable candidate CAND to FILE. */
852 void
853 dump_cand (FILE *file, struct iv_cand *cand)
855 struct iv *iv = cand->iv;
857 fprintf (file, "Candidate %d:\n", cand->id);
858 if (cand->inv_vars)
860 fprintf (file, " Depend on inv.vars: ");
861 dump_bitmap (file, cand->inv_vars);
863 if (cand->inv_exprs)
865 fprintf (file, " Depend on inv.exprs: ");
866 dump_bitmap (file, cand->inv_exprs);
869 if (cand->var_before)
871 fprintf (file, " Var befor: ");
872 print_generic_expr (file, cand->var_before, TDF_SLIM);
873 fprintf (file, "\n");
875 if (cand->var_after)
877 fprintf (file, " Var after: ");
878 print_generic_expr (file, cand->var_after, TDF_SLIM);
879 fprintf (file, "\n");
882 switch (cand->pos)
884 case IP_NORMAL:
885 fprintf (file, " Incr POS: before exit test\n");
886 break;
888 case IP_BEFORE_USE:
889 fprintf (file, " Incr POS: before use %d\n", cand->ainc_use->id);
890 break;
892 case IP_AFTER_USE:
893 fprintf (file, " Incr POS: after use %d\n", cand->ainc_use->id);
894 break;
896 case IP_END:
897 fprintf (file, " Incr POS: at end\n");
898 break;
900 case IP_ORIGINAL:
901 fprintf (file, " Incr POS: orig biv\n");
902 break;
905 dump_iv (file, iv, false, 1);
908 /* Returns the info for ssa version VER. */
910 static inline struct version_info *
911 ver_info (struct ivopts_data *data, unsigned ver)
913 return data->version_info + ver;
916 /* Returns the info for ssa name NAME. */
918 static inline struct version_info *
919 name_info (struct ivopts_data *data, tree name)
921 return ver_info (data, SSA_NAME_VERSION (name));
924 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
925 emitted in LOOP. */
927 static bool
928 stmt_after_ip_normal_pos (class loop *loop, gimple *stmt)
930 basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
932 gcc_assert (bb);
934 if (sbb == loop->latch)
935 return true;
937 if (sbb != bb)
938 return false;
940 return stmt == last_nondebug_stmt (bb);
943 /* Returns true if STMT is after the place where the original induction
944 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
945 if the positions are identical. */
947 static bool
948 stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal)
950 basic_block cand_bb = gimple_bb (cand->incremented_at);
951 basic_block stmt_bb = gimple_bb (stmt);
953 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
954 return false;
956 if (stmt_bb != cand_bb)
957 return true;
959 if (true_if_equal
960 && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
961 return true;
962 return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
965 /* Returns true if STMT is after the place where the induction variable
966 CAND is incremented in LOOP. */
968 static bool
969 stmt_after_increment (class loop *loop, struct iv_cand *cand, gimple *stmt)
971 switch (cand->pos)
973 case IP_END:
974 return false;
976 case IP_NORMAL:
977 return stmt_after_ip_normal_pos (loop, stmt);
979 case IP_ORIGINAL:
980 case IP_AFTER_USE:
981 return stmt_after_inc_pos (cand, stmt, false);
983 case IP_BEFORE_USE:
984 return stmt_after_inc_pos (cand, stmt, true);
986 default:
987 gcc_unreachable ();
991 /* walk_tree callback for contains_abnormal_ssa_name_p. */
993 static tree
994 contains_abnormal_ssa_name_p_1 (tree *tp, int *walk_subtrees, void *)
996 if (TREE_CODE (*tp) == SSA_NAME
997 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (*tp))
998 return *tp;
1000 if (!EXPR_P (*tp))
1001 *walk_subtrees = 0;
1003 return NULL_TREE;
1006 /* Returns true if EXPR contains a ssa name that occurs in an
1007 abnormal phi node. */
1009 bool
1010 contains_abnormal_ssa_name_p (tree expr)
1012 return walk_tree_without_duplicates
1013 (&expr, contains_abnormal_ssa_name_p_1, NULL) != NULL_TREE;
1016 /* Returns the structure describing number of iterations determined from
1017 EXIT of DATA->current_loop, or NULL if something goes wrong. */
1019 static class tree_niter_desc *
1020 niter_for_exit (struct ivopts_data *data, edge exit)
1022 class tree_niter_desc *desc;
1023 tree_niter_desc **slot;
1025 if (!data->niters)
1027 data->niters = new hash_map<edge, tree_niter_desc *>;
1028 slot = NULL;
1030 else
1031 slot = data->niters->get (exit);
1033 if (!slot)
1035 /* Try to determine number of iterations. We cannot safely work with ssa
1036 names that appear in phi nodes on abnormal edges, so that we do not
1037 create overlapping life ranges for them (PR 27283). */
1038 desc = XNEW (class tree_niter_desc);
1039 if (!number_of_iterations_exit (data->current_loop,
1040 exit, desc, true)
1041 || contains_abnormal_ssa_name_p (desc->niter))
1043 XDELETE (desc);
1044 desc = NULL;
1046 data->niters->put (exit, desc);
1048 else
1049 desc = *slot;
1051 return desc;
1054 /* Returns the structure describing number of iterations determined from
1055 single dominating exit of DATA->current_loop, or NULL if something
1056 goes wrong. */
1058 static class tree_niter_desc *
1059 niter_for_single_dom_exit (struct ivopts_data *data)
1061 edge exit = single_dom_exit (data->current_loop);
1063 if (!exit)
1064 return NULL;
1066 return niter_for_exit (data, exit);
1069 /* Initializes data structures used by the iv optimization pass, stored
1070 in DATA. */
1072 static void
1073 tree_ssa_iv_optimize_init (struct ivopts_data *data)
1075 data->version_info_size = 2 * num_ssa_names;
1076 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
1077 data->relevant = BITMAP_ALLOC (NULL);
1078 data->important_candidates = BITMAP_ALLOC (NULL);
1079 data->max_inv_var_id = 0;
1080 data->max_inv_expr_id = 0;
1081 data->niters = NULL;
1082 data->vgroups.create (20);
1083 data->vcands.create (20);
1084 data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
1085 data->name_expansion_cache = NULL;
1086 data->base_object_map = NULL;
1087 data->iv_common_cand_tab = new hash_table<iv_common_cand_hasher> (10);
1088 data->iv_common_cands.create (20);
1089 decl_rtl_to_reset.create (20);
1090 gcc_obstack_init (&data->iv_obstack);
1093 /* walk_tree callback for determine_base_object. */
1095 static tree
1096 determine_base_object_1 (tree *tp, int *walk_subtrees, void *wdata)
1098 tree_code code = TREE_CODE (*tp);
1099 tree obj = NULL_TREE;
1100 if (code == ADDR_EXPR)
1102 tree base = get_base_address (TREE_OPERAND (*tp, 0));
1103 if (!base)
1104 obj = *tp;
1105 else if (TREE_CODE (base) != MEM_REF)
1106 obj = fold_convert (ptr_type_node, build_fold_addr_expr (base));
1108 else if (code == SSA_NAME && POINTER_TYPE_P (TREE_TYPE (*tp)))
1109 obj = fold_convert (ptr_type_node, *tp);
1111 if (!obj)
1113 if (!EXPR_P (*tp))
1114 *walk_subtrees = 0;
1116 return NULL_TREE;
1118 /* Record special node for multiple base objects and stop. */
1119 if (*static_cast<tree *> (wdata))
1121 *static_cast<tree *> (wdata) = integer_zero_node;
1122 return integer_zero_node;
1124 /* Record the base object and continue looking. */
1125 *static_cast<tree *> (wdata) = obj;
1126 return NULL_TREE;
1129 /* Returns a memory object to which EXPR points, with caching. Returns NULL if
1130 we are able to determine that it does not point to any such object; returns
1131 integer_zero_node specifically if EXPR contains multiple base objects. */
1133 static tree
1134 determine_base_object (struct ivopts_data *data, tree expr)
1136 tree *slot, obj = NULL_TREE;
1137 if (data->base_object_map)
1139 if ((slot = data->base_object_map->get(expr)) != NULL)
1140 return *slot;
1142 else
1143 data->base_object_map = new hash_map<tree, tree>;
1145 (void) walk_tree_without_duplicates (&expr, determine_base_object_1, &obj);
1146 data->base_object_map->put (expr, obj);
1147 return obj;
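/* Illustrative examples of the mapping above (assumed, not exhaustive): for
   EXPR being &a[i] the recorded base object is the address of "a"; for a
   pointer SSA name "p" the base object is "p" converted to ptr_type_node;
   when EXPR mixes two different base objects, integer_zero_node is recorded;
   and for a plain integer expression NULL_TREE is returned.  */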
1150 /* Return true if address expression with non-DECL_P operand appears
1151 in EXPR. */
1153 static bool
1154 contain_complex_addr_expr (tree expr)
1156 bool res = false;
1158 STRIP_NOPS (expr);
1159 switch (TREE_CODE (expr))
1161 case POINTER_PLUS_EXPR:
1162 case PLUS_EXPR:
1163 case MINUS_EXPR:
1164 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
1165 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
1166 break;
1168 case ADDR_EXPR:
1169 return (!DECL_P (TREE_OPERAND (expr, 0)));
1171 default:
1172 return false;
1175 return res;
1178 /* Allocates an induction variable with given initial value BASE and step STEP
1179 for loop LOOP. NO_OVERFLOW implies the iv doesn't overflow. */
1181 static struct iv *
1182 alloc_iv (struct ivopts_data *data, tree base, tree step,
1183 bool no_overflow = false)
1185 tree expr = base;
1186 struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
1187 sizeof (struct iv));
1188 gcc_assert (step != NULL_TREE);
1190 /* Lower address expression in base except ones with DECL_P as operand.
1191 By doing this:
1192 1) More accurate cost can be computed for address expressions;
1193 2) Duplicate candidates won't be created for bases in different
1194 forms, like &a[0] and &a. */
1195 STRIP_NOPS (expr);
1196 if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
1197 || contain_complex_addr_expr (expr))
1199 aff_tree comb;
1200 tree_to_aff_combination (expr, TREE_TYPE (expr), &comb);
1201 base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
1204 iv->base = base;
1205 iv->base_object = determine_base_object (data, base);
1206 iv->step = step;
1207 iv->biv_p = false;
1208 iv->nonlin_use = NULL;
1209 iv->ssa_name = NULL_TREE;
1210 if (!no_overflow
1211 && !iv_can_overflow_p (data->current_loop, TREE_TYPE (base),
1212 base, step))
1213 no_overflow = true;
1214 iv->no_overflow = no_overflow;
1215 iv->have_address_use = false;
1217 return iv;
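/* For instance (a sketch of the intent of the lowering above, not a verbatim
   dump): bases written as &a[0] and &a end up in the same affine form, so a
   single candidate serves both instead of two duplicates, and address costs
   are computed on the lowered form.  */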
1220 /* Sets STEP and BASE for induction variable IV. NO_OVERFLOW implies the IV
1221 doesn't overflow. */
1223 static void
1224 set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
1225 bool no_overflow)
1227 struct version_info *info = name_info (data, iv);
1229 gcc_assert (!info->iv);
1231 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1232 info->iv = alloc_iv (data, base, step, no_overflow);
1233 info->iv->ssa_name = iv;
1236 /* Finds induction variable declaration for VAR. */
1238 static struct iv *
1239 get_iv (struct ivopts_data *data, tree var)
1241 basic_block bb;
1242 tree type = TREE_TYPE (var);
1244 if (!POINTER_TYPE_P (type)
1245 && !INTEGRAL_TYPE_P (type))
1246 return NULL;
1248 if (!name_info (data, var)->iv)
1250 bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1252 if (!bb
1253 || !flow_bb_inside_loop_p (data->current_loop, bb))
1255 if (POINTER_TYPE_P (type))
1256 type = sizetype;
1257 set_iv (data, var, var, build_int_cst (type, 0), true);
1261 return name_info (data, var)->iv;
1264 /* Return the first non-invariant ssa var found in EXPR. */
1266 static tree
1267 extract_single_var_from_expr (tree expr)
1269 int i, n;
1270 tree tmp;
1271 enum tree_code code;
1273 if (!expr || is_gimple_min_invariant (expr))
1274 return NULL;
1276 code = TREE_CODE (expr);
1277 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1279 n = TREE_OPERAND_LENGTH (expr);
1280 for (i = 0; i < n; i++)
1282 tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));
1284 if (tmp)
1285 return tmp;
1288 return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
1291 /* Finds basic ivs. */
1293 static bool
1294 find_bivs (struct ivopts_data *data)
1296 gphi *phi;
1297 affine_iv iv;
1298 tree step, type, base, stop;
1299 bool found = false;
1300 class loop *loop = data->current_loop;
1301 gphi_iterator psi;
1303 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1305 phi = psi.phi ();
1307 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1308 continue;
1310 if (virtual_operand_p (PHI_RESULT (phi)))
1311 continue;
1313 if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
1314 continue;
1316 if (integer_zerop (iv.step))
1317 continue;
1319 step = iv.step;
1320 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1321 /* Stop expanding the iv base at the first ssa var referred to by the iv
1322 step. Ideally we should stop at any ssa var, but since that is expensive
1323 and such cases are unusual, we just do it on the first one.
1325 See PR64705 for the rationale. */
1326 stop = extract_single_var_from_expr (step);
1327 base = expand_simple_operations (base, stop);
1328 if (contains_abnormal_ssa_name_p (base)
1329 || contains_abnormal_ssa_name_p (step))
1330 continue;
1332 type = TREE_TYPE (PHI_RESULT (phi));
1333 base = fold_convert (type, base);
1334 if (step)
1336 if (POINTER_TYPE_P (type))
1337 step = convert_to_ptrofftype (step);
1338 else
1339 step = fold_convert (type, step);
1342 set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
1343 found = true;
1346 return found;
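/* For example (illustrative GIMPLE): a loop header PHI such as

     i_1 = PHI <0 (preheader), i_2 (latch)>
     ...
     i_2 = i_1 + 1;

   is recorded by find_bivs as an iv with base 0 and step 1; mark_bivs later
   flags both i_1 and i_2 as bivs.  */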
1349 /* Marks basic ivs. */
1351 static void
1352 mark_bivs (struct ivopts_data *data)
1354 gphi *phi;
1355 gimple *def;
1356 tree var;
1357 struct iv *iv, *incr_iv;
1358 class loop *loop = data->current_loop;
1359 basic_block incr_bb;
1360 gphi_iterator psi;
1362 data->bivs_not_used_in_addr = 0;
1363 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1365 phi = psi.phi ();
1367 iv = get_iv (data, PHI_RESULT (phi));
1368 if (!iv)
1369 continue;
1371 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1372 def = SSA_NAME_DEF_STMT (var);
1373 /* Don't mark iv peeled from other one as biv. */
1374 if (def
1375 && gimple_code (def) == GIMPLE_PHI
1376 && gimple_bb (def) == loop->header)
1377 continue;
1379 incr_iv = get_iv (data, var);
1380 if (!incr_iv)
1381 continue;
1383 /* If the increment is in the subloop, ignore it. */
1384 incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1385 if (incr_bb->loop_father != data->current_loop
1386 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1387 continue;
1389 iv->biv_p = true;
1390 incr_iv->biv_p = true;
1391 if (iv->no_overflow)
1392 data->bivs_not_used_in_addr++;
1393 if (incr_iv->no_overflow)
1394 data->bivs_not_used_in_addr++;
1398 /* Checks whether STMT defines a linear induction variable and stores its
1399 parameters to IV. */
1401 static bool
1402 find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv)
1404 tree lhs, stop;
1405 class loop *loop = data->current_loop;
1407 iv->base = NULL_TREE;
1408 iv->step = NULL_TREE;
1410 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1411 return false;
1413 lhs = gimple_assign_lhs (stmt);
1414 if (TREE_CODE (lhs) != SSA_NAME)
1415 return false;
1417 if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1418 return false;
1420 /* Stop expanding the iv base at the first ssa var referred to by the iv
1421 step. Ideally we should stop at any ssa var, but since that is expensive
1422 and such cases are unusual, we just do it on the first one.
1424 See PR64705 for the rationale. */
1425 stop = extract_single_var_from_expr (iv->step);
1426 iv->base = expand_simple_operations (iv->base, stop);
1427 if (contains_abnormal_ssa_name_p (iv->base)
1428 || contains_abnormal_ssa_name_p (iv->step))
1429 return false;
1431 /* If STMT could throw, then do not consider STMT as defining a GIV.
1432 While this will suppress optimizations, we cannot safely delete this
1433 GIV and associated statements, even if it appears it is not used. */
1434 if (stmt_could_throw_p (cfun, stmt))
1435 return false;
1437 return true;
1440 /* Finds general ivs in statement STMT. */
1442 static void
1443 find_givs_in_stmt (struct ivopts_data *data, gimple *stmt)
1445 affine_iv iv;
1447 if (!find_givs_in_stmt_scev (data, stmt, &iv))
1448 return;
1450 set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
1453 /* Finds general ivs in basic block BB. */
1455 static void
1456 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1458 gimple_stmt_iterator bsi;
1460 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1461 find_givs_in_stmt (data, gsi_stmt (bsi));
1464 /* Finds general ivs. */
1466 static void
1467 find_givs (struct ivopts_data *data, basic_block *body)
1469 class loop *loop = data->current_loop;
1470 unsigned i;
1472 for (i = 0; i < loop->num_nodes; i++)
1473 find_givs_in_bb (data, body[i]);
1476 /* For each ssa name defined in LOOP determines whether it is an induction
1477 variable and if so, its initial value and step. */
1479 static bool
1480 find_induction_variables (struct ivopts_data *data, basic_block *body)
1482 unsigned i;
1483 bitmap_iterator bi;
1485 if (!find_bivs (data))
1486 return false;
1488 find_givs (data, body);
1489 mark_bivs (data);
1491 if (dump_file && (dump_flags & TDF_DETAILS))
1493 class tree_niter_desc *niter = niter_for_single_dom_exit (data);
1495 if (niter)
1497 fprintf (dump_file, " number of iterations ");
1498 print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1499 if (!integer_zerop (niter->may_be_zero))
1501 fprintf (dump_file, "; zero if ");
1502 print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1504 fprintf (dump_file, "\n");
1507 fprintf (dump_file, "\n<Induction Vars>:\n");
1508 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1510 struct version_info *info = ver_info (data, i);
1511 if (info->iv && info->iv->step && !integer_zerop (info->iv->step))
1512 dump_iv (dump_file, ver_info (data, i)->iv, true, 0);
1516 return true;
1519 /* Records a use of TYPE at *USE_P in STMT whose value is IV in GROUP.
1520 For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1521 is the const offset stripped from IV base and MEM_TYPE is the type
1522 of the memory being addressed. For uses of other types, ADDR_BASE
1523 and ADDR_OFFSET are zero by default and MEM_TYPE is NULL_TREE. */
1525 static struct iv_use *
1526 record_use (struct iv_group *group, tree *use_p, struct iv *iv,
1527 gimple *stmt, enum use_type type, tree mem_type,
1528 tree addr_base, poly_uint64 addr_offset)
1530 struct iv_use *use = XCNEW (struct iv_use);
1532 use->id = group->vuses.length ();
1533 use->group_id = group->id;
1534 use->type = type;
1535 use->mem_type = mem_type;
1536 use->iv = iv;
1537 use->stmt = stmt;
1538 use->op_p = use_p;
1539 use->addr_base = addr_base;
1540 use->addr_offset = addr_offset;
1542 group->vuses.safe_push (use);
1543 return use;
1546 /* Checks whether OP is a loop-level invariant and if so, records it.
1547 NONLINEAR_USE is true if the invariant is used in a way we do not
1548 handle specially. */
1550 static void
1551 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1553 basic_block bb;
1554 struct version_info *info;
1556 if (TREE_CODE (op) != SSA_NAME
1557 || virtual_operand_p (op))
1558 return;
1560 bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1561 if (bb
1562 && flow_bb_inside_loop_p (data->current_loop, bb))
1563 return;
1565 info = name_info (data, op);
1566 info->name = op;
1567 info->has_nonlin_use |= nonlinear_use;
1568 if (!info->inv_id)
1569 info->inv_id = ++data->max_inv_var_id;
1570 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1573 /* Record a group of TYPE. */
1575 static struct iv_group *
1576 record_group (struct ivopts_data *data, enum use_type type)
1578 struct iv_group *group = XCNEW (struct iv_group);
1580 group->id = data->vgroups.length ();
1581 group->type = type;
1582 group->related_cands = BITMAP_ALLOC (NULL);
1583 group->vuses.create (1);
1584 group->doloop_p = false;
1586 data->vgroups.safe_push (group);
1587 return group;
1590 /* Record a use of TYPE at *USE_P in STMT whose value is IV in a group.
1591 New group will be created if there is no existing group for the use.
1592 MEM_TYPE is the type of memory being addressed, or NULL if this
1593 isn't an address reference. */
1595 static struct iv_use *
1596 record_group_use (struct ivopts_data *data, tree *use_p,
1597 struct iv *iv, gimple *stmt, enum use_type type,
1598 tree mem_type)
1600 tree addr_base = NULL;
1601 struct iv_group *group = NULL;
1602 poly_uint64 addr_offset = 0;
1604 /* A non address type use is recorded in a new group below; an address type use may join an existing compatible group. */
1605 if (address_p (type))
1607 unsigned int i;
1609 gcc_assert (POINTER_TYPE_P (TREE_TYPE (iv->base)));
1610 tree addr_toffset;
1611 split_constant_offset (iv->base, &addr_base, &addr_toffset);
1612 addr_offset = int_cst_value (addr_toffset);
1613 for (i = 0; i < data->vgroups.length (); i++)
1615 struct iv_use *use;
1617 group = data->vgroups[i];
1618 use = group->vuses[0];
1619 if (!address_p (use->type))
1620 continue;
1622 /* Check if it has the same stripped base and step. */
1623 if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
1624 && operand_equal_p (iv->step, use->iv->step, 0)
1625 && operand_equal_p (addr_base, use->addr_base, 0))
1626 break;
1628 if (i == data->vgroups.length ())
1629 group = NULL;
1632 if (!group)
1633 group = record_group (data, type);
1635 return record_use (group, use_p, iv, stmt, type, mem_type,
1636 addr_base, addr_offset);
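/* For example (illustrative): address uses a[i], a[i + 1] and a[i + 4] in the
   same loop share the stripped base and step, so they end up in one group
   with addr_offset 0, 1 and 4 element sizes respectively, while an access to
   a different object such as b[i] starts a new group.  */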
1639 /* Checks whether the use OP is interesting and if so, records it. */
1641 static struct iv_use *
1642 find_interesting_uses_op (struct ivopts_data *data, tree op)
1644 struct iv *iv;
1645 gimple *stmt;
1646 struct iv_use *use;
1648 if (TREE_CODE (op) != SSA_NAME)
1649 return NULL;
1651 iv = get_iv (data, op);
1652 if (!iv)
1653 return NULL;
1655 if (iv->nonlin_use)
1657 gcc_assert (iv->nonlin_use->type == USE_NONLINEAR_EXPR);
1658 return iv->nonlin_use;
1661 if (integer_zerop (iv->step))
1663 record_invariant (data, op, true);
1664 return NULL;
1667 stmt = SSA_NAME_DEF_STMT (op);
1668 gcc_assert (gimple_code (stmt) == GIMPLE_PHI || is_gimple_assign (stmt));
1670 use = record_group_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR, NULL_TREE);
1671 iv->nonlin_use = use;
1672 return use;
1675 /* Indicate how compare type iv_use can be handled. */
1676 enum comp_iv_rewrite
1678 COMP_IV_NA,
1679 /* We may rewrite compare type iv_use by expressing value of the iv_use. */
1680 COMP_IV_EXPR,
1681 /* We may rewrite compare type iv_uses on both sides of comparison by
1682 expressing value of each iv_use. */
1683 COMP_IV_EXPR_2,
1684 /* We may rewrite compare type iv_use by expressing value of the iv_use
1685 or by eliminating it with other iv_cand. */
1686 COMP_IV_ELIM
1689 /* Given a condition in statement STMT, checks whether it is a compare
1690 of an induction variable and an invariant. If this is the case,
1691 CONTROL_VAR is set to the location of the iv, BOUND to the location of
1692 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1693 induction variable descriptions, and the matching comp_iv_rewrite value
1694 is returned. If this is not the case, CONTROL_VAR and BOUND are set to
1695 the arguments of the condition and COMP_IV_NA is returned. */
1697 static enum comp_iv_rewrite
1698 extract_cond_operands (struct ivopts_data *data, gimple *stmt,
1699 tree **control_var, tree **bound,
1700 struct iv **iv_var, struct iv **iv_bound)
1702 /* The objects returned when COND has constant operands. */
1703 static struct iv const_iv;
1704 static tree zero;
1705 tree *op0 = &zero, *op1 = &zero;
1706 struct iv *iv0 = &const_iv, *iv1 = &const_iv;
1707 enum comp_iv_rewrite rewrite_type = COMP_IV_NA;
1709 if (gimple_code (stmt) == GIMPLE_COND)
1711 gcond *cond_stmt = as_a <gcond *> (stmt);
1712 op0 = gimple_cond_lhs_ptr (cond_stmt);
1713 op1 = gimple_cond_rhs_ptr (cond_stmt);
1715 else
1717 op0 = gimple_assign_rhs1_ptr (stmt);
1718 op1 = gimple_assign_rhs2_ptr (stmt);
1721 zero = integer_zero_node;
1722 const_iv.step = integer_zero_node;
1724 if (TREE_CODE (*op0) == SSA_NAME)
1725 iv0 = get_iv (data, *op0);
1726 if (TREE_CODE (*op1) == SSA_NAME)
1727 iv1 = get_iv (data, *op1);
1729 /* If both sides of the comparison are IVs, we can express the ivs on both ends. */
1730 if (iv0 && iv1 && !integer_zerop (iv0->step) && !integer_zerop (iv1->step))
1732 rewrite_type = COMP_IV_EXPR_2;
1733 goto end;
1736 /* If neither side of the comparison is an IV. */
1737 if ((!iv0 || integer_zerop (iv0->step))
1738 && (!iv1 || integer_zerop (iv1->step)))
1739 goto end;
1741 /* Control variable may be on the other side. */
1742 if (!iv0 || integer_zerop (iv0->step))
1744 std::swap (op0, op1);
1745 std::swap (iv0, iv1);
1747 /* If one side is IV and the other side isn't loop invariant. */
1748 if (!iv1)
1749 rewrite_type = COMP_IV_EXPR;
1750 /* If one side is IV and the other side is loop invariant. */
1751 else if (!integer_zerop (iv0->step) && integer_zerop (iv1->step))
1752 rewrite_type = COMP_IV_ELIM;
1754 end:
1755 if (control_var)
1756 *control_var = op0;
1757 if (iv_var)
1758 *iv_var = iv0;
1759 if (bound)
1760 *bound = op1;
1761 if (iv_bound)
1762 *iv_bound = iv1;
1764 return rewrite_type;
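/* Classification examples for the function above (illustrative): for a
   condition i < n where i is an iv and n is a loop invariant, COMP_IV_ELIM
   is returned with i as the control variable; for i < j with both ivs,
   COMP_IV_EXPR_2; for i < x where x is defined inside the loop but is not an
   iv, COMP_IV_EXPR; and for a comparison of two invariants, COMP_IV_NA.  */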
1767 /* Checks whether the condition in STMT is interesting and if so,
1768 records it. */
1770 static void
1771 find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt)
1773 tree *var_p, *bound_p;
1774 struct iv *var_iv, *bound_iv;
1775 enum comp_iv_rewrite ret;
1777 ret = extract_cond_operands (data, stmt,
1778 &var_p, &bound_p, &var_iv, &bound_iv);
1779 if (ret == COMP_IV_NA)
1781 find_interesting_uses_op (data, *var_p);
1782 find_interesting_uses_op (data, *bound_p);
1783 return;
1786 record_group_use (data, var_p, var_iv, stmt, USE_COMPARE, NULL_TREE);
1787 /* Record compare type iv_use for iv on the other side of comparison. */
1788 if (ret == COMP_IV_EXPR_2)
1789 record_group_use (data, bound_p, bound_iv, stmt, USE_COMPARE, NULL_TREE);
1792 /* Returns the outermost loop EXPR is obviously invariant in
1793 relative to the loop LOOP, i.e. if all its operands are defined
1794 outside of the returned loop. Returns NULL if EXPR is not
1795 even obviously invariant in LOOP. */
1797 class loop *
1798 outermost_invariant_loop_for_expr (class loop *loop, tree expr)
1800 basic_block def_bb;
1801 unsigned i, len;
1803 if (is_gimple_min_invariant (expr))
1804 return current_loops->tree_root;
1806 if (TREE_CODE (expr) == SSA_NAME)
1808 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1809 if (def_bb)
1811 if (flow_bb_inside_loop_p (loop, def_bb))
1812 return NULL;
1813 return superloop_at_depth (loop,
1814 loop_depth (def_bb->loop_father) + 1);
1817 return current_loops->tree_root;
1820 if (!EXPR_P (expr))
1821 return NULL;
1823 unsigned maxdepth = 0;
1824 len = TREE_OPERAND_LENGTH (expr);
1825 for (i = 0; i < len; i++)
1827 class loop *ivloop;
1828 if (!TREE_OPERAND (expr, i))
1829 continue;
1831 ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1832 if (!ivloop)
1833 return NULL;
1834 maxdepth = MAX (maxdepth, loop_depth (ivloop));
1837 return superloop_at_depth (loop, maxdepth);
1840 /* Returns true if expression EXPR is obviously invariant in LOOP,
1841 i.e. if all its operands are defined outside of the LOOP. LOOP
1842 should not be the function body. */
1844 bool
1845 expr_invariant_in_loop_p (class loop *loop, tree expr)
1847 basic_block def_bb;
1848 unsigned i, len;
1850 gcc_assert (loop_depth (loop) > 0);
1852 if (is_gimple_min_invariant (expr))
1853 return true;
1855 if (TREE_CODE (expr) == SSA_NAME)
1857 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1858 if (def_bb
1859 && flow_bb_inside_loop_p (loop, def_bb))
1860 return false;
1862 return true;
1865 if (!EXPR_P (expr))
1866 return false;
1868 len = TREE_OPERAND_LENGTH (expr);
1869 for (i = 0; i < len; i++)
1870 if (TREE_OPERAND (expr, i)
1871 && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1872 return false;
1874 return true;
1877 /* Given expression EXPR which computes inductive values with respect
1878 to loop recorded in DATA, this function returns biv from which EXPR
1879 is derived by tracing definition chains of ssa variables in EXPR. */
1881 static struct iv*
1882 find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
1884 struct iv *iv;
1885 unsigned i, n;
1886 tree e2, e1;
1887 enum tree_code code;
1888 gimple *stmt;
1890 if (expr == NULL_TREE)
1891 return NULL;
1893 if (is_gimple_min_invariant (expr))
1894 return NULL;
1896 code = TREE_CODE (expr);
1897 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1899 n = TREE_OPERAND_LENGTH (expr);
1900 for (i = 0; i < n; i++)
1902 iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
1903 if (iv)
1904 return iv;
1908 /* Stop if it's not ssa name. */
1909 if (code != SSA_NAME)
1910 return NULL;
1912 iv = get_iv (data, expr);
1913 if (!iv || integer_zerop (iv->step))
1914 return NULL;
1915 else if (iv->biv_p)
1916 return iv;
1918 stmt = SSA_NAME_DEF_STMT (expr);
1919 if (gphi *phi = dyn_cast <gphi *> (stmt))
1921 ssa_op_iter iter;
1922 use_operand_p use_p;
1923 basic_block phi_bb = gimple_bb (phi);
1925 /* Skip loop header PHI that doesn't define biv. */
1926 if (phi_bb->loop_father == data->current_loop)
1927 return NULL;
1929 if (virtual_operand_p (gimple_phi_result (phi)))
1930 return NULL;
1932 FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
1934 tree use = USE_FROM_PTR (use_p);
1935 iv = find_deriving_biv_for_expr (data, use);
1936 if (iv)
1937 return iv;
1939 return NULL;
1941 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1942 return NULL;
1944 e1 = gimple_assign_rhs1 (stmt);
1945 code = gimple_assign_rhs_code (stmt);
1946 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1947 return find_deriving_biv_for_expr (data, e1);
1949 switch (code)
1951 case MULT_EXPR:
1952 case PLUS_EXPR:
1953 case MINUS_EXPR:
1954 case POINTER_PLUS_EXPR:
1955 /* Increments, decrements and multiplications by a constant
1956 are simple. */
1957 e2 = gimple_assign_rhs2 (stmt);
1958 iv = find_deriving_biv_for_expr (data, e2);
1959 if (iv)
1960 return iv;
1961 gcc_fallthrough ();
1963 CASE_CONVERT:
1964 /* Casts are simple. */
1965 return find_deriving_biv_for_expr (data, e1);
1967 default:
1968 break;
1971 return NULL;
1974 /* Record BIV, its predecessor and successor that they are used in
1975 address type uses. */
1977 static void
1978 record_biv_for_address_use (struct ivopts_data *data, struct iv *biv)
1980 unsigned i;
1981 tree type, base_1, base_2;
1982 bitmap_iterator bi;
1984 if (!biv || !biv->biv_p || integer_zerop (biv->step)
1985 || biv->have_address_use || !biv->no_overflow)
1986 return;
1988 type = TREE_TYPE (biv->base);
1989 if (!INTEGRAL_TYPE_P (type))
1990 return;
1992 biv->have_address_use = true;
1993 data->bivs_not_used_in_addr--;
1994 base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step);
1995 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1997 struct iv *iv = ver_info (data, i)->iv;
1999 if (!iv || !iv->biv_p || integer_zerop (iv->step)
2000 || iv->have_address_use || !iv->no_overflow)
2001 continue;
2003 if (type != TREE_TYPE (iv->base)
2004 || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
2005 continue;
2007 if (!operand_equal_p (biv->step, iv->step, 0))
2008 continue;
2010 base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step);
2011 if (operand_equal_p (base_1, iv->base, 0)
2012 || operand_equal_p (base_2, biv->base, 0))
2014 iv->have_address_use = true;
2015 data->bivs_not_used_in_addr--;
2020 /* Cumulates the steps of indices into DATA and replaces their values with the
2021 initial ones. Returns false when the value of the index cannot be determined.
2022 Callback for for_each_index. */
2024 struct ifs_ivopts_data
2026 struct ivopts_data *ivopts_data;
2027 gimple *stmt;
2028 tree step;
2031 static bool
2032 idx_find_step (tree base, tree *idx, void *data)
2034 struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
2035 struct iv *iv;
2036 bool use_overflow_semantics = false;
2037 tree step, iv_base, iv_step, lbound, off;
2038 class loop *loop = dta->ivopts_data->current_loop;
2040 /* If base is a component ref, require that the offset of the reference
2041 be invariant. */
2042 if (TREE_CODE (base) == COMPONENT_REF)
2044 off = component_ref_field_offset (base);
2045 return expr_invariant_in_loop_p (loop, off);
2048 /* If base is array, first check whether we will be able to move the
2049 reference out of the loop (in order to take its address in strength
2050 reduction). In order for this to work we need both lower bound
2051 and step to be loop invariants. */
2052 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2054 /* Moreover, for a range, the size needs to be invariant as well. */
2055 if (TREE_CODE (base) == ARRAY_RANGE_REF
2056 && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
2057 return false;
2059 step = array_ref_element_size (base);
2060 lbound = array_ref_low_bound (base);
2062 if (!expr_invariant_in_loop_p (loop, step)
2063 || !expr_invariant_in_loop_p (loop, lbound))
2064 return false;
2067 if (TREE_CODE (*idx) != SSA_NAME)
2068 return true;
2070 iv = get_iv (dta->ivopts_data, *idx);
2071 if (!iv)
2072 return false;
2074 /* XXX For a base of *D42 with iv->base being &x[0], we produce
2075 *&x[0], which is not folded and does not trigger the
2076 ARRAY_REF path below. */
2077 *idx = iv->base;
2079 if (integer_zerop (iv->step))
2080 return true;
2082 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2084 step = array_ref_element_size (base);
2086 /* We only handle addresses whose step is an integer constant. */
2087 if (TREE_CODE (step) != INTEGER_CST)
2088 return false;
2090 else
2091 /* The step for pointer arithmetic is already 1 byte. */
2092 step = size_one_node;
2094 iv_base = iv->base;
2095 iv_step = iv->step;
2096 if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
2097 use_overflow_semantics = true;
2099 if (!convert_affine_scev (dta->ivopts_data->current_loop,
2100 sizetype, &iv_base, &iv_step, dta->stmt,
2101 use_overflow_semantics))
2103 /* The index might wrap. */
2104 return false;
2107 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
2108 dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
2110 if (dta->ivopts_data->bivs_not_used_in_addr)
2112 if (!iv->biv_p)
2113 iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name);
2115 record_biv_for_address_use (dta->ivopts_data, iv);
2117 return true;
2120 /* Records use in index IDX. Callback for for_each_index. The ivopts data
2121 object is passed to it in DATA. */
2123 static bool
2124 idx_record_use (tree base, tree *idx,
2125 void *vdata)
2127 struct ivopts_data *data = (struct ivopts_data *) vdata;
2128 find_interesting_uses_op (data, *idx);
2129 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2131 if (TREE_OPERAND (base, 2))
2132 find_interesting_uses_op (data, TREE_OPERAND (base, 2));
2133 if (TREE_OPERAND (base, 3))
2134 find_interesting_uses_op (data, TREE_OPERAND (base, 3));
2136 return true;
2139 /* If we can prove that TOP = cst * BOT for some constant cst,
2140 store cst to MUL and return true. Otherwise return false.
2141 The returned value is always sign-extended, regardless of the
2142 signedness of TOP and BOT. */
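/* For illustration: TOP = n * 8 and BOT = n yields *MUL = 8; TOP = 12 and
   BOT = 4 yields *MUL = 3; TOP = 10 and BOT = 4 fails because 10 is not a
   multiple of 4.  */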
2144 static bool
2145 constant_multiple_of (tree top, tree bot, widest_int *mul)
2147 tree mby;
2148 enum tree_code code;
2149 unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
2150 widest_int res, p0, p1;
2152 STRIP_NOPS (top);
2153 STRIP_NOPS (bot);
2155 if (operand_equal_p (top, bot, 0))
2157 *mul = 1;
2158 return true;
2161 code = TREE_CODE (top);
2162 switch (code)
2164 case MULT_EXPR:
2165 mby = TREE_OPERAND (top, 1);
2166 if (TREE_CODE (mby) != INTEGER_CST)
2167 return false;
2169 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
2170 return false;
2172 *mul = wi::sext (res * wi::to_widest (mby), precision);
2173 return true;
2175 case PLUS_EXPR:
2176 case MINUS_EXPR:
2177 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
2178 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
2179 return false;
2181 if (code == MINUS_EXPR)
2182 p1 = -p1;
2183 *mul = wi::sext (p0 + p1, precision);
2184 return true;
2186 case INTEGER_CST:
2187 if (TREE_CODE (bot) != INTEGER_CST)
2188 return false;
2190 p0 = widest_int::from (wi::to_wide (top), SIGNED);
2191 p1 = widest_int::from (wi::to_wide (bot), SIGNED);
2192 if (p1 == 0)
2193 return false;
2194 *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
2195 return res == 0;
2197 default:
2198 if (POLY_INT_CST_P (top)
2199 && POLY_INT_CST_P (bot)
2200 && constant_multiple_p (wi::to_poly_widest (top),
2201 wi::to_poly_widest (bot), mul))
2202 return true;
2204 return false;
2208 /* Return true if memory reference REF with step STEP may be unaligned. */
2210 static bool
2211 may_be_unaligned_p (tree ref, tree step)
2213 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
2214 thus they are not misaligned. */
2215 if (TREE_CODE (ref) == TARGET_MEM_REF)
2216 return false;
2218 unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
2219 if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
2220 align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
2222 unsigned HOST_WIDE_INT bitpos;
2223 unsigned int ref_align;
2224 get_object_alignment_1 (ref, &ref_align, &bitpos);
2225 if (ref_align < align
2226 || (bitpos % align) != 0
2227 || (bitpos % BITS_PER_UNIT) != 0)
2228 return true;
2230 unsigned int trailing_zeros = tree_ctz (step);
2231 if (trailing_zeros < HOST_BITS_PER_INT
2232 && (1U << trailing_zeros) * BITS_PER_UNIT < align)
2233 return true;
2235 return false;
2238 /* Return true if EXPR may be non-addressable. */
2240 bool
2241 may_be_nonaddressable_p (tree expr)
2243 switch (TREE_CODE (expr))
2245 case VAR_DECL:
2246 /* Check if it's a register variable. */
2247 return DECL_HARD_REGISTER (expr);
2249 case TARGET_MEM_REF:
2250 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
2251 target, thus they are always addressable. */
2252 return false;
2254 case MEM_REF:
2255 /* Likewise for MEM_REFs, modulo the storage order. */
2256 return REF_REVERSE_STORAGE_ORDER (expr);
2258 case BIT_FIELD_REF:
2259 if (REF_REVERSE_STORAGE_ORDER (expr))
2260 return true;
2261 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2263 case COMPONENT_REF:
2264 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2265 return true;
2266 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
2267 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2269 case ARRAY_REF:
2270 case ARRAY_RANGE_REF:
2271 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2272 return true;
2273 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2275 case VIEW_CONVERT_EXPR:
2276 /* This kind of view-conversion may wrap non-addressable objects
2277 and make them look addressable. After some processing the
2278 non-addressability may be uncovered again, causing ADDR_EXPRs
2279 of inappropriate objects to be built. */
2280 if (is_gimple_reg (TREE_OPERAND (expr, 0))
2281 || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
2282 return true;
2283 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2285 CASE_CONVERT:
2286 return true;
2288 default:
2289 break;
2292 return false;
2295 /* Finds addresses in *OP_P inside STMT. */
2297 static void
2298 find_interesting_uses_address (struct ivopts_data *data, gimple *stmt,
2299 tree *op_p)
2301 tree base = *op_p, step = size_zero_node;
2302 struct iv *civ;
2303 struct ifs_ivopts_data ifs_ivopts_data;
2305 /* Do not play with volatile memory references. A bit too conservative,
2306 perhaps, but safe. */
2307 if (gimple_has_volatile_ops (stmt))
2308 goto fail;
2310 /* Ignore bitfields for now. Not really something terribly complicated
2311 to handle. TODO. */
2312 if (TREE_CODE (base) == BIT_FIELD_REF)
2313 goto fail;
2315 base = unshare_expr (base);
2317 if (TREE_CODE (base) == TARGET_MEM_REF)
2319 tree type = build_pointer_type (TREE_TYPE (base));
2320 tree astep;
2322 if (TMR_BASE (base)
2323 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
2325 civ = get_iv (data, TMR_BASE (base));
2326 if (!civ)
2327 goto fail;
2329 TMR_BASE (base) = civ->base;
2330 step = civ->step;
2332 if (TMR_INDEX2 (base)
2333 && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
2335 civ = get_iv (data, TMR_INDEX2 (base));
2336 if (!civ)
2337 goto fail;
2339 TMR_INDEX2 (base) = civ->base;
2340 step = civ->step;
2342 if (TMR_INDEX (base)
2343 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
2345 civ = get_iv (data, TMR_INDEX (base));
2346 if (!civ)
2347 goto fail;
2349 TMR_INDEX (base) = civ->base;
2350 astep = civ->step;
2352 if (astep)
2354 if (TMR_STEP (base))
2355 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
2357 step = fold_build2 (PLUS_EXPR, type, step, astep);
2361 if (integer_zerop (step))
2362 goto fail;
2363 base = tree_mem_ref_addr (type, base);
2365 else
2367 ifs_ivopts_data.ivopts_data = data;
2368 ifs_ivopts_data.stmt = stmt;
2369 ifs_ivopts_data.step = size_zero_node;
2370 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
2371 || integer_zerop (ifs_ivopts_data.step))
2372 goto fail;
2373 step = ifs_ivopts_data.step;
2375 /* Check that the base expression is addressable. This needs
2376 to be done after substituting bases of IVs into it. */
2377 if (may_be_nonaddressable_p (base))
2378 goto fail;
2380 /* Moreover, on strict alignment platforms, check that it is
2381 sufficiently aligned. */
2382 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
2383 goto fail;
2385 base = build_fold_addr_expr (base);
2387 /* Substituting bases of IVs into the base expression might
2388 have caused folding opportunities. */
2389 if (TREE_CODE (base) == ADDR_EXPR)
2391 tree *ref = &TREE_OPERAND (base, 0);
2392 while (handled_component_p (*ref))
2393 ref = &TREE_OPERAND (*ref, 0);
2394 if (TREE_CODE (*ref) == MEM_REF)
2396 tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2397 TREE_OPERAND (*ref, 0),
2398 TREE_OPERAND (*ref, 1));
2399 if (tem)
2400 *ref = tem;
2405 civ = alloc_iv (data, base, step);
2406 /* Fail if base object of this memory reference is unknown. */
2407 if (civ->base_object == NULL_TREE)
2408 goto fail;
2410 record_group_use (data, op_p, civ, stmt, USE_REF_ADDRESS, TREE_TYPE (*op_p));
2411 return;
2413 fail:
2414 for_each_index (op_p, idx_record_use, data);
2417 /* Finds and records invariants used in STMT. */
2419 static void
2420 find_invariants_stmt (struct ivopts_data *data, gimple *stmt)
2422 ssa_op_iter iter;
2423 use_operand_p use_p;
2424 tree op;
2426 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2428 op = USE_FROM_PTR (use_p);
2429 record_invariant (data, op, false);
2433 /* CALL calls an internal function. If operand *OP_P will become an
2434 address when the call is expanded, return the type of the memory
2435 being addressed, otherwise return null. */
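/* For illustration: for a masked or length-controlled load the memory type
   is the type of the call's lhs, while for the corresponding stores it is
   the type of the stored-value argument, located via
   internal_fn_stored_value_index.  */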
2437 static tree
2438 get_mem_type_for_internal_fn (gcall *call, tree *op_p)
2440 switch (gimple_call_internal_fn (call))
2442 case IFN_MASK_LOAD:
2443 case IFN_MASK_LOAD_LANES:
2444 case IFN_LEN_LOAD:
2445 case IFN_LEN_MASK_LOAD:
2446 if (op_p == gimple_call_arg_ptr (call, 0))
2447 return TREE_TYPE (gimple_call_lhs (call));
2448 return NULL_TREE;
2450 case IFN_MASK_STORE:
2451 case IFN_MASK_STORE_LANES:
2452 case IFN_LEN_STORE:
2453 case IFN_LEN_MASK_STORE:
2455 if (op_p == gimple_call_arg_ptr (call, 0))
2457 internal_fn ifn = gimple_call_internal_fn (call);
2458 int index = internal_fn_stored_value_index (ifn);
2459 return TREE_TYPE (gimple_call_arg (call, index));
2461 return NULL_TREE;
2464 default:
2465 return NULL_TREE;
2469 /* IV is a (non-address) iv that describes operand *OP_P of STMT.
2470 Return true if the operand will become an address when STMT
2471 is expanded and record the associated address use if so. */
2473 static bool
2474 find_address_like_use (struct ivopts_data *data, gimple *stmt, tree *op_p,
2475 struct iv *iv)
2477 /* Fail if base object of this memory reference is unknown. */
2478 if (iv->base_object == NULL_TREE)
2479 return false;
2481 tree mem_type = NULL_TREE;
2482 if (gcall *call = dyn_cast <gcall *> (stmt))
2483 if (gimple_call_internal_p (call))
2484 mem_type = get_mem_type_for_internal_fn (call, op_p);
2485 if (mem_type)
2487 iv = alloc_iv (data, iv->base, iv->step);
2488 record_group_use (data, op_p, iv, stmt, USE_PTR_ADDRESS, mem_type);
2489 return true;
2491 return false;
2494 /* Finds interesting uses of induction variables in the statement STMT. */
2496 static void
2497 find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
2499 struct iv *iv;
2500 tree op, *lhs, *rhs;
2501 ssa_op_iter iter;
2502 use_operand_p use_p;
2503 enum tree_code code;
2505 find_invariants_stmt (data, stmt);
2507 if (gimple_code (stmt) == GIMPLE_COND)
2509 find_interesting_uses_cond (data, stmt);
2510 return;
2513 if (is_gimple_assign (stmt))
2515 lhs = gimple_assign_lhs_ptr (stmt);
2516 rhs = gimple_assign_rhs1_ptr (stmt);
2518 if (TREE_CODE (*lhs) == SSA_NAME)
2520 /* If the statement defines an induction variable, the uses are not
2521 interesting by themselves. */
2523 iv = get_iv (data, *lhs);
2525 if (iv && !integer_zerop (iv->step))
2526 return;
2529 code = gimple_assign_rhs_code (stmt);
2530 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2531 && (REFERENCE_CLASS_P (*rhs)
2532 || is_gimple_val (*rhs)))
2534 if (REFERENCE_CLASS_P (*rhs))
2535 find_interesting_uses_address (data, stmt, rhs);
2536 else
2537 find_interesting_uses_op (data, *rhs);
2539 if (REFERENCE_CLASS_P (*lhs))
2540 find_interesting_uses_address (data, stmt, lhs);
2541 return;
2543 else if (TREE_CODE_CLASS (code) == tcc_comparison)
2545 find_interesting_uses_cond (data, stmt);
2546 return;
2549 /* TODO -- we should also handle address uses of type
2551 memory = call (whatever);
2555 call (memory). */
2558 if (gimple_code (stmt) == GIMPLE_PHI
2559 && gimple_bb (stmt) == data->current_loop->header)
2561 iv = get_iv (data, PHI_RESULT (stmt));
2563 if (iv && !integer_zerop (iv->step))
2564 return;
2567 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2569 op = USE_FROM_PTR (use_p);
2571 if (TREE_CODE (op) != SSA_NAME)
2572 continue;
2574 iv = get_iv (data, op);
2575 if (!iv)
2576 continue;
2578 if (!find_address_like_use (data, stmt, use_p->use, iv))
2579 find_interesting_uses_op (data, op);
2583 /* Finds interesting uses of induction variables outside of loops
2584 on loop exit edge EXIT. */
2586 static void
2587 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2589 gphi *phi;
2590 gphi_iterator psi;
2591 tree def;
2593 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2595 phi = psi.phi ();
2596 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2597 if (!virtual_operand_p (def))
2598 find_interesting_uses_op (data, def);
2602 /* Return TRUE if OFFSET is within the range of [base + offset] addressing
2603 mode for memory reference represented by USE. */
2605 static GTY (()) vec<rtx, va_gc> *addr_list;
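/* For illustration: on a hypothetical target whose [base + offset] form only
   accepts 16-bit signed displacements, an OFFSET of 4096 would be accepted
   while an OFFSET of 40000 would be rejected.  */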
2607 static bool
2608 addr_offset_valid_p (struct iv_use *use, poly_int64 offset)
2610 rtx reg, addr;
2611 unsigned list_index;
2612 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2613 machine_mode addr_mode, mem_mode = TYPE_MODE (use->mem_type);
2615 list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2616 if (list_index >= vec_safe_length (addr_list))
2617 vec_safe_grow_cleared (addr_list, list_index + MAX_MACHINE_MODE, true);
2619 addr = (*addr_list)[list_index];
2620 if (!addr)
2622 addr_mode = targetm.addr_space.address_mode (as);
2623 reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2624 addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2625 (*addr_list)[list_index] = addr;
2627 else
2628 addr_mode = GET_MODE (addr);
2630 XEXP (addr, 1) = gen_int_mode (offset, addr_mode);
2631 return (memory_address_addr_space_p (mem_mode, addr, as));
2634 /* Comparison function to sort a group in ascending order of addr_offset. */
2636 static int
2637 group_compare_offset (const void *a, const void *b)
2639 const struct iv_use *const *u1 = (const struct iv_use *const *) a;
2640 const struct iv_use *const *u2 = (const struct iv_use *const *) b;
2642 return compare_sizes_for_sort ((*u1)->addr_offset, (*u2)->addr_offset);
2645 /* Check if small groups should be split. Return true if no group
2646 contains more than two uses with distinct addr_offsets. Return
2647 false otherwise. We want to split such groups because:
2649 1) Small groups don't have much benefit and may interfere with
2650 general candidate selection.
2651 2) The problem size with only small groups is usually small and the
2652 general algorithm can handle it well.
2654 TODO -- The above claim may not hold when we want to merge memory
2655 accesses with consecutive addresses. */
2657 static bool
2658 split_small_address_groups_p (struct ivopts_data *data)
2660 unsigned int i, j, distinct = 1;
2661 struct iv_use *pre;
2662 struct iv_group *group;
2664 for (i = 0; i < data->vgroups.length (); i++)
2666 group = data->vgroups[i];
2667 if (group->vuses.length () == 1)
2668 continue;
2670 gcc_assert (address_p (group->type));
2671 if (group->vuses.length () == 2)
2673 if (compare_sizes_for_sort (group->vuses[0]->addr_offset,
2674 group->vuses[1]->addr_offset) > 0)
2675 std::swap (group->vuses[0], group->vuses[1]);
2677 else
2678 group->vuses.qsort (group_compare_offset);
2680 if (distinct > 2)
2681 continue;
2683 distinct = 1;
2684 for (pre = group->vuses[0], j = 1; j < group->vuses.length (); j++)
2686 if (maybe_ne (group->vuses[j]->addr_offset, pre->addr_offset))
2688 pre = group->vuses[j];
2689 distinct++;
2692 if (distinct > 2)
2693 break;
2697 return (distinct <= 2);
2700 /* For each group of address type uses, this function further groups
2701 these uses according to the maximum offset supported by target's
2702 [base + offset] addressing mode. */
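/* For illustration, assuming small-group splitting is not forced: a group
   with uses at offsets 0, 4, 8 and 4096 from the same base stays together
   except for the use at offset 4096, which is moved into a new group if the
   target cannot encode that displacement in its [base + offset] form.  */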
2704 static void
2705 split_address_groups (struct ivopts_data *data)
2707 unsigned int i, j;
2708 /* Whether to split every group unconditionally. */
2709 bool split_p = split_small_address_groups_p (data);
2711 for (i = 0; i < data->vgroups.length (); i++)
2713 struct iv_group *new_group = NULL;
2714 struct iv_group *group = data->vgroups[i];
2715 struct iv_use *use = group->vuses[0];
2717 use->id = 0;
2718 use->group_id = group->id;
2719 if (group->vuses.length () == 1)
2720 continue;
2722 gcc_assert (address_p (use->type));
2724 for (j = 1; j < group->vuses.length ();)
2726 struct iv_use *next = group->vuses[j];
2727 poly_int64 offset = next->addr_offset - use->addr_offset;
2729 /* Split the group if asked to, or if the offset relative to the first
2730 use can't fit in the offset part of the addressing mode. IV uses
2731 having the same offset are still kept in one group. */
2732 if (maybe_ne (offset, 0)
2733 && (split_p || !addr_offset_valid_p (use, offset)))
2735 if (!new_group)
2736 new_group = record_group (data, group->type);
2737 group->vuses.ordered_remove (j);
2738 new_group->vuses.safe_push (next);
2739 continue;
2742 next->id = j;
2743 next->group_id = group->id;
2744 j++;
2749 /* Finds uses of the induction variables that are interesting. */
2751 static void
2752 find_interesting_uses (struct ivopts_data *data, basic_block *body)
2754 basic_block bb;
2755 gimple_stmt_iterator bsi;
2756 unsigned i;
2757 edge e;
2759 for (i = 0; i < data->current_loop->num_nodes; i++)
2761 edge_iterator ei;
2762 bb = body[i];
2764 FOR_EACH_EDGE (e, ei, bb->succs)
2765 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2766 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2767 find_interesting_uses_outside (data, e);
2769 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2770 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2771 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2772 if (!is_gimple_debug (gsi_stmt (bsi)))
2773 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2776 split_address_groups (data);
2778 if (dump_file && (dump_flags & TDF_DETAILS))
2780 fprintf (dump_file, "\n<IV Groups>:\n");
2781 dump_groups (dump_file, data);
2782 fprintf (dump_file, "\n");
2786 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2787 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2788 we are at the top-level of the processed address. */
2790 static tree
2791 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2792 poly_int64 *offset)
2794 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2795 enum tree_code code;
2796 tree type, orig_type = TREE_TYPE (expr);
2797 poly_int64 off0, off1;
2798 HOST_WIDE_INT st;
2799 tree orig_expr = expr;
2801 STRIP_NOPS (expr);
2803 type = TREE_TYPE (expr);
2804 code = TREE_CODE (expr);
2805 *offset = 0;
2807 switch (code)
2809 case POINTER_PLUS_EXPR:
2810 case PLUS_EXPR:
2811 case MINUS_EXPR:
2812 op0 = TREE_OPERAND (expr, 0);
2813 op1 = TREE_OPERAND (expr, 1);
2815 op0 = strip_offset_1 (op0, false, false, &off0);
2816 op1 = strip_offset_1 (op1, false, false, &off1);
2818 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2819 if (op0 == TREE_OPERAND (expr, 0)
2820 && op1 == TREE_OPERAND (expr, 1))
2821 return orig_expr;
2823 if (integer_zerop (op1))
2824 expr = op0;
2825 else if (integer_zerop (op0))
2827 if (code == MINUS_EXPR)
2828 expr = fold_build1 (NEGATE_EXPR, type, op1);
2829 else
2830 expr = op1;
2832 else
2833 expr = fold_build2 (code, type, op0, op1);
2835 return fold_convert (orig_type, expr);
2837 case MULT_EXPR:
2838 op1 = TREE_OPERAND (expr, 1);
2839 if (!cst_and_fits_in_hwi (op1))
2840 return orig_expr;
2842 op0 = TREE_OPERAND (expr, 0);
2843 op0 = strip_offset_1 (op0, false, false, &off0);
2844 if (op0 == TREE_OPERAND (expr, 0))
2845 return orig_expr;
2847 *offset = off0 * int_cst_value (op1);
2848 if (integer_zerop (op0))
2849 expr = op0;
2850 else
2851 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2853 return fold_convert (orig_type, expr);
2855 case ARRAY_REF:
2856 case ARRAY_RANGE_REF:
2857 if (!inside_addr)
2858 return orig_expr;
2860 step = array_ref_element_size (expr);
2861 if (!cst_and_fits_in_hwi (step))
2862 break;
2864 st = int_cst_value (step);
2865 op1 = TREE_OPERAND (expr, 1);
2866 op1 = strip_offset_1 (op1, false, false, &off1);
2867 *offset = off1 * st;
2869 if (top_compref
2870 && integer_zerop (op1))
2872 /* Strip the component reference completely. */
2873 op0 = TREE_OPERAND (expr, 0);
2874 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2875 *offset += off0;
2876 return op0;
2878 break;
2880 case COMPONENT_REF:
2882 tree field;
2884 if (!inside_addr)
2885 return orig_expr;
2887 tmp = component_ref_field_offset (expr);
2888 field = TREE_OPERAND (expr, 1);
2889 if (top_compref
2890 && cst_and_fits_in_hwi (tmp)
2891 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2893 HOST_WIDE_INT boffset, abs_off;
2895 /* Strip the component reference completely. */
2896 op0 = TREE_OPERAND (expr, 0);
2897 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2898 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2899 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2900 if (boffset < 0)
2901 abs_off = -abs_off;
2903 *offset = off0 + int_cst_value (tmp) + abs_off;
2904 return op0;
2907 break;
2909 case ADDR_EXPR:
2910 op0 = TREE_OPERAND (expr, 0);
2911 op0 = strip_offset_1 (op0, true, true, &off0);
2912 *offset += off0;
2914 if (op0 == TREE_OPERAND (expr, 0))
2915 return orig_expr;
2917 expr = build_fold_addr_expr (op0);
2918 return fold_convert (orig_type, expr);
2920 case MEM_REF:
2921 /* ??? Offset operand? */
2922 inside_addr = false;
2923 break;
2925 default:
2926 if (ptrdiff_tree_p (expr, offset) && maybe_ne (*offset, 0))
2927 return build_int_cst (orig_type, 0);
2928 return orig_expr;
2931 /* Default handling of expressions for which we want to recurse into
2932 the first operand. */
2933 op0 = TREE_OPERAND (expr, 0);
2934 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2935 *offset += off0;
2937 if (op0 == TREE_OPERAND (expr, 0)
2938 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2939 return orig_expr;
2941 expr = copy_node (expr);
2942 TREE_OPERAND (expr, 0) = op0;
2943 if (op1)
2944 TREE_OPERAND (expr, 1) = op1;
2946 /* Inside an address, we might strip the top-level component references,
2947 thus changing the type of the expression. Handling of ADDR_EXPR
2948 will fix that. */
2949 expr = fold_convert (orig_type, expr);
2951 return expr;
2954 /* Strips constant offsets from EXPR and stores them to OFFSET. */
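/* For illustration, assuming a 4-byte int element type: strip_offset on
   &a[4] returns &a with *OFFSET = 16, and on p + 24 (a POINTER_PLUS_EXPR
   with constant second operand) it returns p with *OFFSET = 24.  */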
2956 static tree
2957 strip_offset (tree expr, poly_uint64_pod *offset)
2959 poly_int64 off;
2960 tree core = strip_offset_1 (expr, false, false, &off);
2961 *offset = off;
2962 return core;
2965 /* Returns variant of TYPE that can be used as base for different uses.
2966 We return unsigned type with the same precision, which avoids problems
2967 with overflows. */
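/* For illustration: int maps to unsigned int, a pointer type maps to an
   unsigned integer type of the same precision, and an already unsigned
   type is returned unchanged.  */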
2969 static tree
2970 generic_type_for (tree type)
2972 if (POINTER_TYPE_P (type))
2973 return unsigned_type_for (type);
2975 if (TYPE_UNSIGNED (type))
2976 return type;
2978 return unsigned_type_for (type);
2981 /* Private data for walk_tree. */
2983 struct walk_tree_data
2985 bitmap *inv_vars;
2986 struct ivopts_data *idata;
2989 /* Callback function for walk_tree, it records invariants and symbol
2990 reference in *EXPR_P. DATA is the structure storing result info. */
2992 static tree
2993 find_inv_vars_cb (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2995 tree op = *expr_p;
2996 struct version_info *info;
2997 struct walk_tree_data *wdata = (struct walk_tree_data*) data;
2999 if (TREE_CODE (op) != SSA_NAME)
3000 return NULL_TREE;
3002 info = name_info (wdata->idata, op);
3003 /* Because we expand simple operations when finding IVs, a loop-invariant
3004 variable that isn't referred to by the original loop could be used now.
3005 Record such invariant variables here. */
3006 if (!info->iv)
3008 struct ivopts_data *idata = wdata->idata;
3009 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (op));
3011 if (!bb || !flow_bb_inside_loop_p (idata->current_loop, bb))
3013 tree steptype = TREE_TYPE (op);
3014 if (POINTER_TYPE_P (steptype))
3015 steptype = sizetype;
3016 set_iv (idata, op, op, build_int_cst (steptype, 0), true);
3017 record_invariant (idata, op, false);
3020 if (!info->inv_id || info->has_nonlin_use)
3021 return NULL_TREE;
3023 if (!*wdata->inv_vars)
3024 *wdata->inv_vars = BITMAP_ALLOC (NULL);
3025 bitmap_set_bit (*wdata->inv_vars, info->inv_id);
3027 return NULL_TREE;
3030 /* Records invariants in *EXPR_P. INV_VARS is the bitmap in which we should
3031 store them. */
3033 static inline void
3034 find_inv_vars (struct ivopts_data *data, tree *expr_p, bitmap *inv_vars)
3036 struct walk_tree_data wdata;
3038 if (!inv_vars)
3039 return;
3041 wdata.idata = data;
3042 wdata.inv_vars = inv_vars;
3043 walk_tree (expr_p, find_inv_vars_cb, &wdata, NULL);
3046 /* Get entry from invariant expr hash table for INV_EXPR. New entry
3047 will be recorded if it doesn't exist yet. Given the two exprs below:
3048 inv_expr + cst1, inv_expr + cst2
3049 It's hard to decide whether the constant part should be stripped
3050 or not. We choose not to strip it, based on the facts below:
3051 1) We would need to count the ADD cost for the constant part if it's
3052 stripped, which isn't always trivial where this function is called.
3053 2) Stripping the constant away may conflict with the following loop
3054 invariant hoisting pass.
3055 3) Not stripping the constant away results in more invariant exprs,
3056 which usually leads to decisions preferring lower register pressure. */
3058 static iv_inv_expr_ent *
3059 get_loop_invariant_expr (struct ivopts_data *data, tree inv_expr)
3061 STRIP_NOPS (inv_expr);
3063 if (poly_int_tree_p (inv_expr)
3064 || TREE_CODE (inv_expr) == SSA_NAME)
3065 return NULL;
3067 /* Don't strip constant part away as we used to. */
3069 /* Stores EXPR in DATA->inv_expr_tab, return pointer to iv_inv_expr_ent. */
3070 struct iv_inv_expr_ent ent;
3071 ent.expr = inv_expr;
3072 ent.hash = iterative_hash_expr (inv_expr, 0);
3073 struct iv_inv_expr_ent **slot = data->inv_expr_tab->find_slot (&ent, INSERT);
3075 if (!*slot)
3077 *slot = XNEW (struct iv_inv_expr_ent);
3078 (*slot)->expr = inv_expr;
3079 (*slot)->hash = ent.hash;
3080 (*slot)->id = ++data->max_inv_expr_id;
3083 return *slot;
3087 /* Return *TP if it is an SSA_NAME marked with TREE_VISITED, i.e., as
3088 unsuitable as ivopts candidates for potentially involving undefined
3089 behavior. */
3091 static tree
3092 find_ssa_undef (tree *tp, int *walk_subtrees, void *bb_)
3094 basic_block bb = (basic_block) bb_;
3095 if (TREE_CODE (*tp) == SSA_NAME
3096 && ssa_name_maybe_undef_p (*tp)
3097 && !ssa_name_any_use_dominates_bb_p (*tp, bb))
3098 return *tp;
3099 if (!EXPR_P (*tp))
3100 *walk_subtrees = 0;
3101 return NULL;
3104 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
3105 position to POS. If USE is not NULL, the candidate is set as related to
3106 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
3107 replacement of the final value of the iv by a direct computation. */
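/* Note: if an equivalent candidate (same position, increment statement,
   base, step and precision) has already been recorded, it is reused below
   and only its important/doloop flags are updated.  */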
3109 static struct iv_cand *
3110 add_candidate_1 (struct ivopts_data *data, tree base, tree step, bool important,
3111 enum iv_position pos, struct iv_use *use,
3112 gimple *incremented_at, struct iv *orig_iv = NULL,
3113 bool doloop = false)
3115 unsigned i;
3116 struct iv_cand *cand = NULL;
3117 tree type, orig_type;
3119 gcc_assert (base && step);
3121 /* -fkeep-gc-roots-live means that we have to keep a real pointer
3122 live, but the ivopts code may replace a real pointer with one
3123 pointing before or after the memory block that is then adjusted
3124 into the memory block during the loop. FIXME: It would likely be
3125 better to actually force the pointer live and still use ivopts;
3126 for example, it would be enough to write the pointer into memory
3127 and keep it there until after the loop. */
3128 if (flag_keep_gc_roots_live && POINTER_TYPE_P (TREE_TYPE (base)))
3129 return NULL;
3131 /* If BASE contains undefined SSA names make sure we only record
3132 the original IV. */
3133 bool involves_undefs = false;
3134 if (walk_tree (&base, find_ssa_undef, data->current_loop->header, NULL))
3136 if (pos != IP_ORIGINAL)
3137 return NULL;
3138 important = false;
3139 involves_undefs = true;
3142 /* For non-original variables, make sure their values are computed in a type
3143 that does not invoke undefined behavior on overflows (since in general,
3144 we cannot prove that these induction variables are non-wrapping). */
3145 if (pos != IP_ORIGINAL)
3147 orig_type = TREE_TYPE (base);
3148 type = generic_type_for (orig_type);
3149 if (type != orig_type)
3151 base = fold_convert (type, base);
3152 step = fold_convert (type, step);
3156 for (i = 0; i < data->vcands.length (); i++)
3158 cand = data->vcands[i];
3160 if (cand->pos != pos)
3161 continue;
3163 if (cand->incremented_at != incremented_at
3164 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3165 && cand->ainc_use != use))
3166 continue;
3168 if (operand_equal_p (base, cand->iv->base, 0)
3169 && operand_equal_p (step, cand->iv->step, 0)
3170 && (TYPE_PRECISION (TREE_TYPE (base))
3171 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
3172 break;
3175 if (i == data->vcands.length ())
3177 cand = XCNEW (struct iv_cand);
3178 cand->id = i;
3179 cand->iv = alloc_iv (data, base, step);
3180 cand->pos = pos;
3181 if (pos != IP_ORIGINAL)
3183 if (doloop)
3184 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "doloop");
3185 else
3186 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
3187 cand->var_after = cand->var_before;
3189 cand->important = important;
3190 cand->involves_undefs = involves_undefs;
3191 cand->incremented_at = incremented_at;
3192 cand->doloop_p = doloop;
3193 data->vcands.safe_push (cand);
3195 if (!poly_int_tree_p (step))
3197 find_inv_vars (data, &step, &cand->inv_vars);
3199 iv_inv_expr_ent *inv_expr = get_loop_invariant_expr (data, step);
3200 /* Share bitmap between inv_vars and inv_exprs for cand. */
3201 if (inv_expr != NULL)
3203 cand->inv_exprs = cand->inv_vars;
3204 cand->inv_vars = NULL;
3205 if (cand->inv_exprs)
3206 bitmap_clear (cand->inv_exprs);
3207 else
3208 cand->inv_exprs = BITMAP_ALLOC (NULL);
3210 bitmap_set_bit (cand->inv_exprs, inv_expr->id);
3214 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3215 cand->ainc_use = use;
3216 else
3217 cand->ainc_use = NULL;
3219 cand->orig_iv = orig_iv;
3220 if (dump_file && (dump_flags & TDF_DETAILS))
3221 dump_cand (dump_file, cand);
3224 cand->important |= important;
3225 cand->doloop_p |= doloop;
3227 /* Relate candidate to the group for which it is added. */
3228 if (use)
3229 bitmap_set_bit (data->vgroups[use->group_id]->related_cands, i);
3231 return cand;
3234 /* Returns true if incrementing the induction variable at the end of the LOOP
3235 is allowed.
3237 The purpose is to avoid splitting the latch edge with a biv increment, thus
3238 creating a jump, possibly confusing other optimization passes and leaving
3239 less freedom to the scheduler. So we allow IP_END only if IP_NORMAL is not
3240 available (so we do not have a better alternative), or if the latch edge
3241 is already nonempty. */
3243 static bool
3244 allow_ip_end_pos_p (class loop *loop)
3246 if (!ip_normal_pos (loop))
3247 return true;
3249 if (!empty_block_p (ip_end_pos (loop)))
3250 return true;
3252 return false;
3255 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
3256 Important field is set to IMPORTANT. */
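/* For illustration: for an address use whose step equals the size of the
   accessed mode, a target with post-increment addressing gets a candidate
   at position IP_AFTER_USE with the unchanged BASE, while a target with
   pre-increment addressing gets a candidate at IP_BEFORE_USE whose base is
   BASE - STEP.  */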
3258 static void
3259 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
3260 bool important, struct iv_use *use)
3262 basic_block use_bb = gimple_bb (use->stmt);
3263 machine_mode mem_mode;
3264 unsigned HOST_WIDE_INT cstepi;
3266 /* If we insert the increment in any position other than the standard
3267 ones, we must ensure that it is incremented once per iteration.
3268 It must not be in an inner nested loop, or one side of an if
3269 statement. */
3270 if (use_bb->loop_father != data->current_loop
3271 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
3272 || stmt_can_throw_internal (cfun, use->stmt)
3273 || !cst_and_fits_in_hwi (step))
3274 return;
3276 cstepi = int_cst_value (step);
3278 mem_mode = TYPE_MODE (use->mem_type);
3279 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
3280 || USE_STORE_PRE_INCREMENT (mem_mode))
3281 && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3282 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
3283 || USE_STORE_PRE_DECREMENT (mem_mode))
3284 && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3286 enum tree_code code = MINUS_EXPR;
3287 tree new_base;
3288 tree new_step = step;
3290 if (POINTER_TYPE_P (TREE_TYPE (base)))
3292 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3293 code = POINTER_PLUS_EXPR;
3295 else
3296 new_step = fold_convert (TREE_TYPE (base), new_step);
3297 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
3298 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
3299 use->stmt);
3301 if (((USE_LOAD_POST_INCREMENT (mem_mode)
3302 || USE_STORE_POST_INCREMENT (mem_mode))
3303 && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3304 || ((USE_LOAD_POST_DECREMENT (mem_mode)
3305 || USE_STORE_POST_DECREMENT (mem_mode))
3306 && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3308 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
3309 use->stmt);
3313 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
3314 position to POS. If USE is not NULL, the candidate is set as related to
3315 it. The candidate computation is scheduled before exit condition and at
3316 the end of loop. */
3318 static void
3319 add_candidate (struct ivopts_data *data, tree base, tree step, bool important,
3320 struct iv_use *use, struct iv *orig_iv = NULL,
3321 bool doloop = false)
3323 if (ip_normal_pos (data->current_loop))
3324 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL, orig_iv,
3325 doloop);
3326 /* Exclude the doloop candidate here since it requires a decrement, then a
3327 comparison and jump, so the IP_END position doesn't match. */
3328 if (!doloop && ip_end_pos (data->current_loop)
3329 && allow_ip_end_pos_p (data->current_loop))
3330 add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv);
3333 /* Adds standard iv candidates. */
3335 static void
3336 add_standard_iv_candidates (struct ivopts_data *data)
3338 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
3340 /* The same for a double-integer type if it is still fast enough. */
3341 if (TYPE_PRECISION
3342 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
3343 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
3344 add_candidate (data, build_int_cst (long_integer_type_node, 0),
3345 build_int_cst (long_integer_type_node, 1), true, NULL);
3347 /* The same for a double-integer type if it is still fast enough. */
3348 if (TYPE_PRECISION
3349 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
3350 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
3351 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
3352 build_int_cst (long_long_integer_type_node, 1), true, NULL);
3356 /* Adds candidates based on the old induction variable IV. */
3358 static void
3359 add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
3361 gimple *phi;
3362 tree def;
3363 struct iv_cand *cand;
3365 /* Check if this biv is used in an address type use. */
3366 if (iv->no_overflow && iv->have_address_use
3367 && INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
3368 && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
3370 tree base = fold_convert (sizetype, iv->base);
3371 tree step = fold_convert (sizetype, iv->step);
3373 /* Add iv cand of same precision as index part in TARGET_MEM_REF. */
3374 add_candidate (data, base, step, true, NULL, iv);
3375 /* Add iv cand of the original type only if it has nonlinear use. */
3376 if (iv->nonlin_use)
3377 add_candidate (data, iv->base, iv->step, true, NULL);
3379 else
3380 add_candidate (data, iv->base, iv->step, true, NULL);
3382 /* The same, but with initial value zero. */
3383 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
3384 add_candidate (data, size_int (0), iv->step, true, NULL);
3385 else
3386 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
3387 iv->step, true, NULL);
3389 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
3390 if (gimple_code (phi) == GIMPLE_PHI)
3392 /* Additionally record the possibility of leaving the original iv
3393 untouched. */
3394 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
3395 /* Don't add candidate if it's from another PHI node because
3396 it's an affine iv appearing in the form of PEELED_CHREC. */
3397 phi = SSA_NAME_DEF_STMT (def);
3398 if (gimple_code (phi) != GIMPLE_PHI)
3400 cand = add_candidate_1 (data,
3401 iv->base, iv->step, true, IP_ORIGINAL, NULL,
3402 SSA_NAME_DEF_STMT (def));
3403 if (cand)
3405 cand->var_before = iv->ssa_name;
3406 cand->var_after = def;
3409 else
3410 gcc_assert (gimple_bb (phi) == data->current_loop->header);
3414 /* Adds candidates based on the old induction variables. */
3416 static void
3417 add_iv_candidate_for_bivs (struct ivopts_data *data)
3419 unsigned i;
3420 struct iv *iv;
3421 bitmap_iterator bi;
3423 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
3425 iv = ver_info (data, i)->iv;
3426 if (iv && iv->biv_p && !integer_zerop (iv->step))
3427 add_iv_candidate_for_biv (data, iv);
3431 /* Record common candidate {BASE, STEP} derived from USE in hashtable. */
3433 static void
3434 record_common_cand (struct ivopts_data *data, tree base,
3435 tree step, struct iv_use *use)
3437 class iv_common_cand ent;
3438 class iv_common_cand **slot;
3440 ent.base = base;
3441 ent.step = step;
3442 ent.hash = iterative_hash_expr (base, 0);
3443 ent.hash = iterative_hash_expr (step, ent.hash);
3445 slot = data->iv_common_cand_tab->find_slot (&ent, INSERT);
3446 if (*slot == NULL)
3448 *slot = new iv_common_cand ();
3449 (*slot)->base = base;
3450 (*slot)->step = step;
3451 (*slot)->uses.create (8);
3452 (*slot)->hash = ent.hash;
3453 data->iv_common_cands.safe_push ((*slot));
3456 gcc_assert (use != NULL);
3457 (*slot)->uses.safe_push (use);
3458 return;
3461 /* Comparison function used to sort common candidates. */
3463 static int
3464 common_cand_cmp (const void *p1, const void *p2)
3466 unsigned n1, n2;
3467 const class iv_common_cand *const *const ccand1
3468 = (const class iv_common_cand *const *)p1;
3469 const class iv_common_cand *const *const ccand2
3470 = (const class iv_common_cand *const *)p2;
3472 n1 = (*ccand1)->uses.length ();
3473 n2 = (*ccand2)->uses.length ();
3474 return n2 - n1;
3477 /* Adds IV candidates based on the common candidates recorded. */
3479 static void
3480 add_iv_candidate_derived_from_uses (struct ivopts_data *data)
3482 unsigned i, j;
3483 struct iv_cand *cand_1, *cand_2;
3485 data->iv_common_cands.qsort (common_cand_cmp);
3486 for (i = 0; i < data->iv_common_cands.length (); i++)
3488 class iv_common_cand *ptr = data->iv_common_cands[i];
3490 /* Only add IV candidate if it's derived from multiple uses. */
3491 if (ptr->uses.length () <= 1)
3492 break;
3494 cand_1 = NULL;
3495 cand_2 = NULL;
3496 if (ip_normal_pos (data->current_loop))
3497 cand_1 = add_candidate_1 (data, ptr->base, ptr->step,
3498 false, IP_NORMAL, NULL, NULL);
3500 if (ip_end_pos (data->current_loop)
3501 && allow_ip_end_pos_p (data->current_loop))
3502 cand_2 = add_candidate_1 (data, ptr->base, ptr->step,
3503 false, IP_END, NULL, NULL);
3505 /* Bind deriving uses and the new candidates. */
3506 for (j = 0; j < ptr->uses.length (); j++)
3508 struct iv_group *group = data->vgroups[ptr->uses[j]->group_id];
3509 if (cand_1)
3510 bitmap_set_bit (group->related_cands, cand_1->id);
3511 if (cand_2)
3512 bitmap_set_bit (group->related_cands, cand_2->id);
3516 /* Release data since it is useless from this point. */
3517 data->iv_common_cand_tab->empty ();
3518 data->iv_common_cands.truncate (0);
3521 /* Adds candidates based on the value of USE's iv. */
3523 static void
3524 add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
3526 poly_uint64 offset;
3527 tree base;
3528 struct iv *iv = use->iv;
3529 tree basetype = TREE_TYPE (iv->base);
3531 /* Don't add a candidate for an iv_use whose base type is neither integer nor
3532 pointer, or doesn't have mode precision; instead, add a candidate for the
3533 corresponding scev in an unsigned type with the same precision. See PR93674 for more info. */
3534 if ((TREE_CODE (basetype) != INTEGER_TYPE && !POINTER_TYPE_P (basetype))
3535 || !type_has_mode_precision_p (basetype))
3537 basetype = lang_hooks.types.type_for_mode (TYPE_MODE (basetype),
3538 TYPE_UNSIGNED (basetype));
3539 add_candidate (data, fold_convert (basetype, iv->base),
3540 fold_convert (basetype, iv->step), false, NULL);
3541 return;
3544 add_candidate (data, iv->base, iv->step, false, use);
3546 /* Record common candidate for use in case it can be shared by others. */
3547 record_common_cand (data, iv->base, iv->step, use);
3549 /* Record common candidate with initial value zero. */
3550 basetype = TREE_TYPE (iv->base);
3551 if (POINTER_TYPE_P (basetype))
3552 basetype = sizetype;
3553 record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);
3555 /* Compare the cost of an address with an unscaled index with the cost of
3556 an address with a scaled index and add candidate if useful. */
3557 poly_int64 step;
3558 if (use != NULL
3559 && poly_int_tree_p (iv->step, &step)
3560 && address_p (use->type))
3562 poly_int64 new_step;
3563 unsigned int fact = preferred_mem_scale_factor
3564 (use->iv->base,
3565 TYPE_MODE (use->mem_type),
3566 optimize_loop_for_speed_p (data->current_loop));
3568 if (fact != 1
3569 && multiple_p (step, fact, &new_step))
3570 add_candidate (data, size_int (0),
3571 wide_int_to_tree (sizetype, new_step),
3572 true, NULL);
3575 /* Record common candidate with constant offset stripped in base.
3576 Like the use itself, we also add candidate directly for it. */
3577 base = strip_offset (iv->base, &offset);
3578 if (maybe_ne (offset, 0U) || base != iv->base)
3580 record_common_cand (data, base, iv->step, use);
3581 add_candidate (data, base, iv->step, false, use);
3584 /* Record common candidate with base_object removed in base. */
3585 base = iv->base;
3586 STRIP_NOPS (base);
3587 if (iv->base_object != NULL && TREE_CODE (base) == POINTER_PLUS_EXPR)
3589 tree step = iv->step;
3591 STRIP_NOPS (step);
3592 base = TREE_OPERAND (base, 1);
3593 step = fold_convert (sizetype, step);
3594 record_common_cand (data, base, step, use);
3595 /* Also record common candidate with offset stripped. */
3596 tree alt_base, alt_offset;
3597 split_constant_offset (base, &alt_base, &alt_offset);
3598 if (!integer_zerop (alt_offset))
3599 record_common_cand (data, alt_base, step, use);
3602 /* Finally, add auto-increment candidates. Make such variables
3603 important since other iv uses with the same base object may be based
3604 on them. */
3605 if (use != NULL && address_p (use->type))
3606 add_autoinc_candidates (data, iv->base, iv->step, true, use);
3609 /* Adds candidates based on the uses. */
3611 static void
3612 add_iv_candidate_for_groups (struct ivopts_data *data)
3614 unsigned i;
3616 /* Only add a candidate for the first use in each group. */
3617 for (i = 0; i < data->vgroups.length (); i++)
3619 struct iv_group *group = data->vgroups[i];
3621 gcc_assert (group->vuses[0] != NULL);
3622 add_iv_candidate_for_use (data, group->vuses[0]);
3624 add_iv_candidate_derived_from_uses (data);
3627 /* Record important candidates and add them to related_cands bitmaps. */
3629 static void
3630 record_important_candidates (struct ivopts_data *data)
3632 unsigned i;
3633 struct iv_group *group;
3635 for (i = 0; i < data->vcands.length (); i++)
3637 struct iv_cand *cand = data->vcands[i];
3639 if (cand->important)
3640 bitmap_set_bit (data->important_candidates, i);
3643 data->consider_all_candidates = (data->vcands.length ()
3644 <= CONSIDER_ALL_CANDIDATES_BOUND);
3646 /* Add important candidates to groups' related_cands bitmaps. */
3647 for (i = 0; i < data->vgroups.length (); i++)
3649 group = data->vgroups[i];
3650 bitmap_ior_into (group->related_cands, data->important_candidates);
3654 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3655 If consider_all_candidates is true, we use a two-dimensional array, otherwise
3656 we allocate a simple list to every use. */
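/* For illustration: a group related to 5 candidates gets n_map_members
   rounded up to 8; set_group_iv_cost and get_group_iv_cost then probe
   linearly starting from cand->id & 7 until the candidate or an empty
   slot is found.  */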
3658 static void
3659 alloc_use_cost_map (struct ivopts_data *data)
3661 unsigned i, size, s;
3663 for (i = 0; i < data->vgroups.length (); i++)
3665 struct iv_group *group = data->vgroups[i];
3667 if (data->consider_all_candidates)
3668 size = data->vcands.length ();
3669 else
3671 s = bitmap_count_bits (group->related_cands);
3673 /* Round up to a power of two, so that computing the modulo is fast. */
3674 size = s ? (1 << ceil_log2 (s)) : 1;
3677 group->n_map_members = size;
3678 group->cost_map = XCNEWVEC (class cost_pair, size);
3682 /* Sets cost of (GROUP, CAND) pair to COST and record that it depends
3683 on invariants INV_VARS and that the value used in expressing it is
3684 VALUE, and in case of iv elimination the comparison operator is COMP. */
3686 static void
3687 set_group_iv_cost (struct ivopts_data *data,
3688 struct iv_group *group, struct iv_cand *cand,
3689 comp_cost cost, bitmap inv_vars, tree value,
3690 enum tree_code comp, bitmap inv_exprs)
3692 unsigned i, s;
3694 if (cost.infinite_cost_p ())
3696 BITMAP_FREE (inv_vars);
3697 BITMAP_FREE (inv_exprs);
3698 return;
3701 if (data->consider_all_candidates)
3703 group->cost_map[cand->id].cand = cand;
3704 group->cost_map[cand->id].cost = cost;
3705 group->cost_map[cand->id].inv_vars = inv_vars;
3706 group->cost_map[cand->id].inv_exprs = inv_exprs;
3707 group->cost_map[cand->id].value = value;
3708 group->cost_map[cand->id].comp = comp;
3709 return;
3712 /* n_map_members is a power of two, so this computes modulo. */
3713 s = cand->id & (group->n_map_members - 1);
3714 for (i = s; i < group->n_map_members; i++)
3715 if (!group->cost_map[i].cand)
3716 goto found;
3717 for (i = 0; i < s; i++)
3718 if (!group->cost_map[i].cand)
3719 goto found;
3721 gcc_unreachable ();
3723 found:
3724 group->cost_map[i].cand = cand;
3725 group->cost_map[i].cost = cost;
3726 group->cost_map[i].inv_vars = inv_vars;
3727 group->cost_map[i].inv_exprs = inv_exprs;
3728 group->cost_map[i].value = value;
3729 group->cost_map[i].comp = comp;
3732 /* Gets cost of (GROUP, CAND) pair. */
3734 static class cost_pair *
3735 get_group_iv_cost (struct ivopts_data *data, struct iv_group *group,
3736 struct iv_cand *cand)
3738 unsigned i, s;
3739 class cost_pair *ret;
3741 if (!cand)
3742 return NULL;
3744 if (data->consider_all_candidates)
3746 ret = group->cost_map + cand->id;
3747 if (!ret->cand)
3748 return NULL;
3750 return ret;
3753 /* n_map_members is a power of two, so this computes modulo. */
3754 s = cand->id & (group->n_map_members - 1);
3755 for (i = s; i < group->n_map_members; i++)
3756 if (group->cost_map[i].cand == cand)
3757 return group->cost_map + i;
3758 else if (group->cost_map[i].cand == NULL)
3759 return NULL;
3760 for (i = 0; i < s; i++)
3761 if (group->cost_map[i].cand == cand)
3762 return group->cost_map + i;
3763 else if (group->cost_map[i].cand == NULL)
3764 return NULL;
3766 return NULL;
3769 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
3770 static rtx
3771 produce_memory_decl_rtl (tree obj, int *regno)
3773 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3774 machine_mode address_mode = targetm.addr_space.address_mode (as);
3775 rtx x;
3777 gcc_assert (obj);
3778 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3780 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3781 x = gen_rtx_SYMBOL_REF (address_mode, name);
3782 SET_SYMBOL_REF_DECL (x, obj);
3783 x = gen_rtx_MEM (DECL_MODE (obj), x);
3784 set_mem_addr_space (x, as);
3785 targetm.encode_section_info (obj, x, true);
3787 else
3789 x = gen_raw_REG (address_mode, (*regno)++);
3790 x = gen_rtx_MEM (DECL_MODE (obj), x);
3791 set_mem_addr_space (x, as);
3794 return x;
3797 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
3798 walk_tree. DATA contains the actual fake register number. */
3800 static tree
3801 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3803 tree obj = NULL_TREE;
3804 rtx x = NULL_RTX;
3805 int *regno = (int *) data;
3807 switch (TREE_CODE (*expr_p))
3809 case ADDR_EXPR:
3810 for (expr_p = &TREE_OPERAND (*expr_p, 0);
3811 handled_component_p (*expr_p);
3812 expr_p = &TREE_OPERAND (*expr_p, 0))
3813 continue;
3814 obj = *expr_p;
3815 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3816 x = produce_memory_decl_rtl (obj, regno);
3817 break;
3819 case SSA_NAME:
3820 *ws = 0;
3821 obj = SSA_NAME_VAR (*expr_p);
3822 /* Defer handling of anonymous SSA_NAMEs to the expander. */
3823 if (!obj)
3824 return NULL_TREE;
3825 if (!DECL_RTL_SET_P (obj))
3826 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3827 break;
3829 case VAR_DECL:
3830 case PARM_DECL:
3831 case RESULT_DECL:
3832 *ws = 0;
3833 obj = *expr_p;
3835 if (DECL_RTL_SET_P (obj))
3836 break;
3838 if (DECL_MODE (obj) == BLKmode)
3839 x = produce_memory_decl_rtl (obj, regno);
3840 else
3841 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3843 break;
3845 default:
3846 break;
3849 if (x)
3851 decl_rtl_to_reset.safe_push (obj);
3852 SET_DECL_RTL (obj, x);
3855 return NULL_TREE;
3858 /* Predict whether the given loop will be transformed in the RTL
3859 doloop_optimize pass. Attempt to duplicate some doloop_optimize checks.
3860 This is only for target independent checks, see targetm.predict_doloop_p
3861 for the target dependent ones.
3863 Note that according to some initial investigation, some checks, like the
3864 costly niter check and invalid stmt scanning, don't bring much gain in
3865 general cases, so keep this as simple as possible for now.
3867 Some RTL-specific checks seem unable to be done in gimple; if any new
3868 or easy checks _are_ missing here, please add them. */
3870 static bool
3871 generic_predict_doloop_p (struct ivopts_data *data)
3873 class loop *loop = data->current_loop;
3875 /* Call target hook for target dependent checks. */
3876 if (!targetm.predict_doloop_p (loop))
3878 if (dump_file && (dump_flags & TDF_DETAILS))
3879 fprintf (dump_file, "Predict doloop failure due to"
3880 " target specific checks.\n");
3881 return false;
3884 /* Similar to doloop_optimize, check iteration description to know it's
3885 suitable or not. Keep it as simple as possible, feel free to extend it
3886 if you find any multiple exits cases matter. */
3887 edge exit = single_dom_exit (loop);
3888 class tree_niter_desc *niter_desc;
3889 if (!exit || !(niter_desc = niter_for_exit (data, exit)))
3891 if (dump_file && (dump_flags & TDF_DETAILS))
3892 fprintf (dump_file, "Predict doloop failure due to"
3893 " unexpected niters.\n");
3894 return false;
3897 /* Similar to doloop_optimize, check whether iteration count too small
3898 and not profitable. */
3899 HOST_WIDE_INT est_niter = get_estimated_loop_iterations_int (loop);
3900 if (est_niter == -1)
3901 est_niter = get_likely_max_loop_iterations_int (loop);
3902 if (est_niter >= 0 && est_niter < 3)
3904 if (dump_file && (dump_flags & TDF_DETAILS))
3905 fprintf (dump_file,
3906 "Predict doloop failure due to"
3907 " too few iterations (%u).\n",
3908 (unsigned int) est_niter);
3909 return false;
3912 return true;
3915 /* Determines cost of the computation of EXPR. */
3917 static unsigned
3918 computation_cost (tree expr, bool speed)
3920 rtx_insn *seq;
3921 rtx rslt;
3922 tree type = TREE_TYPE (expr);
3923 unsigned cost;
3924 /* Avoid using hard regs in ways which may be unsupported. */
3925 int regno = LAST_VIRTUAL_REGISTER + 1;
3926 struct cgraph_node *node = cgraph_node::get (current_function_decl);
3927 enum node_frequency real_frequency = node->frequency;
3929 node->frequency = NODE_FREQUENCY_NORMAL;
3930 crtl->maybe_hot_insn_p = speed;
3931 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3932 start_sequence ();
3933 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3934 seq = get_insns ();
3935 end_sequence ();
3936 default_rtl_profile ();
3937 node->frequency = real_frequency;
3939 cost = seq_cost (seq, speed);
3940 if (MEM_P (rslt))
3941 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3942 TYPE_ADDR_SPACE (type), speed);
3943 else if (!REG_P (rslt))
3944 cost += set_src_cost (rslt, TYPE_MODE (type), speed);
3946 return cost;
3949 /* Returns variable containing the value of candidate CAND at statement AT. */
3951 static tree
3952 var_at_stmt (class loop *loop, struct iv_cand *cand, gimple *stmt)
3954 if (stmt_after_increment (loop, cand, stmt))
3955 return cand->var_after;
3956 else
3957 return cand->var_before;
3960 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3961 same precision that is at least as wide as the precision of TYPE, stores
3962 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3963 type of A and B. */
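/* For illustration: if *A is (unsigned int) al and *B is (unsigned int) bl,
   with al and bl of type long (of equal precision, at least as wide as
   unsigned int), *A becomes al, *B becomes bl and long is returned;
   otherwise *A and *B are left untouched and the type of *A is returned.  */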
3965 static tree
3966 determine_common_wider_type (tree *a, tree *b)
3968 tree wider_type = NULL;
3969 tree suba, subb;
3970 tree atype = TREE_TYPE (*a);
3972 if (CONVERT_EXPR_P (*a))
3974 suba = TREE_OPERAND (*a, 0);
3975 wider_type = TREE_TYPE (suba);
3976 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3977 return atype;
3979 else
3980 return atype;
3982 if (CONVERT_EXPR_P (*b))
3984 subb = TREE_OPERAND (*b, 0);
3985 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3986 return atype;
3988 else
3989 return atype;
3991 *a = suba;
3992 *b = subb;
3993 return wider_type;
3996 /* Determines the expression by which USE is expressed from induction variable
3997 CAND at statement AT in LOOP. The expression is stored in two parts in a
3998 decomposed form. The invariant part is stored in AFF_INV, while the variant
3999 part is in AFF_VAR. Store the ratio of USE.step over CAND.step in PRAT if
4000 it's non-null. Returns false if USE cannot be expressed using CAND. */
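/* For illustration (a sketch): if USE has iv {base = b, step = 4} and CAND
   has iv {base = 0, step = 1}, the ratio is 4, AFF_INV ends up holding b and
   AFF_VAR holds 4 * var, so the value of USE is recovered as b + 4 * cand.  */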
4002 static bool
4003 get_computation_aff_1 (class loop *loop, gimple *at, struct iv_use *use,
4004 struct iv_cand *cand, class aff_tree *aff_inv,
4005 class aff_tree *aff_var, widest_int *prat = NULL)
4007 tree ubase = use->iv->base, ustep = use->iv->step;
4008 tree cbase = cand->iv->base, cstep = cand->iv->step;
4009 tree common_type, uutype, var, cstep_common;
4010 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
4011 aff_tree aff_cbase;
4012 widest_int rat;
4014 /* We must have a precision to express the values of use. */
4015 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4016 return false;
4018 var = var_at_stmt (loop, cand, at);
4019 uutype = unsigned_type_for (utype);
4021 /* If the conversion is not a no-op, perform it. */
4022 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4024 if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
4025 && (CONVERT_EXPR_P (cstep) || poly_int_tree_p (cstep)))
4027 tree inner_base, inner_step, inner_type;
4028 inner_base = TREE_OPERAND (cbase, 0);
4029 if (CONVERT_EXPR_P (cstep))
4030 inner_step = TREE_OPERAND (cstep, 0);
4031 else
4032 inner_step = cstep;
4034 inner_type = TREE_TYPE (inner_base);
4035 /* If candidate is added from a biv whose type is smaller than
4036 ctype, we know both candidate and the biv won't overflow.
4037 In this case, it's safe to skip the conversion in the candidate.
4038 As an example, (unsigned short)((unsigned long)A) equals
4039 (unsigned short)A, if A has a type no larger than short. */
4040 if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
4042 cbase = inner_base;
4043 cstep = inner_step;
4046 cbase = fold_convert (uutype, cbase);
4047 cstep = fold_convert (uutype, cstep);
4048 var = fold_convert (uutype, var);
4051 /* Ratio is 1 when computing the value of biv cand by itself.
4052 We can't rely on constant_multiple_of in this case because the
4053 use is created after the original biv is selected. The call
4054 could fail because of inconsistent fold behavior. See PR68021
4055 for more information. */
4056 if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
4058 gcc_assert (is_gimple_assign (use->stmt));
4059 gcc_assert (use->iv->ssa_name == cand->var_after);
4060 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
4061 rat = 1;
4063 else if (!constant_multiple_of (ustep, cstep, &rat))
4064 return false;
4066 if (prat)
4067 *prat = rat;
4069 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
4070 type, we achieve better folding by computing their difference in this
4071 wider type, and casting the result to UUTYPE. We do not need to worry about
4072 overflows, as all the arithmetic will in the end be performed in UUTYPE
4073 anyway. */
4074 common_type = determine_common_wider_type (&ubase, &cbase);
4076 /* use = ubase - ratio * cbase + ratio * var. */
4077 tree_to_aff_combination (ubase, common_type, aff_inv);
4078 tree_to_aff_combination (cbase, common_type, &aff_cbase);
4079 tree_to_aff_combination (var, uutype, aff_var);
4081 /* We need to shift the value if we are after the increment. */
4082 if (stmt_after_increment (loop, cand, at))
4084 aff_tree cstep_aff;
4086 if (common_type != uutype)
4087 cstep_common = fold_convert (common_type, cstep);
4088 else
4089 cstep_common = cstep;
4091 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
4092 aff_combination_add (&aff_cbase, &cstep_aff);
4095 aff_combination_scale (&aff_cbase, -rat);
4096 aff_combination_add (aff_inv, &aff_cbase);
4097 if (common_type != uutype)
4098 aff_combination_convert (aff_inv, uutype);
4100 aff_combination_scale (aff_var, rat);
4101 return true;
4104 /* Determines the expression by which USE is expressed from induction variable
4105 CAND at statement AT in LOOP. The expression is stored in a decomposed
4106 form into AFF. Returns false if USE cannot be expressed using CAND. */
4108 static bool
4109 get_computation_aff (class loop *loop, gimple *at, struct iv_use *use,
4110 struct iv_cand *cand, class aff_tree *aff)
4112 aff_tree aff_var;
4114 if (!get_computation_aff_1 (loop, at, use, cand, aff, &aff_var))
4115 return false;
4117 aff_combination_add (aff, &aff_var);
4118 return true;
4121 /* Return the type of USE. */
4123 static tree
4124 get_use_type (struct iv_use *use)
4126 tree base_type = TREE_TYPE (use->iv->base);
4127 tree type;
4129 if (use->type == USE_REF_ADDRESS)
4131 /* The base_type may be a void pointer. Create a pointer type based on
4132 the mem_ref instead. */
4133 type = build_pointer_type (TREE_TYPE (*use->op_p));
4134 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
4135 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
4137 else
4138 type = base_type;
4140 return type;
4143 /* Determines the expression by which USE is expressed from induction variable
4144 CAND at statement AT in LOOP. The computation is unshared. */
4146 static tree
4147 get_computation_at (class loop *loop, gimple *at,
4148 struct iv_use *use, struct iv_cand *cand)
4150 aff_tree aff;
4151 tree type = get_use_type (use);
4153 if (!get_computation_aff (loop, at, use, cand, &aff))
4154 return NULL_TREE;
4155 unshare_aff_combination (&aff);
4156 return fold_convert (type, aff_combination_to_tree (&aff));
4159 /* Like get_computation_at, but try harder, even if the computation
4160 is more expensive. Intended for debug stmts. */
4162 static tree
4163 get_debug_computation_at (class loop *loop, gimple *at,
4164 struct iv_use *use, struct iv_cand *cand)
4166 if (tree ret = get_computation_at (loop, at, use, cand))
4167 return ret;
4169 tree ubase = use->iv->base, ustep = use->iv->step;
4170 tree cbase = cand->iv->base, cstep = cand->iv->step;
4171 tree var;
4172 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
4173 widest_int rat;
4175 /* We must have enough precision to express the values of the use. */
4176 if (TYPE_PRECISION (utype) >= TYPE_PRECISION (ctype))
4177 return NULL_TREE;
4179 /* Try to handle the case that get_computation_at doesn't,
4180 try to express
4181 use = ubase + (var - cbase) / ratio. */
4182 if (!constant_multiple_of (cstep, fold_convert (TREE_TYPE (cstep), ustep),
4183 &rat))
4184 return NULL_TREE;
4186 bool neg_p = false;
4187 if (wi::neg_p (rat))
4189 if (TYPE_UNSIGNED (ctype))
4190 return NULL_TREE;
4191 neg_p = true;
4192 rat = wi::neg (rat);
4195 /* If both IVs can wrap around and CAND doesn't have a power of two step,
4196 it is unsafe. Consider uint16_t CAND with step 9, when wrapping around,
4197 the values will be ... 0xfff0, 0xfff9, 2, 11 ... and when use is say
4198 uint8_t with step 3, those values divided by 3 cast to uint8_t will be
4199 ... 0x50, 0x53, 0, 3 ... rather than expected 0x50, 0x53, 0x56, 0x59. */
4200 if (!use->iv->no_overflow
4201 && !cand->iv->no_overflow
4202 && !integer_pow2p (cstep))
4203 return NULL_TREE;
4205 int bits = wi::exact_log2 (rat);
4206 if (bits == -1)
4207 bits = wi::floor_log2 (rat) + 1;
4208 if (!cand->iv->no_overflow
4209 && TYPE_PRECISION (utype) + bits > TYPE_PRECISION (ctype))
4210 return NULL_TREE;
4212 var = var_at_stmt (loop, cand, at);
4214 if (POINTER_TYPE_P (ctype))
4216 ctype = unsigned_type_for (ctype);
4217 cbase = fold_convert (ctype, cbase);
4218 cstep = fold_convert (ctype, cstep);
4219 var = fold_convert (ctype, var);
4222 if (stmt_after_increment (loop, cand, at))
4223 var = fold_build2 (MINUS_EXPR, TREE_TYPE (var), var,
4224 unshare_expr (cstep));
4226 var = fold_build2 (MINUS_EXPR, TREE_TYPE (var), var, cbase);
4227 var = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (var), var,
4228 wide_int_to_tree (TREE_TYPE (var), rat));
4229 if (POINTER_TYPE_P (utype))
4231 var = fold_convert (sizetype, var);
4232 if (neg_p)
4233 var = fold_build1 (NEGATE_EXPR, sizetype, var);
4234 var = fold_build2 (POINTER_PLUS_EXPR, utype, ubase, var);
4236 else
4238 var = fold_convert (utype, var);
4239 var = fold_build2 (neg_p ? MINUS_EXPR : PLUS_EXPR, utype,
4240 ubase, var);
4242 return var;
4245 /* Adjust the cost COST for being in loop setup rather than loop body.
4246 If we're optimizing for space, the loop setup overhead is constant;
4247 if we're optimizing for speed, amortize it over the per-iteration cost.
4248 If ROUND_UP_P is true, the result is rounded up rather than truncated
4249 toward zero when optimizing for speed. */
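/* For illustration: with an average of 10 iterations, a setup cost of 25 is
   amortized to 25 / 10 = 2 when optimizing for speed, or to (25 + 9) / 10 = 3
   when ROUND_UP_P is additionally true; when optimizing for size the cost is
   returned unchanged.  */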
4250 static int64_t
4251 adjust_setup_cost (struct ivopts_data *data, int64_t cost,
4252 bool round_up_p = false)
4254 if (cost == INFTY)
4255 return cost;
4256 else if (optimize_loop_for_speed_p (data->current_loop))
4258 int64_t niters = (int64_t) avg_loop_niter (data->current_loop);
4259 return (cost + (round_up_p ? niters - 1 : 0)) / niters;
4261 else
4262 return cost;
4265 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is the
4266 EXPR operand holding the shift. COST0 and COST1 are the costs for
4267 calculating the operands of EXPR. Returns true if successful, and returns
4268 the cost in COST. */
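/* For illustration: for EXPR = A + B * 8, MULT is B * 8, the shift amount is
   log2 (8) = 3, and the base cost is the cheaper of add_cost + shift_cost
   and the target's shift-and-add cost for that shift amount.  */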
4270 static bool
4271 get_shiftadd_cost (tree expr, scalar_int_mode mode, comp_cost cost0,
4272 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
4274 comp_cost res;
4275 tree op1 = TREE_OPERAND (expr, 1);
4276 tree cst = TREE_OPERAND (mult, 1);
4277 tree multop = TREE_OPERAND (mult, 0);
4278 int m = exact_log2 (int_cst_value (cst));
4279 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
4280 int as_cost, sa_cost;
4281 bool mult_in_op1;
4283 if (!(m >= 0 && m < maxm))
4284 return false;
4286 STRIP_NOPS (op1);
4287 mult_in_op1 = operand_equal_p (op1, mult, 0);
4289 as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
4291 /* If the target has a cheap shift-and-add or shift-and-sub instruction,
4292 use that in preference to a shift insn followed by an add insn. */
4293 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
4294 ? shiftadd_cost (speed, mode, m)
4295 : (mult_in_op1
4296 ? shiftsub1_cost (speed, mode, m)
4297 : shiftsub0_cost (speed, mode, m)));
4299 res = comp_cost (MIN (as_cost, sa_cost), 0);
4300 res += (mult_in_op1 ? cost0 : cost1);
4302 STRIP_NOPS (multop);
4303 if (!is_gimple_val (multop))
4304 res += force_expr_to_var_cost (multop, speed);
4306 *cost = res;
4307 return true;
4310 /* Estimates cost of forcing expression EXPR into a variable. */
4312 static comp_cost
4313 force_expr_to_var_cost (tree expr, bool speed)
4315 static bool costs_initialized = false;
4316 static unsigned integer_cost [2];
4317 static unsigned symbol_cost [2];
4318 static unsigned address_cost [2];
4319 tree op0, op1;
4320 comp_cost cost0, cost1, cost;
4321 machine_mode mode;
4322 scalar_int_mode int_mode;
4324 if (!costs_initialized)
4326 tree type = build_pointer_type (integer_type_node);
4327 tree var, addr;
4328 rtx x;
4329 int i;
4331 var = create_tmp_var_raw (integer_type_node, "test_var");
4332 TREE_STATIC (var) = 1;
4333 x = produce_memory_decl_rtl (var, NULL);
4334 SET_DECL_RTL (var, x);
4336 addr = build1 (ADDR_EXPR, type, var);
4339 for (i = 0; i < 2; i++)
4341 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
4342 2000), i);
4344 symbol_cost[i] = computation_cost (addr, i) + 1;
4346 address_cost[i]
4347 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
4348 if (dump_file && (dump_flags & TDF_DETAILS))
4350 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
4351 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
4352 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
4353 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
4354 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
4355 fprintf (dump_file, "\n");
4359 costs_initialized = true;
4362 STRIP_NOPS (expr);
4364 if (SSA_VAR_P (expr))
4365 return no_cost;
4367 if (is_gimple_min_invariant (expr))
4369 if (poly_int_tree_p (expr))
4370 return comp_cost (integer_cost [speed], 0);
4372 if (TREE_CODE (expr) == ADDR_EXPR)
4374 tree obj = TREE_OPERAND (expr, 0);
4376 if (VAR_P (obj)
4377 || TREE_CODE (obj) == PARM_DECL
4378 || TREE_CODE (obj) == RESULT_DECL)
4379 return comp_cost (symbol_cost [speed], 0);
4382 return comp_cost (address_cost [speed], 0);
4385 switch (TREE_CODE (expr))
4387 case POINTER_PLUS_EXPR:
4388 case PLUS_EXPR:
4389 case MINUS_EXPR:
4390 case MULT_EXPR:
4391 case TRUNC_DIV_EXPR:
4392 case BIT_AND_EXPR:
4393 case BIT_IOR_EXPR:
4394 case LSHIFT_EXPR:
4395 case RSHIFT_EXPR:
4396 op0 = TREE_OPERAND (expr, 0);
4397 op1 = TREE_OPERAND (expr, 1);
4398 STRIP_NOPS (op0);
4399 STRIP_NOPS (op1);
4400 break;
4402 CASE_CONVERT:
4403 case NEGATE_EXPR:
4404 case BIT_NOT_EXPR:
4405 op0 = TREE_OPERAND (expr, 0);
4406 STRIP_NOPS (op0);
4407 op1 = NULL_TREE;
4408 break;
4409 /* See add_iv_candidate_for_doloop: for the doloop may_be_zero case we
4410 introduce a COND_EXPR for the IV base, so we need to support better
4411 cost estimation for this COND_EXPR and for tcc_comparison codes. */
4412 case COND_EXPR:
4413 op0 = TREE_OPERAND (expr, 1);
4414 STRIP_NOPS (op0);
4415 op1 = TREE_OPERAND (expr, 2);
4416 STRIP_NOPS (op1);
4417 break;
4418 case LT_EXPR:
4419 case LE_EXPR:
4420 case GT_EXPR:
4421 case GE_EXPR:
4422 case EQ_EXPR:
4423 case NE_EXPR:
4424 case UNORDERED_EXPR:
4425 case ORDERED_EXPR:
4426 case UNLT_EXPR:
4427 case UNLE_EXPR:
4428 case UNGT_EXPR:
4429 case UNGE_EXPR:
4430 case UNEQ_EXPR:
4431 case LTGT_EXPR:
4432 case MAX_EXPR:
4433 case MIN_EXPR:
4434 op0 = TREE_OPERAND (expr, 0);
4435 STRIP_NOPS (op0);
4436 op1 = TREE_OPERAND (expr, 1);
4437 STRIP_NOPS (op1);
4438 break;
4440 default:
4441 /* Just an arbitrary value, FIXME. */
4442 return comp_cost (target_spill_cost[speed], 0);
4445 if (op0 == NULL_TREE
4446 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4447 cost0 = no_cost;
4448 else
4449 cost0 = force_expr_to_var_cost (op0, speed);
4451 if (op1 == NULL_TREE
4452 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4453 cost1 = no_cost;
4454 else
4455 cost1 = force_expr_to_var_cost (op1, speed);
4457 mode = TYPE_MODE (TREE_TYPE (expr));
4458 switch (TREE_CODE (expr))
4460 case POINTER_PLUS_EXPR:
4461 case PLUS_EXPR:
4462 case MINUS_EXPR:
4463 case NEGATE_EXPR:
4464 cost = comp_cost (add_cost (speed, mode), 0);
4465 if (TREE_CODE (expr) != NEGATE_EXPR)
4467 tree mult = NULL_TREE;
4468 comp_cost sa_cost;
4469 if (TREE_CODE (op1) == MULT_EXPR)
4470 mult = op1;
4471 else if (TREE_CODE (op0) == MULT_EXPR)
4472 mult = op0;
4474 if (mult != NULL_TREE
4475 && is_a <scalar_int_mode> (mode, &int_mode)
4476 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4477 && get_shiftadd_cost (expr, int_mode, cost0, cost1, mult,
4478 speed, &sa_cost))
4479 return sa_cost;
4481 break;
4483 CASE_CONVERT:
4485 tree inner_mode, outer_mode;
4486 outer_mode = TREE_TYPE (expr);
4487 inner_mode = TREE_TYPE (op0);
4488 cost = comp_cost (convert_cost (TYPE_MODE (outer_mode),
4489 TYPE_MODE (inner_mode), speed), 0);
4491 break;
4493 case MULT_EXPR:
4494 if (cst_and_fits_in_hwi (op0))
4495 cost = comp_cost (mult_by_coeff_cost (int_cst_value (op0),
4496 mode, speed), 0);
4497 else if (cst_and_fits_in_hwi (op1))
4498 cost = comp_cost (mult_by_coeff_cost (int_cst_value (op1),
4499 mode, speed), 0);
4500 else
4501 return comp_cost (target_spill_cost [speed], 0);
4502 break;
4504 case TRUNC_DIV_EXPR:
4505 /* Division by power of two is usually cheap, so we allow it. Forbid
4506 anything else. */
4507 if (integer_pow2p (TREE_OPERAND (expr, 1)))
4508 cost = comp_cost (add_cost (speed, mode), 0);
4509 else
4510 cost = comp_cost (target_spill_cost[speed], 0);
4511 break;
4513 case BIT_AND_EXPR:
4514 case BIT_IOR_EXPR:
4515 case BIT_NOT_EXPR:
4516 case LSHIFT_EXPR:
4517 case RSHIFT_EXPR:
4518 cost = comp_cost (add_cost (speed, mode), 0);
4519 break;
4520 case COND_EXPR:
4521 op0 = TREE_OPERAND (expr, 0);
4522 STRIP_NOPS (op0);
4523 if (op0 == NULL_TREE || TREE_CODE (op0) == SSA_NAME
4524 || CONSTANT_CLASS_P (op0))
4525 cost = no_cost;
4526 else
4527 cost = force_expr_to_var_cost (op0, speed);
4528 break;
4529 case LT_EXPR:
4530 case LE_EXPR:
4531 case GT_EXPR:
4532 case GE_EXPR:
4533 case EQ_EXPR:
4534 case NE_EXPR:
4535 case UNORDERED_EXPR:
4536 case ORDERED_EXPR:
4537 case UNLT_EXPR:
4538 case UNLE_EXPR:
4539 case UNGT_EXPR:
4540 case UNGE_EXPR:
4541 case UNEQ_EXPR:
4542 case LTGT_EXPR:
4543 case MAX_EXPR:
4544 case MIN_EXPR:
4545 /* Simply use the add cost for now; FIXME: find a more accurate way to
4546 evaluate the cost for these codes. */
4547 cost = comp_cost (add_cost (speed, mode), 0);
4548 break;
4550 default:
4551 gcc_unreachable ();
4554 cost += cost0;
4555 cost += cost1;
4556 return cost;
4559 /* Estimates cost of forcing EXPR into a variable. INV_VARS is a set of the
4560 invariants the computation depends on. */
4562 static comp_cost
4563 force_var_cost (struct ivopts_data *data, tree expr, bitmap *inv_vars)
4565 if (!expr)
4566 return no_cost;
4568 find_inv_vars (data, &expr, inv_vars);
4569 return force_expr_to_var_cost (expr, data->speed);
4572 /* Returns the cost of an auto-modifying address expression of the shape
4573 base + offset. AINC_STEP is the step size of the address IV. AINC_OFFSET
4574 is the offset of the address expression. The address expression has
4575 ADDR_MODE in address space AS. The memory access has MEM_MODE. SPEED
4576 indicates whether we are optimizing for speed or size. */
4578 enum ainc_type
4580 AINC_PRE_INC, /* Pre increment. */
4581 AINC_PRE_DEC, /* Pre decrement. */
4582 AINC_POST_INC, /* Post increment. */
4583 AINC_POST_DEC, /* Post decrement. */
4584 AINC_NONE /* Also the number of auto increment types. */
4587 struct ainc_cost_data
4589 int64_t costs[AINC_NONE];
4592 static comp_cost
4593 get_address_cost_ainc (poly_int64 ainc_step, poly_int64 ainc_offset,
4594 machine_mode addr_mode, machine_mode mem_mode,
4595 addr_space_t as, bool speed)
4597 if (!USE_LOAD_PRE_DECREMENT (mem_mode)
4598 && !USE_STORE_PRE_DECREMENT (mem_mode)
4599 && !USE_LOAD_POST_DECREMENT (mem_mode)
4600 && !USE_STORE_POST_DECREMENT (mem_mode)
4601 && !USE_LOAD_PRE_INCREMENT (mem_mode)
4602 && !USE_STORE_PRE_INCREMENT (mem_mode)
4603 && !USE_LOAD_POST_INCREMENT (mem_mode)
4604 && !USE_STORE_POST_INCREMENT (mem_mode))
4605 return infinite_cost;
4607 static vec<ainc_cost_data *> ainc_cost_data_list;
4608 unsigned idx = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
4609 if (idx >= ainc_cost_data_list.length ())
4611 unsigned nsize = ((unsigned) as + 1) *MAX_MACHINE_MODE;
4613 gcc_assert (nsize > idx);
4614 ainc_cost_data_list.safe_grow_cleared (nsize, true);
4617 ainc_cost_data *data = ainc_cost_data_list[idx];
4618 if (data == NULL)
4620 rtx reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
4622 data = (ainc_cost_data *) xcalloc (1, sizeof (*data));
4623 data->costs[AINC_PRE_DEC] = INFTY;
4624 data->costs[AINC_POST_DEC] = INFTY;
4625 data->costs[AINC_PRE_INC] = INFTY;
4626 data->costs[AINC_POST_INC] = INFTY;
4627 if (USE_LOAD_PRE_DECREMENT (mem_mode)
4628 || USE_STORE_PRE_DECREMENT (mem_mode))
4630 rtx addr = gen_rtx_PRE_DEC (addr_mode, reg);
4632 if (memory_address_addr_space_p (mem_mode, addr, as))
4633 data->costs[AINC_PRE_DEC]
4634 = address_cost (addr, mem_mode, as, speed);
4636 if (USE_LOAD_POST_DECREMENT (mem_mode)
4637 || USE_STORE_POST_DECREMENT (mem_mode))
4639 rtx addr = gen_rtx_POST_DEC (addr_mode, reg);
4641 if (memory_address_addr_space_p (mem_mode, addr, as))
4642 data->costs[AINC_POST_DEC]
4643 = address_cost (addr, mem_mode, as, speed);
4645 if (USE_LOAD_PRE_INCREMENT (mem_mode)
4646 || USE_STORE_PRE_INCREMENT (mem_mode))
4648 rtx addr = gen_rtx_PRE_INC (addr_mode, reg);
4650 if (memory_address_addr_space_p (mem_mode, addr, as))
4651 data->costs[AINC_PRE_INC]
4652 = address_cost (addr, mem_mode, as, speed);
4654 if (USE_LOAD_POST_INCREMENT (mem_mode)
4655 || USE_STORE_POST_INCREMENT (mem_mode))
4657 rtx addr = gen_rtx_POST_INC (addr_mode, reg);
4659 if (memory_address_addr_space_p (mem_mode, addr, as))
4660 data->costs[AINC_POST_INC]
4661 = address_cost (addr, mem_mode, as, speed);
4663 ainc_cost_data_list[idx] = data;
4666 poly_int64 msize = GET_MODE_SIZE (mem_mode);
4667 if (known_eq (ainc_offset, 0) && known_eq (msize, ainc_step))
4668 return comp_cost (data->costs[AINC_POST_INC], 0);
4669 if (known_eq (ainc_offset, 0) && known_eq (msize, -ainc_step))
4670 return comp_cost (data->costs[AINC_POST_DEC], 0);
4671 if (known_eq (ainc_offset, msize) && known_eq (msize, ainc_step))
4672 return comp_cost (data->costs[AINC_PRE_INC], 0);
4673 if (known_eq (ainc_offset, -msize) && known_eq (msize, -ainc_step))
4674 return comp_cost (data->costs[AINC_PRE_DEC], 0);
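/* For illustration: a 4-byte access with AINC_STEP 4 and AINC_OFFSET 0
   matches the post-increment case above, while AINC_OFFSET 4 with the same
   step matches pre-increment; anything else falls through as unsupported.  */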
4676 return infinite_cost;
4679 /* Return cost of computing USE's address expression by using CAND.
4680 AFF_INV and AFF_VAR represent invariant and variant parts of the
4681 address expression, respectively. If AFF_INV is simple, store the
4682 loop invariant variables it depends on in INV_VARS; if AFF_INV is
4683 complicated, handle it as a new invariant expression and record it
4684 in INV_EXPR. RATIO is the constant ratio of USE's step to CAND's
4685 step. If CAN_AUTOINC is non-NULL, store in it a boolean value
4686 indicating whether this is an auto-increment address. */
4688 static comp_cost
4689 get_address_cost (struct ivopts_data *data, struct iv_use *use,
4690 struct iv_cand *cand, aff_tree *aff_inv,
4691 aff_tree *aff_var, HOST_WIDE_INT ratio,
4692 bitmap *inv_vars, iv_inv_expr_ent **inv_expr,
4693 bool *can_autoinc, bool speed)
4695 rtx addr;
4696 bool simple_inv = true;
4697 tree comp_inv = NULL_TREE, type = aff_var->type;
4698 comp_cost var_cost = no_cost, cost = no_cost;
4699 struct mem_address parts = {NULL_TREE, integer_one_node,
4700 NULL_TREE, NULL_TREE, NULL_TREE};
4701 machine_mode addr_mode = TYPE_MODE (type);
4702 machine_mode mem_mode = TYPE_MODE (use->mem_type);
4703 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
4704 /* Only true if ratio != 1. */
4705 bool ok_with_ratio_p = false;
4706 bool ok_without_ratio_p = false;
4708 if (!aff_combination_const_p (aff_inv))
4710 parts.index = integer_one_node;
4711 /* Addressing mode "base + index". */
4712 ok_without_ratio_p = valid_mem_ref_p (mem_mode, as, &parts);
4713 if (ratio != 1)
4715 parts.step = wide_int_to_tree (type, ratio);
4716 /* Addressing mode "base + index << scale". */
4717 ok_with_ratio_p = valid_mem_ref_p (mem_mode, as, &parts);
4718 if (!ok_with_ratio_p)
4719 parts.step = NULL_TREE;
4721 if (ok_with_ratio_p || ok_without_ratio_p)
4723 if (maybe_ne (aff_inv->offset, 0))
4725 parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4726 /* Addressing mode "base + index [<< scale] + offset". */
4727 if (!valid_mem_ref_p (mem_mode, as, &parts))
4728 parts.offset = NULL_TREE;
4729 else
4730 aff_inv->offset = 0;
4733 move_fixed_address_to_symbol (&parts, aff_inv);
4734 /* Base is fixed address and is moved to symbol part. */
4735 if (parts.symbol != NULL_TREE && aff_combination_zero_p (aff_inv))
4736 parts.base = NULL_TREE;
4738 /* Addressing mode "symbol + base + index [<< scale] [+ offset]". */
4739 if (parts.symbol != NULL_TREE
4740 && !valid_mem_ref_p (mem_mode, as, &parts))
4742 aff_combination_add_elt (aff_inv, parts.symbol, 1);
4743 parts.symbol = NULL_TREE;
4744 /* Reset SIMPLE_INV since symbol address needs to be computed
4745 outside of address expression in this case. */
4746 simple_inv = false;
4747 /* The symbol part is moved back to the base part; the base can't be NULL. */
4748 parts.base = integer_one_node;
4751 else
4752 parts.index = NULL_TREE;
4754 else
4756 poly_int64 ainc_step;
4757 if (can_autoinc
4758 && ratio == 1
4759 && ptrdiff_tree_p (cand->iv->step, &ainc_step))
4761 poly_int64 ainc_offset = (aff_inv->offset).force_shwi ();
4763 if (stmt_after_increment (data->current_loop, cand, use->stmt))
4764 ainc_offset += ainc_step;
4765 cost = get_address_cost_ainc (ainc_step, ainc_offset,
4766 addr_mode, mem_mode, as, speed);
4767 if (!cost.infinite_cost_p ())
4769 *can_autoinc = true;
4770 return cost;
4772 cost = no_cost;
4774 if (!aff_combination_zero_p (aff_inv))
4776 parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4777 /* Addressing mode "base + offset". */
4778 if (!valid_mem_ref_p (mem_mode, as, &parts))
4779 parts.offset = NULL_TREE;
4780 else
4781 aff_inv->offset = 0;
4785 if (simple_inv)
4786 simple_inv = (aff_inv == NULL
4787 || aff_combination_const_p (aff_inv)
4788 || aff_combination_singleton_var_p (aff_inv));
4789 if (!aff_combination_zero_p (aff_inv))
4790 comp_inv = aff_combination_to_tree (aff_inv);
4791 if (comp_inv != NULL_TREE)
4792 cost = force_var_cost (data, comp_inv, inv_vars);
4793 if (ratio != 1 && parts.step == NULL_TREE)
4794 var_cost += mult_by_coeff_cost (ratio, addr_mode, speed);
4795 if (comp_inv != NULL_TREE && parts.index == NULL_TREE)
4796 var_cost += add_cost (speed, addr_mode);
4798 if (comp_inv && inv_expr && !simple_inv)
4800 *inv_expr = get_loop_invariant_expr (data, comp_inv);
4801 /* Clear depends on. */
4802 if (*inv_expr != NULL && inv_vars && *inv_vars)
4803 bitmap_clear (*inv_vars);
4805 /* The cost of a small invariant expression adjusted against the loop
4806 niters is usually zero, which makes it difficult to distinguish from
4807 a candidate based on loop invariant variables. Secondly, the
4808 generated invariant expression may not be hoisted out of the loop by
4809 a following pass. We penalize the cost by rounding up in order to
4810 neutralize such effects. */
4811 cost.cost = adjust_setup_cost (data, cost.cost, true);
4812 cost.scratch = cost.cost;
4815 cost += var_cost;
4816 addr = addr_for_mem_ref (&parts, as, false);
4817 gcc_assert (memory_address_addr_space_p (mem_mode, addr, as));
4818 cost += address_cost (addr, mem_mode, as, speed);
4820 if (parts.symbol != NULL_TREE)
4821 cost.complexity += 1;
4822 /* Don't increase the complexity of adding a scaled index if it's
4823 the only kind of index that the target allows. */
4824 if (parts.step != NULL_TREE && ok_without_ratio_p)
4825 cost.complexity += 1;
4826 if (parts.base != NULL_TREE && parts.index != NULL_TREE)
4827 cost.complexity += 1;
4828 if (parts.offset != NULL_TREE && !integer_zerop (parts.offset))
4829 cost.complexity += 1;
4831 return cost;
4834 /* Scale (multiply) the computed COST (except the scratch part, which should
4835 be hoisted out of the loop) by header->frequency / AT->frequency, which
4836 makes the expected cost more accurate. */
4838 static comp_cost
4839 get_scaled_computation_cost_at (ivopts_data *data, gimple *at, comp_cost cost)
4841 if (data->speed
4842 && data->current_loop->header->count.to_frequency (cfun) > 0)
4844 basic_block bb = gimple_bb (at);
4845 gcc_assert (cost.scratch <= cost.cost);
4846 int scale_factor = (int)(intptr_t) bb->aux;
4847 if (scale_factor == 1)
4848 return cost;
4850 int64_t scaled_cost
4851 = cost.scratch + (cost.cost - cost.scratch) * scale_factor;
4853 if (dump_file && (dump_flags & TDF_DETAILS))
4854 fprintf (dump_file, "Scaling cost based on bb prob by %2.2f: "
4855 "%" PRId64 " (scratch: %" PRId64 ") -> %" PRId64 "\n",
4856 1.0f * scale_factor, cost.cost, cost.scratch, scaled_cost);
4858 cost.cost = scaled_cost;
4861 return cost;
4864 /* Determines the cost of the computation by which USE is expressed
4865 from induction variable CAND. If ADDRESS_P is true, we just need
4866 to create an address from it, otherwise we want to get it into a
4867 register. A set of invariants we depend on is stored in INV_VARS.
4868 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4869 addressing is likely. If INV_EXPR is nonnull, record invariant
4870 expr entry in it. */
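/* For illustration (non-address case, informal): the resulting cost is
   roughly the amortized cost of forcing the invariant part into a register,
   plus a multiplication when the step ratio is not 1, plus one addition to
   combine the invariant and variant parts, scaled by basic block frequency.  */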
4872 static comp_cost
4873 get_computation_cost (struct ivopts_data *data, struct iv_use *use,
4874 struct iv_cand *cand, bool address_p, bitmap *inv_vars,
4875 bool *can_autoinc, iv_inv_expr_ent **inv_expr)
4877 gimple *at = use->stmt;
4878 tree ubase = use->iv->base, cbase = cand->iv->base;
4879 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
4880 tree comp_inv = NULL_TREE;
4881 HOST_WIDE_INT ratio, aratio;
4882 comp_cost cost;
4883 widest_int rat;
4884 aff_tree aff_inv, aff_var;
4885 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4887 if (inv_vars)
4888 *inv_vars = NULL;
4889 if (can_autoinc)
4890 *can_autoinc = false;
4891 if (inv_expr)
4892 *inv_expr = NULL;
4894 /* Check if we have enough precision to express the values of use. */
4895 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4896 return infinite_cost;
4898 if (address_p
4899 || (use->iv->base_object
4900 && cand->iv->base_object
4901 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4902 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4904 /* Do not try to express address of an object with computation based
4905 on address of a different object. This may cause problems in rtl
4906 level alias analysis (that does not expect this to be happening,
4907 as this is illegal in C), and would be unlikely to be useful
4908 anyway. */
4909 if (use->iv->base_object
4910 && cand->iv->base_object
4911 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4912 return infinite_cost;
4915 if (!get_computation_aff_1 (data->current_loop, at, use,
4916 cand, &aff_inv, &aff_var, &rat)
4917 || !wi::fits_shwi_p (rat))
4918 return infinite_cost;
4920 ratio = rat.to_shwi ();
4921 if (address_p)
4923 cost = get_address_cost (data, use, cand, &aff_inv, &aff_var, ratio,
4924 inv_vars, inv_expr, can_autoinc, speed);
4925 cost = get_scaled_computation_cost_at (data, at, cost);
4926 /* For doloop IV cand, add on the extra cost. */
4927 cost += cand->doloop_p ? targetm.doloop_cost_for_address : 0;
4928 return cost;
4931 bool simple_inv = (aff_combination_const_p (&aff_inv)
4932 || aff_combination_singleton_var_p (&aff_inv));
4933 tree signed_type = signed_type_for (aff_combination_type (&aff_inv));
4934 aff_combination_convert (&aff_inv, signed_type);
4935 if (!aff_combination_zero_p (&aff_inv))
4936 comp_inv = aff_combination_to_tree (&aff_inv);
4938 cost = force_var_cost (data, comp_inv, inv_vars);
4939 if (comp_inv && inv_expr && !simple_inv)
4941 *inv_expr = get_loop_invariant_expr (data, comp_inv);
4942 /* Clear depends on. */
4943 if (*inv_expr != NULL && inv_vars && *inv_vars)
4944 bitmap_clear (*inv_vars);
4946 cost.cost = adjust_setup_cost (data, cost.cost);
4947 /* Record setup cost in scratch field. */
4948 cost.scratch = cost.cost;
4950 /* Cost of constant integer can be covered when adding invariant part to
4951 variant part. */
4952 else if (comp_inv && CONSTANT_CLASS_P (comp_inv))
4953 cost = no_cost;
4955 /* Need type narrowing to represent use with cand. */
4956 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4958 machine_mode outer_mode = TYPE_MODE (utype);
4959 machine_mode inner_mode = TYPE_MODE (ctype);
4960 cost += comp_cost (convert_cost (outer_mode, inner_mode, speed), 0);
4963 /* Turn a + i * (-c) into a - i * c. */
4964 if (ratio < 0 && comp_inv && !integer_zerop (comp_inv))
4965 aratio = -ratio;
4966 else
4967 aratio = ratio;
4969 if (ratio != 1)
4970 cost += mult_by_coeff_cost (aratio, TYPE_MODE (utype), speed);
4972 /* TODO: We may also need to check if we can compute a + i * 4 in one
4973 instruction. */
4974 /* Need to add up the invariant and variant parts. */
4975 if (comp_inv && !integer_zerop (comp_inv))
4976 cost += add_cost (speed, TYPE_MODE (utype));
4978 cost = get_scaled_computation_cost_at (data, at, cost);
4980 /* For doloop IV cand, add on the extra cost. */
4981 if (cand->doloop_p && use->type == USE_NONLINEAR_EXPR)
4982 cost += targetm.doloop_cost_for_generic;
4984 return cost;
4987 /* Determines cost of computing the use in GROUP with CAND in a generic
4988 expression. */
4990 static bool
4991 determine_group_iv_cost_generic (struct ivopts_data *data,
4992 struct iv_group *group, struct iv_cand *cand)
4994 comp_cost cost;
4995 iv_inv_expr_ent *inv_expr = NULL;
4996 bitmap inv_vars = NULL, inv_exprs = NULL;
4997 struct iv_use *use = group->vuses[0];
4999 /* The simple case first -- if we need to express value of the preserved
5000 original biv, the cost is 0. This also prevents us from counting the
5001 cost of increment twice -- once at this use and once in the cost of
5002 the candidate. */
5003 if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
5004 cost = no_cost;
5005 /* If the IV candidate involves undefined SSA values and is not the
5006 same IV as on the USE, avoid using that candidate here. */
5007 else if (cand->involves_undefs
5008 && (!use->iv || !operand_equal_p (cand->iv->base, use->iv->base, 0)))
5009 return false;
5010 else
5011 cost = get_computation_cost (data, use, cand, false,
5012 &inv_vars, NULL, &inv_expr);
5014 if (inv_expr)
5016 inv_exprs = BITMAP_ALLOC (NULL);
5017 bitmap_set_bit (inv_exprs, inv_expr->id);
5019 set_group_iv_cost (data, group, cand, cost, inv_vars,
5020 NULL_TREE, ERROR_MARK, inv_exprs);
5021 return !cost.infinite_cost_p ();
5024 /* Determines cost of computing uses in GROUP with CAND in addresses. */
5026 static bool
5027 determine_group_iv_cost_address (struct ivopts_data *data,
5028 struct iv_group *group, struct iv_cand *cand)
5030 unsigned i;
5031 bitmap inv_vars = NULL, inv_exprs = NULL;
5032 bool can_autoinc;
5033 iv_inv_expr_ent *inv_expr = NULL;
5034 struct iv_use *use = group->vuses[0];
5035 comp_cost sum_cost = no_cost, cost;
5037 cost = get_computation_cost (data, use, cand, true,
5038 &inv_vars, &can_autoinc, &inv_expr);
5040 if (inv_expr)
5042 inv_exprs = BITMAP_ALLOC (NULL);
5043 bitmap_set_bit (inv_exprs, inv_expr->id);
5045 sum_cost = cost;
5046 if (!sum_cost.infinite_cost_p () && cand->ainc_use == use)
5048 if (can_autoinc)
5049 sum_cost -= cand->cost_step;
5050 /* If we generated the candidate solely for exploiting autoincrement
5051 opportunities, and it turns out it can't be used, set the cost to
5052 infinity to make sure we ignore it. */
5053 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
5054 sum_cost = infinite_cost;
5057 /* Uses in a group can share setup code, so only add setup cost once. */
5058 cost -= cost.scratch;
5059 /* Compute and add costs for the rest of the uses in this group. */
5060 for (i = 1; i < group->vuses.length () && !sum_cost.infinite_cost_p (); i++)
5062 struct iv_use *next = group->vuses[i];
5064 /* TODO: We could skip computing cost for sub iv_use when it has the
5065 same cost as the first iv_use, but the cost really depends on the
5066 offset and where the iv_use is. */
5067 cost = get_computation_cost (data, next, cand, true,
5068 NULL, &can_autoinc, &inv_expr);
5069 if (inv_expr)
5071 if (!inv_exprs)
5072 inv_exprs = BITMAP_ALLOC (NULL);
5074 bitmap_set_bit (inv_exprs, inv_expr->id);
5076 sum_cost += cost;
5078 set_group_iv_cost (data, group, cand, sum_cost, inv_vars,
5079 NULL_TREE, ERROR_MARK, inv_exprs);
5081 return !sum_cost.infinite_cost_p ();
5084 /* Computes value of candidate CAND at position AT in iteration DESC->NITER,
5085 and stores it to VAL. */
5087 static void
5088 cand_value_at (class loop *loop, struct iv_cand *cand, gimple *at,
5089 class tree_niter_desc *desc, aff_tree *val)
5091 aff_tree step, delta, nit;
5092 struct iv *iv = cand->iv;
5093 tree type = TREE_TYPE (iv->base);
5094 tree niter = desc->niter;
5095 bool after_adjust = stmt_after_increment (loop, cand, at);
5096 tree steptype;
5098 if (POINTER_TYPE_P (type))
5099 steptype = sizetype;
5100 else
5101 steptype = unsigned_type_for (type);
5103 /* If AFTER_ADJUST is required, the code below generates the equivalent
5104 of BASE + NITER * STEP + STEP, when ideally we'd prefer the expression
5105 BASE + (NITER + 1) * STEP, especially when NITER is often of the form
5106 SSA_NAME - 1. Unfortunately, guaranteeing that adding 1 to NITER
5107 doesn't overflow is tricky, so we peek inside the TREE_NITER_DESC
5108 class for common idioms that we know are safe. */
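/* For illustration: if the control IV runs from 1 with step 1 and the exit
   test is i < n or i != n for an SSA_NAME n, NITER is n - 1; we then use n
   directly and drop the extra + STEP below, since BASE + (n - 1) * STEP
   + STEP equals BASE + n * STEP.  */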
5109 if (after_adjust
5110 && desc->control.no_overflow
5111 && integer_onep (desc->control.step)
5112 && (desc->cmp == LT_EXPR
5113 || desc->cmp == NE_EXPR)
5114 && TREE_CODE (desc->bound) == SSA_NAME)
5116 if (integer_onep (desc->control.base))
5118 niter = desc->bound;
5119 after_adjust = false;
5121 else if (TREE_CODE (niter) == MINUS_EXPR
5122 && integer_onep (TREE_OPERAND (niter, 1)))
5124 niter = TREE_OPERAND (niter, 0);
5125 after_adjust = false;
5129 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
5130 aff_combination_convert (&step, steptype);
5131 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
5132 aff_combination_convert (&nit, steptype);
5133 aff_combination_mult (&nit, &step, &delta);
5134 if (after_adjust)
5135 aff_combination_add (&delta, &step);
5137 tree_to_aff_combination (iv->base, type, val);
5138 if (!POINTER_TYPE_P (type))
5139 aff_combination_convert (val, steptype);
5140 aff_combination_add (val, &delta);
5143 /* Returns period of induction variable iv. */
5145 static tree
5146 iv_period (struct iv *iv)
5148 tree step = iv->step, period, type;
5149 tree pow2div;
5151 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
5153 type = unsigned_type_for (TREE_TYPE (step));
5154 /* Period of the iv is lcm (step, type_range)/step -1,
5155 i.e., N*type_range/step - 1. Since type range is power
5156 of two, N == step >> num_of_ending_zeros_binary (step),
5157 so the final result is
5159 (type_range >> num_of_ending_zeros_binary (step)) - 1
5162 pow2div = num_ending_zeros (step);
5164 period = build_low_bits_mask (type,
5165 (TYPE_PRECISION (type)
5166 - tree_to_uhwi (pow2div)));
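/* For illustration: in a 32-bit unsigned type, a step of 12 has two trailing
   zero bits, giving a period of (1 << 30) - 1; an odd step gives the full
   period 0xffffffff.  */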
5168 return period;
5171 /* Returns the comparison operator used when eliminating the iv USE. */
5173 static enum tree_code
5174 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
5176 class loop *loop = data->current_loop;
5177 basic_block ex_bb;
5178 edge exit;
5180 ex_bb = gimple_bb (use->stmt);
5181 exit = EDGE_SUCC (ex_bb, 0);
5182 if (flow_bb_inside_loop_p (loop, exit->dest))
5183 exit = EDGE_SUCC (ex_bb, 1);
5185 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
5188 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
5189 we only detect the situation that BASE = SOMETHING + OFFSET, where the
5190 calculation is performed in non-wrapping type.
5192 TODO: More generally, we could test for the situation that
5193 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
5194 This would require knowing the sign of OFFSET. */
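/* For illustration: if BASE was computed as SOMETHING + 16 in a non-wrapping
   type and OFFSET is 16, then BASE - OFFSET is just SOMETHING again, so the
   subtraction cannot overflow.  */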
5196 static bool
5197 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
5199 enum tree_code code;
5200 tree e1, e2;
5201 aff_tree aff_e1, aff_e2, aff_offset;
5203 if (!nowrap_type_p (TREE_TYPE (base)))
5204 return false;
5206 base = expand_simple_operations (base);
5208 if (TREE_CODE (base) == SSA_NAME)
5210 gimple *stmt = SSA_NAME_DEF_STMT (base);
5212 if (gimple_code (stmt) != GIMPLE_ASSIGN)
5213 return false;
5215 code = gimple_assign_rhs_code (stmt);
5216 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5217 return false;
5219 e1 = gimple_assign_rhs1 (stmt);
5220 e2 = gimple_assign_rhs2 (stmt);
5222 else
5224 code = TREE_CODE (base);
5225 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5226 return false;
5227 e1 = TREE_OPERAND (base, 0);
5228 e2 = TREE_OPERAND (base, 1);
5231 /* Use affine expansion as deeper inspection to prove the equality. */
5232 tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
5233 &aff_e2, &data->name_expansion_cache);
5234 tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
5235 &aff_offset, &data->name_expansion_cache);
5236 aff_combination_scale (&aff_offset, -1);
5237 switch (code)
5239 case PLUS_EXPR:
5240 aff_combination_add (&aff_e2, &aff_offset);
5241 if (aff_combination_zero_p (&aff_e2))
5242 return true;
5244 tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
5245 &aff_e1, &data->name_expansion_cache);
5246 aff_combination_add (&aff_e1, &aff_offset);
5247 return aff_combination_zero_p (&aff_e1);
5249 case POINTER_PLUS_EXPR:
5250 aff_combination_add (&aff_e2, &aff_offset);
5251 return aff_combination_zero_p (&aff_e2);
5253 default:
5254 return false;
5258 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
5259 comparison with CAND. NITER describes the number of iterations of
5260 the loops. If successful, the comparison in COMP_P is altered accordingly.
5262 We aim to handle the following situation:
5264 sometype *base, *p;
5265 int a, b, i;
5267 i = a;
5268 p = p_0 = base + a;
5272 bla (*p);
5273 p++;
5274 i++;
5276 while (i < b);
5278 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
5279 We aim to optimize this to
5281 p = p_0 = base + a;
5284 bla (*p);
5285 p++;
5287 while (p < p_0 - a + b);
5289 This preserves the correctness, since the pointer arithmetics does not
5290 overflow. More precisely:
5292 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
5293 overflow in computing it or the values of p.
5294 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
5295 overflow. To prove this, we use the fact that p_0 = base + a. */
5297 static bool
5298 iv_elimination_compare_lt (struct ivopts_data *data,
5299 struct iv_cand *cand, enum tree_code *comp_p,
5300 class tree_niter_desc *niter)
5302 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
5303 class aff_tree nit, tmpa, tmpb;
5304 enum tree_code comp;
5305 HOST_WIDE_INT step;
5307 /* We need to know that the candidate induction variable does not overflow.
5308 While more complex analysis may be used to prove this, for now just
5309 check that the variable appears in the original program and that it
5310 is computed in a type that guarantees no overflows. */
5311 cand_type = TREE_TYPE (cand->iv->base);
5312 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
5313 return false;
5315 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
5316 the calculation of the BOUND could overflow, making the comparison
5317 invalid. */
5318 if (!data->loop_single_exit_p)
5319 return false;
5321 /* We need to be able to decide whether candidate is increasing or decreasing
5322 in order to choose the right comparison operator. */
5323 if (!cst_and_fits_in_hwi (cand->iv->step))
5324 return false;
5325 step = int_cst_value (cand->iv->step);
5327 /* Check that the number of iterations matches the expected pattern:
5328 a + 1 > b ? 0 : b - a - 1. */
5329 mbz = niter->may_be_zero;
5330 if (TREE_CODE (mbz) == GT_EXPR)
5332 /* Handle a + 1 > b. */
5333 tree op0 = TREE_OPERAND (mbz, 0);
5334 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
5336 a = TREE_OPERAND (op0, 0);
5337 b = TREE_OPERAND (mbz, 1);
5339 else
5340 return false;
5342 else if (TREE_CODE (mbz) == LT_EXPR)
5344 tree op1 = TREE_OPERAND (mbz, 1);
5346 /* Handle b < a + 1. */
5347 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
5349 a = TREE_OPERAND (op1, 0);
5350 b = TREE_OPERAND (mbz, 0);
5352 else
5353 return false;
5355 else
5356 return false;
5358 /* Expected number of iterations is B - A - 1. Check that it matches
5359 the actual number, i.e., that B - A - NITER = 1. */
5360 tree_to_aff_combination (niter->niter, nit_type, &nit);
5361 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
5362 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
5363 aff_combination_scale (&nit, -1);
5364 aff_combination_scale (&tmpa, -1);
5365 aff_combination_add (&tmpb, &tmpa);
5366 aff_combination_add (&tmpb, &nit);
5367 if (tmpb.n != 0 || maybe_ne (tmpb.offset, 1))
5368 return false;
5370 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
5371 overflow. */
5372 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
5373 cand->iv->step,
5374 fold_convert (TREE_TYPE (cand->iv->step), a));
5375 if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
5376 return false;
5378 /* Determine the new comparison operator. */
5379 comp = step < 0 ? GT_EXPR : LT_EXPR;
5380 if (*comp_p == NE_EXPR)
5381 *comp_p = comp;
5382 else if (*comp_p == EQ_EXPR)
5383 *comp_p = invert_tree_comparison (comp, false);
5384 else
5385 gcc_unreachable ();
5387 return true;
5390 /* Check whether it is possible to express the condition in USE by comparison
5391 of candidate CAND. If so, store the value compared with to BOUND, and the
5392 comparison operator to COMP. */
5394 static bool
5395 may_eliminate_iv (struct ivopts_data *data,
5396 struct iv_use *use, struct iv_cand *cand, tree *bound,
5397 enum tree_code *comp)
5399 basic_block ex_bb;
5400 edge exit;
5401 tree period;
5402 class loop *loop = data->current_loop;
5403 aff_tree bnd;
5404 class tree_niter_desc *desc = NULL;
5406 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5407 return false;
5409 /* For now works only for exits that dominate the loop latch.
5410 TODO: extend to other conditions inside loop body. */
5411 ex_bb = gimple_bb (use->stmt);
5412 if (use->stmt != last_nondebug_stmt (ex_bb)
5413 || gimple_code (use->stmt) != GIMPLE_COND
5414 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5415 return false;
5417 exit = EDGE_SUCC (ex_bb, 0);
5418 if (flow_bb_inside_loop_p (loop, exit->dest))
5419 exit = EDGE_SUCC (ex_bb, 1);
5420 if (flow_bb_inside_loop_p (loop, exit->dest))
5421 return false;
5423 desc = niter_for_exit (data, exit);
5424 if (!desc)
5425 return false;
5427 /* Determine whether we can use the variable to test the exit condition.
5428 This is the case iff the period of the induction variable is greater
5429 than the number of iterations for which the exit condition is true. */
5430 period = iv_period (cand->iv);
5432 /* If the number of iterations is constant, compare against it directly. */
5433 if (TREE_CODE (desc->niter) == INTEGER_CST)
5435 /* See cand_value_at. */
5436 if (stmt_after_increment (loop, cand, use->stmt))
5438 if (!tree_int_cst_lt (desc->niter, period))
5439 return false;
5441 else
5443 if (tree_int_cst_lt (period, desc->niter))
5444 return false;
5448 /* If not, and if this is the only possible exit of the loop, see whether
5449 we can get a conservative estimate on the number of iterations of the
5450 entire loop and compare against that instead. */
5451 else
5453 widest_int period_value, max_niter;
5455 max_niter = desc->max;
5456 if (stmt_after_increment (loop, cand, use->stmt))
5457 max_niter += 1;
5458 period_value = wi::to_widest (period);
5459 if (wi::gtu_p (max_niter, period_value))
5461 /* See if we can take advantage of inferred loop bound
5462 information. */
5463 if (data->loop_single_exit_p)
5465 if (!max_loop_iterations (loop, &max_niter))
5466 return false;
5467 /* The loop bound is already adjusted by adding 1. */
5468 if (wi::gtu_p (max_niter, period_value))
5469 return false;
5471 else
5472 return false;
5476 /* For doloop IV cand, the bound would be zero. It's safe whether
5477 may_be_zero is set or not. */
5478 if (cand->doloop_p)
5480 *bound = build_int_cst (TREE_TYPE (cand->iv->base), 0);
5481 *comp = iv_elimination_compare (data, use);
5482 return true;
5485 cand_value_at (loop, cand, use->stmt, desc, &bnd);
5487 *bound = fold_convert (TREE_TYPE (cand->iv->base),
5488 aff_combination_to_tree (&bnd));
5489 *comp = iv_elimination_compare (data, use);
5491 /* It is unlikely that computing the number of iterations using division
5492 would be more profitable than keeping the original induction variable. */
5493 if (expression_expensive_p (*bound))
5494 return false;
5496 /* Sometimes, it is possible to handle the situation that the number of
5497 iterations may be zero unless additional assumptions hold, by using <
5498 instead of != in the exit condition.
5500 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5501 base the exit condition on it. However, that is often too
5502 expensive. */
5503 if (!integer_zerop (desc->may_be_zero))
5504 return iv_elimination_compare_lt (data, cand, comp, desc);
5506 return true;
5509 /* Calculates the cost of BOUND if it is a PARM_DECL. A PARM_DECL must
5510 be copied if it is used in the loop body and DATA->body_includes_call. */
5512 static int
5513 parm_decl_cost (struct ivopts_data *data, tree bound)
5515 tree sbound = bound;
5516 STRIP_NOPS (sbound);
5518 if (TREE_CODE (sbound) == SSA_NAME
5519 && SSA_NAME_IS_DEFAULT_DEF (sbound)
5520 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5521 && data->body_includes_call)
5522 return COSTS_N_INSNS (1);
5524 return 0;
5527 /* Determines cost of computing the use in GROUP with CAND in a condition. */
5529 static bool
5530 determine_group_iv_cost_cond (struct ivopts_data *data,
5531 struct iv_group *group, struct iv_cand *cand)
5533 tree bound = NULL_TREE;
5534 struct iv *cmp_iv;
5535 bitmap inv_exprs = NULL;
5536 bitmap inv_vars_elim = NULL, inv_vars_express = NULL, inv_vars;
5537 comp_cost elim_cost = infinite_cost, express_cost, cost, bound_cost;
5538 enum comp_iv_rewrite rewrite_type;
5539 iv_inv_expr_ent *inv_expr_elim = NULL, *inv_expr_express = NULL, *inv_expr;
5540 tree *control_var, *bound_cst;
5541 enum tree_code comp = ERROR_MARK;
5542 struct iv_use *use = group->vuses[0];
5544 /* Extract condition operands. */
5545 rewrite_type = extract_cond_operands (data, use->stmt, &control_var,
5546 &bound_cst, NULL, &cmp_iv);
5547 gcc_assert (rewrite_type != COMP_IV_NA);
5549 /* Try iv elimination. */
5550 if (rewrite_type == COMP_IV_ELIM
5551 && may_eliminate_iv (data, use, cand, &bound, &comp))
5553 elim_cost = force_var_cost (data, bound, &inv_vars_elim);
5554 if (elim_cost.cost == 0)
5555 elim_cost.cost = parm_decl_cost (data, bound);
5556 else if (TREE_CODE (bound) == INTEGER_CST)
5557 elim_cost.cost = 0;
5558 /* If we replace a loop condition 'i < n' with 'p < base + n',
5559 inv_vars_elim will have 'base' and 'n' set, which implies that both
5560 'base' and 'n' will be live during the loop. More likely,
5561 'base + n' will be loop invariant, resulting in only one live value
5562 during the loop. So in that case we clear inv_vars_elim and set
5563 inv_expr_elim instead. */
5564 if (inv_vars_elim && bitmap_count_bits (inv_vars_elim) > 1)
5566 inv_expr_elim = get_loop_invariant_expr (data, bound);
5567 bitmap_clear (inv_vars_elim);
5569 /* The bound is a loop invariant, so it will be only computed
5570 once. */
5571 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5574 /* When the condition is a comparison of the candidate IV against
5575 zero, prefer this IV.
5577 TODO: The constant that we're subtracting from the cost should
5578 be target-dependent. This information should be added to the
5579 target costs for each backend. */
5580 if (!elim_cost.infinite_cost_p () /* Do not try to decrease infinite! */
5581 && integer_zerop (*bound_cst)
5582 && (operand_equal_p (*control_var, cand->var_after, 0)
5583 || operand_equal_p (*control_var, cand->var_before, 0)))
5584 elim_cost -= 1;
5586 express_cost = get_computation_cost (data, use, cand, false,
5587 &inv_vars_express, NULL,
5588 &inv_expr_express);
5589 if (cmp_iv != NULL)
5590 find_inv_vars (data, &cmp_iv->base, &inv_vars_express);
5592 /* Count the cost of the original bound as well. */
5593 bound_cost = force_var_cost (data, *bound_cst, NULL);
5594 if (bound_cost.cost == 0)
5595 bound_cost.cost = parm_decl_cost (data, *bound_cst);
5596 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5597 bound_cost.cost = 0;
5598 express_cost += bound_cost;
5600 /* Choose the better approach, preferring the eliminated IV. */
5601 if (elim_cost <= express_cost)
5603 cost = elim_cost;
5604 inv_vars = inv_vars_elim;
5605 inv_vars_elim = NULL;
5606 inv_expr = inv_expr_elim;
5607 /* For doloop candidate/use pair, adjust to zero cost. */
5608 if (group->doloop_p && cand->doloop_p && elim_cost.cost > no_cost.cost)
5609 cost = no_cost;
5611 else
5613 cost = express_cost;
5614 inv_vars = inv_vars_express;
5615 inv_vars_express = NULL;
5616 bound = NULL_TREE;
5617 comp = ERROR_MARK;
5618 inv_expr = inv_expr_express;
5621 if (inv_expr)
5623 inv_exprs = BITMAP_ALLOC (NULL);
5624 bitmap_set_bit (inv_exprs, inv_expr->id);
5626 set_group_iv_cost (data, group, cand, cost,
5627 inv_vars, bound, comp, inv_exprs);
5629 if (inv_vars_elim)
5630 BITMAP_FREE (inv_vars_elim);
5631 if (inv_vars_express)
5632 BITMAP_FREE (inv_vars_express);
5634 return !cost.infinite_cost_p ();
5637 /* Determines cost of computing uses in GROUP with CAND. Returns false
5638 if USE cannot be represented with CAND. */
5640 static bool
5641 determine_group_iv_cost (struct ivopts_data *data,
5642 struct iv_group *group, struct iv_cand *cand)
5644 switch (group->type)
5646 case USE_NONLINEAR_EXPR:
5647 return determine_group_iv_cost_generic (data, group, cand);
5649 case USE_REF_ADDRESS:
5650 case USE_PTR_ADDRESS:
5651 return determine_group_iv_cost_address (data, group, cand);
5653 case USE_COMPARE:
5654 return determine_group_iv_cost_cond (data, group, cand);
5656 default:
5657 gcc_unreachable ();
5661 /* Return true if get_computation_cost indicates that autoincrement is
5662 a possibility for the pair of USE and CAND, false otherwise. */
5664 static bool
5665 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5666 struct iv_cand *cand)
5668 if (!address_p (use->type))
5669 return false;
5671 bool can_autoinc = false;
5672 get_computation_cost (data, use, cand, true, NULL, &can_autoinc, NULL);
5673 return can_autoinc;
5676 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5677 use that allows autoincrement, and set their AINC_USE if possible. */
5679 static void
5680 set_autoinc_for_original_candidates (struct ivopts_data *data)
5682 unsigned i, j;
5684 for (i = 0; i < data->vcands.length (); i++)
5686 struct iv_cand *cand = data->vcands[i];
5687 struct iv_use *closest_before = NULL;
5688 struct iv_use *closest_after = NULL;
5689 if (cand->pos != IP_ORIGINAL)
5690 continue;
5692 for (j = 0; j < data->vgroups.length (); j++)
5694 struct iv_group *group = data->vgroups[j];
5695 struct iv_use *use = group->vuses[0];
5696 unsigned uid = gimple_uid (use->stmt);
5698 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5699 continue;
5701 if (uid < gimple_uid (cand->incremented_at)
5702 && (closest_before == NULL
5703 || uid > gimple_uid (closest_before->stmt)))
5704 closest_before = use;
5706 if (uid > gimple_uid (cand->incremented_at)
5707 && (closest_after == NULL
5708 || uid < gimple_uid (closest_after->stmt)))
5709 closest_after = use;
5712 if (closest_before != NULL
5713 && autoinc_possible_for_pair (data, closest_before, cand))
5714 cand->ainc_use = closest_before;
5715 else if (closest_after != NULL
5716 && autoinc_possible_for_pair (data, closest_after, cand))
5717 cand->ainc_use = closest_after;
5721 /* Relate compare use with all candidates. */
5723 static void
5724 relate_compare_use_with_all_cands (struct ivopts_data *data)
5726 unsigned i, count = data->vcands.length ();
5727 for (i = 0; i < data->vgroups.length (); i++)
5729 struct iv_group *group = data->vgroups[i];
5731 if (group->type == USE_COMPARE)
5732 bitmap_set_range (group->related_cands, 0, count);
5736 /* If PREFERRED_MODE is suitable and profitable, use it to compute the
5737 doloop iv base from niter: base = niter + 1. */
5739 static tree
5740 compute_doloop_base_on_mode (machine_mode preferred_mode, tree niter,
5741 const widest_int &iterations_max)
5743 tree ntype = TREE_TYPE (niter);
5744 tree pref_type = lang_hooks.types.type_for_mode (preferred_mode, 1);
5745 if (!pref_type)
5746 return fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
5747 build_int_cst (ntype, 1));
5749 gcc_assert (TREE_CODE (pref_type) == INTEGER_TYPE);
5751 int prec = TYPE_PRECISION (ntype);
5752 int pref_prec = TYPE_PRECISION (pref_type);
5754 tree base;
5756 /* Check if PREFERRED_MODE is able to represent niter. */
5757 if (pref_prec > prec
5758 || wi::ltu_p (iterations_max,
5759 widest_int::from (wi::max_value (pref_prec, UNSIGNED),
5760 UNSIGNED)))
5762 /* No wrap; it is safe to convert to the preferred type after computing niter + 1. */
5763 if (wi::ltu_p (iterations_max,
5764 widest_int::from (wi::max_value (prec, UNSIGNED),
5765 UNSIGNED)))
5767 /* This could help to optimize the "-1 +1" pair when niter looks
5768 like "n-1": n is in the original mode. "base = (n - 1) + 1"
5769 in PREFERRED_MODE could then become base = (PREFERRED_TYPE)n. */
5770 base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
5771 build_int_cst (ntype, 1));
5772 base = fold_convert (pref_type, base);
5775 /* To avoid wrapping, convert niter to the preferred type before adding 1. */
5776 else
5778 niter = fold_convert (pref_type, niter);
5779 base = fold_build2 (PLUS_EXPR, pref_type, unshare_expr (niter),
5780 build_int_cst (pref_type, 1));
5783 else
5784 base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
5785 build_int_cst (ntype, 1));
5786 return base;
5789 /* Add one doloop dedicated IV candidate:
5790 - Base is (may_be_zero ? 1 : (niter + 1)).
5791 - Step is -1. */
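/* For illustration: the candidate is meant to count the remaining
   iterations, decrementing by one each time around the loop, which is the
   shape that "decrement and branch while nonzero" style doloop
   instructions expect.  */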
5793 static void
5794 add_iv_candidate_for_doloop (struct ivopts_data *data)
5796 tree_niter_desc *niter_desc = niter_for_single_dom_exit (data);
5797 gcc_assert (niter_desc && niter_desc->assumptions);
5799 tree niter = niter_desc->niter;
5800 tree ntype = TREE_TYPE (niter);
5801 gcc_assert (TREE_CODE (ntype) == INTEGER_TYPE);
5803 tree may_be_zero = niter_desc->may_be_zero;
5804 if (may_be_zero && integer_zerop (may_be_zero))
5805 may_be_zero = NULL_TREE;
5806 if (may_be_zero)
5808 if (COMPARISON_CLASS_P (may_be_zero))
5810 niter = fold_build3 (COND_EXPR, ntype, may_be_zero,
5811 build_int_cst (ntype, 0),
5812 rewrite_to_non_trapping_overflow (niter));
5814 /* Don't try to obtain the iteration count expression when may_be_zero is
5815 integer_nonzerop (the iteration count is actually one) or has any other form. */
5816 else
5817 return;
5820 machine_mode mode = TYPE_MODE (ntype);
5821 machine_mode pref_mode = targetm.preferred_doloop_mode (mode);
5823 tree base;
5824 if (mode != pref_mode)
5826 base = compute_doloop_base_on_mode (pref_mode, niter, niter_desc->max);
5827 ntype = TREE_TYPE (base);
5829 else
5830 base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
5831 build_int_cst (ntype, 1));
5834 add_candidate (data, base, build_int_cst (ntype, -1), true, NULL, NULL, true);
5837 /* Finds the candidates for the induction variables. */
5839 static void
5840 find_iv_candidates (struct ivopts_data *data)
5842 /* Add commonly used ivs. */
5843 add_standard_iv_candidates (data);
5845 /* Add doloop dedicated ivs. */
5846 if (data->doloop_use_p)
5847 add_iv_candidate_for_doloop (data);
5849 /* Add old induction variables. */
5850 add_iv_candidate_for_bivs (data);
5852 /* Add induction variables derived from uses. */
5853 add_iv_candidate_for_groups (data);
5855 set_autoinc_for_original_candidates (data);
5857 /* Record the important candidates. */
5858 record_important_candidates (data);
5860 /* Relate compare iv_use with all candidates. */
5861 if (!data->consider_all_candidates)
5862 relate_compare_use_with_all_cands (data);
5864 if (dump_file && (dump_flags & TDF_DETAILS))
5866 unsigned i;
5868 fprintf (dump_file, "\n<Important Candidates>:\t");
5869 for (i = 0; i < data->vcands.length (); i++)
5870 if (data->vcands[i]->important)
5871 fprintf (dump_file, " %d,", data->vcands[i]->id);
5872 fprintf (dump_file, "\n");
5874 fprintf (dump_file, "\n<Group, Cand> Related:\n");
5875 for (i = 0; i < data->vgroups.length (); i++)
5877 struct iv_group *group = data->vgroups[i];
5879 if (group->related_cands)
5881 fprintf (dump_file, " Group %d:\t", group->id);
5882 dump_bitmap (dump_file, group->related_cands);
5885 fprintf (dump_file, "\n");
5889 /* Determines the costs of computing the iv uses with each iv candidate. */
5891 static void
5892 determine_group_iv_costs (struct ivopts_data *data)
5894 unsigned i, j;
5895 struct iv_cand *cand;
5896 struct iv_group *group;
5897 bitmap to_clear = BITMAP_ALLOC (NULL);
5899 alloc_use_cost_map (data);
5901 for (i = 0; i < data->vgroups.length (); i++)
5903 group = data->vgroups[i];
5905 if (data->consider_all_candidates)
5907 for (j = 0; j < data->vcands.length (); j++)
5909 cand = data->vcands[j];
5910 determine_group_iv_cost (data, group, cand);
5913 else
5915 bitmap_iterator bi;
5917 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, j, bi)
5919 cand = data->vcands[j];
5920 if (!determine_group_iv_cost (data, group, cand))
5921 bitmap_set_bit (to_clear, j);
5924 /* Remove the candidates for which the cost is infinite from
5925 the list of related candidates. */
5926 bitmap_and_compl_into (group->related_cands, to_clear);
5927 bitmap_clear (to_clear);
5931 BITMAP_FREE (to_clear);
5933 if (dump_file && (dump_flags & TDF_DETAILS))
5935 bitmap_iterator bi;
5937 /* Dump invariant variables. */
5938 fprintf (dump_file, "\n<Invariant Vars>:\n");
5939 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5941 struct version_info *info = ver_info (data, i);
5942 if (info->inv_id)
5944 fprintf (dump_file, "Inv %d:\t", info->inv_id);
5945 print_generic_expr (dump_file, info->name, TDF_SLIM);
5946 fprintf (dump_file, "%s\n",
5947 info->has_nonlin_use ? "" : "\t(eliminable)");
5951 /* Dump invariant expressions. */
5952 fprintf (dump_file, "\n<Invariant Expressions>:\n");
5953 auto_vec <iv_inv_expr_ent *> list (data->inv_expr_tab->elements ());
5955 for (hash_table<iv_inv_expr_hasher>::iterator it
5956 = data->inv_expr_tab->begin (); it != data->inv_expr_tab->end ();
5957 ++it)
5958 list.safe_push (*it);
5960 list.qsort (sort_iv_inv_expr_ent);
5962 for (i = 0; i < list.length (); ++i)
5964 fprintf (dump_file, "inv_expr %d: \t", list[i]->id);
5965 print_generic_expr (dump_file, list[i]->expr, TDF_SLIM);
5966 fprintf (dump_file, "\n");
5969 fprintf (dump_file, "\n<Group-candidate Costs>:\n");
5971 for (i = 0; i < data->vgroups.length (); i++)
5973 group = data->vgroups[i];
5975 fprintf (dump_file, "Group %d:\n", i);
5976 fprintf (dump_file, " cand\tcost\tcompl.\tinv.expr.\tinv.vars\n");
5977 for (j = 0; j < group->n_map_members; j++)
5979 if (!group->cost_map[j].cand
5980 || group->cost_map[j].cost.infinite_cost_p ())
5981 continue;
5983 fprintf (dump_file, " %d\t%" PRId64 "\t%d\t",
5984 group->cost_map[j].cand->id,
5985 group->cost_map[j].cost.cost,
5986 group->cost_map[j].cost.complexity);
5987 if (!group->cost_map[j].inv_exprs
5988 || bitmap_empty_p (group->cost_map[j].inv_exprs))
5989 fprintf (dump_file, "NIL;\t");
5990 else
5991 bitmap_print (dump_file,
5992 group->cost_map[j].inv_exprs, "", ";\t");
5993 if (!group->cost_map[j].inv_vars
5994 || bitmap_empty_p (group->cost_map[j].inv_vars))
5995 fprintf (dump_file, "NIL;\n");
5996 else
5997 bitmap_print (dump_file,
5998 group->cost_map[j].inv_vars, "", "\n");
6001 fprintf (dump_file, "\n");
6003 fprintf (dump_file, "\n");
6007 /* Determines cost of the candidate CAND. */
6009 static void
6010 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
6012 comp_cost cost_base;
6013 int64_t cost, cost_step;
6014 tree base;
6016 gcc_assert (cand->iv != NULL);
6018 /* There are two costs associated with the candidate -- its increment
6019 and its initialization. The second is almost negligible for any loop
6020 that rolls enough, so we take it into account only very little. */
6022 base = cand->iv->base;
6023 cost_base = force_var_cost (data, base, NULL);
6024 /* It is exceptional for the iv register to happen to be initialized with
6025 the proper value at no cost. In general, there will at least be a regcopy
6026 or a const set. */
6027 if (cost_base.cost == 0)
6028 cost_base.cost = COSTS_N_INSNS (1);
6029 /* Doloop decrement should be considered as zero cost. */
6030 if (cand->doloop_p)
6031 cost_step = 0;
6032 else
6033 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
6034 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
6036 /* Prefer the original ivs unless we may gain something by replacing them.
6037 The reason is to make debugging simpler; so this is not relevant for
6038 artificial ivs created by other optimization passes. */
6039 if ((cand->pos != IP_ORIGINAL
6040 || !SSA_NAME_VAR (cand->var_before)
6041 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
6042 /* Prefer doloop as well. */
6043 && !cand->doloop_p)
6044 cost++;
6046 /* Prefer not to insert statements into latch unless there are some
6047 already (so that we do not create unnecessary jumps). */
6048 if (cand->pos == IP_END
6049 && empty_block_p (ip_end_pos (data->current_loop)))
6050 cost++;
6052 cand->cost = cost;
6053 cand->cost_step = cost_step;
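/* Rough illustration of the cost computed above (all numbers assumed): with
   add_cost = 4 and a base needing one setup instruction (cost_base.cost = 4),
   adjust_setup_cost amortizes the setup over the expected trip count, so for
   a loop rolling about 10 times the cost is roughly 4; the +1 penalties then
   make an artificial, non-doloop candidate slightly more expensive than an
   otherwise equivalent original or doloop candidate.  */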
6056 /* Determines costs of computation of the candidates. */
6058 static void
6059 determine_iv_costs (struct ivopts_data *data)
6061 unsigned i;
6063 if (dump_file && (dump_flags & TDF_DETAILS))
6065 fprintf (dump_file, "<Candidate Costs>:\n");
6066 fprintf (dump_file, " cand\tcost\n");
6069 for (i = 0; i < data->vcands.length (); i++)
6071 struct iv_cand *cand = data->vcands[i];
6073 determine_iv_cost (data, cand);
6075 if (dump_file && (dump_flags & TDF_DETAILS))
6076 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
6079 if (dump_file && (dump_flags & TDF_DETAILS))
6080 fprintf (dump_file, "\n");
6083 /* Estimate register pressure for loop having N_INVS invariants and N_CANDS
6084 induction variables. Note N_INVS includes both invariant variables and
6085 invariant expressions. */
6087 static unsigned
6088 ivopts_estimate_reg_pressure (struct ivopts_data *data, unsigned n_invs,
6089 unsigned n_cands)
6091 unsigned cost;
6092 unsigned n_old = data->regs_used, n_new = n_invs + n_cands;
6093 unsigned regs_needed = n_new + n_old, available_regs = target_avail_regs;
6094 bool speed = data->speed;
6096 /* If there is a call in the loop body, the call-clobbered registers
6097 are not available for loop invariants. */
6098 if (data->body_includes_call)
6099 available_regs = available_regs - target_clobbered_regs;
6101 /* If we have enough registers. */
6102 if (regs_needed + target_res_regs < available_regs)
6103 cost = n_new;
6104 /* If close to running out of registers, try to preserve them. */
6105 else if (regs_needed <= available_regs)
6106 cost = target_reg_cost [speed] * regs_needed;
6107 /* If we run out of available registers but the number of candidates
6108 does not exceed them, we penalize the extra registers using target_spill_cost. */
6109 else if (n_cands <= available_regs)
6110 cost = target_reg_cost [speed] * available_regs
6111 + target_spill_cost [speed] * (regs_needed - available_regs);
6112 /* If even the number of candidates exceeds the available registers, we
6113 penalize the extra candidate registers using target_spill_cost * 2,
6114 because it is more expensive to spill an induction variable than an invariant. */
6115 else
6116 cost = target_reg_cost [speed] * available_regs
6117 + target_spill_cost [speed] * (n_cands - available_regs) * 2
6118 + target_spill_cost [speed] * (regs_needed - n_cands);
6120 /* Finally, add the number of candidates, so that we prefer eliminating
6121 induction variables if possible. */
6122 return cost + n_cands;
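/* Worked example with purely hypothetical numbers: n_old = 2 registers
   already used, available_regs = 10, target_res_regs = 3, target_reg_cost = 2
   and target_spill_cost = 8.

     n_invs = 1, n_cands = 2:  regs_needed = 5 and 5 + 3 < 10, so the
	result is n_new + n_cands = 3 + 2 = 5;
     n_invs = 5, n_cands = 4:  regs_needed = 11 > 10 but n_cands <= 10, so
	the result is 2*10 + 8*(11 - 10) + 4 = 32.  */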
6125 /* For each size of the induction variable set determine the penalty. */
6127 static void
6128 determine_set_costs (struct ivopts_data *data)
6130 unsigned j, n;
6131 gphi *phi;
6132 gphi_iterator psi;
6133 tree op;
6134 class loop *loop = data->current_loop;
6135 bitmap_iterator bi;
6137 if (dump_file && (dump_flags & TDF_DETAILS))
6139 fprintf (dump_file, "<Global Costs>:\n");
6140 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
6141 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
6142 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
6143 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
6146 n = 0;
6147 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
6149 phi = psi.phi ();
6150 op = PHI_RESULT (phi);
6152 if (virtual_operand_p (op))
6153 continue;
6155 if (get_iv (data, op))
6156 continue;
6158 if (!POINTER_TYPE_P (TREE_TYPE (op))
6159 && !INTEGRAL_TYPE_P (TREE_TYPE (op)))
6160 continue;
6162 n++;
6165 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6167 struct version_info *info = ver_info (data, j);
6169 if (info->inv_id && info->has_nonlin_use)
6170 n++;
6173 data->regs_used = n;
6174 if (dump_file && (dump_flags & TDF_DETAILS))
6175 fprintf (dump_file, " regs_used %d\n", n);
6177 if (dump_file && (dump_flags & TDF_DETAILS))
6179 fprintf (dump_file, " cost for size:\n");
6180 fprintf (dump_file, " ivs\tcost\n");
6181 for (j = 0; j <= 2 * target_avail_regs; j++)
6182 fprintf (dump_file, " %d\t%d\n", j,
6183 ivopts_estimate_reg_pressure (data, 0, j));
6184 fprintf (dump_file, "\n");
6188 /* Returns true if A is a cheaper cost pair than B. */
6190 static bool
6191 cheaper_cost_pair (class cost_pair *a, class cost_pair *b)
6193 if (!a)
6194 return false;
6196 if (!b)
6197 return true;
6199 if (a->cost < b->cost)
6200 return true;
6202 if (b->cost < a->cost)
6203 return false;
6205 /* In case the costs are the same, prefer the cheaper candidate. */
6206 if (a->cand->cost < b->cand->cost)
6207 return true;
6209 return false;
6212 /* Compare if A is a more expensive cost pair than B. Return 1, 0 and -1
6213 for more expensive, equal and cheaper respectively. */
6215 static int
6216 compare_cost_pair (class cost_pair *a, class cost_pair *b)
6218 if (cheaper_cost_pair (a, b))
6219 return -1;
6220 if (cheaper_cost_pair (b, a))
6221 return 1;
6223 return 0;
6226 /* Returns the candidate by which USE is expressed in IVS. */
6228 static class cost_pair *
6229 iv_ca_cand_for_group (class iv_ca *ivs, struct iv_group *group)
6231 return ivs->cand_for_group[group->id];
6234 /* Computes the cost field of IVS structure. */
6236 static void
6237 iv_ca_recount_cost (struct ivopts_data *data, class iv_ca *ivs)
6239 comp_cost cost = ivs->cand_use_cost;
6241 cost += ivs->cand_cost;
6242 cost += ivopts_estimate_reg_pressure (data, ivs->n_invs, ivs->n_cands);
6243 ivs->cost = cost;
6246 /* Remove use of invariants in set INVS by decreasing counter in N_INV_USES
6247 and IVS. */
6249 static void
6250 iv_ca_set_remove_invs (class iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
6252 bitmap_iterator bi;
6253 unsigned iid;
6255 if (!invs)
6256 return;
6258 gcc_assert (n_inv_uses != NULL);
6259 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6261 n_inv_uses[iid]--;
6262 if (n_inv_uses[iid] == 0)
6263 ivs->n_invs--;
6267 /* Set USE not to be expressed by any candidate in IVS. */
6269 static void
6270 iv_ca_set_no_cp (struct ivopts_data *data, class iv_ca *ivs,
6271 struct iv_group *group)
6273 unsigned gid = group->id, cid;
6274 class cost_pair *cp;
6276 cp = ivs->cand_for_group[gid];
6277 if (!cp)
6278 return;
6279 cid = cp->cand->id;
6281 ivs->bad_groups++;
6282 ivs->cand_for_group[gid] = NULL;
6283 ivs->n_cand_uses[cid]--;
6285 if (ivs->n_cand_uses[cid] == 0)
6287 bitmap_clear_bit (ivs->cands, cid);
6288 if (!cp->cand->doloop_p || !targetm.have_count_reg_decr_p)
6289 ivs->n_cands--;
6290 ivs->cand_cost -= cp->cand->cost;
6291 iv_ca_set_remove_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
6292 iv_ca_set_remove_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
6295 ivs->cand_use_cost -= cp->cost;
6296 iv_ca_set_remove_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
6297 iv_ca_set_remove_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
6298 iv_ca_recount_cost (data, ivs);
6301 /* Add use of invariants in set INVS by increasing counter in N_INV_USES and
6302 IVS. */
6304 static void
6305 iv_ca_set_add_invs (class iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
6307 bitmap_iterator bi;
6308 unsigned iid;
6310 if (!invs)
6311 return;
6313 gcc_assert (n_inv_uses != NULL);
6314 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6316 n_inv_uses[iid]++;
6317 if (n_inv_uses[iid] == 1)
6318 ivs->n_invs++;
6322 /* Set cost pair for GROUP in set IVS to CP. */
6324 static void
6325 iv_ca_set_cp (struct ivopts_data *data, class iv_ca *ivs,
6326 struct iv_group *group, class cost_pair *cp)
6328 unsigned gid = group->id, cid;
6330 if (ivs->cand_for_group[gid] == cp)
6331 return;
6333 if (ivs->cand_for_group[gid])
6334 iv_ca_set_no_cp (data, ivs, group);
6336 if (cp)
6338 cid = cp->cand->id;
6340 ivs->bad_groups--;
6341 ivs->cand_for_group[gid] = cp;
6342 ivs->n_cand_uses[cid]++;
6343 if (ivs->n_cand_uses[cid] == 1)
6345 bitmap_set_bit (ivs->cands, cid);
6346 if (!cp->cand->doloop_p || !targetm.have_count_reg_decr_p)
6347 ivs->n_cands++;
6348 ivs->cand_cost += cp->cand->cost;
6349 iv_ca_set_add_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
6350 iv_ca_set_add_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
6353 ivs->cand_use_cost += cp->cost;
6354 iv_ca_set_add_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
6355 iv_ca_set_add_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
6356 iv_ca_recount_cost (data, ivs);
6360 /* Extend set IVS by expressing USE by some of the candidates in it
6361 if possible. Consider all important candidates if candidates in
6362 set IVS don't give any result. */
6364 static void
6365 iv_ca_add_group (struct ivopts_data *data, class iv_ca *ivs,
6366 struct iv_group *group)
6368 class cost_pair *best_cp = NULL, *cp;
6369 bitmap_iterator bi;
6370 unsigned i;
6371 struct iv_cand *cand;
6373 gcc_assert (ivs->upto >= group->id);
6374 ivs->upto++;
6375 ivs->bad_groups++;
6377 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6379 cand = data->vcands[i];
6380 cp = get_group_iv_cost (data, group, cand);
6381 if (cheaper_cost_pair (cp, best_cp))
6382 best_cp = cp;
6385 if (best_cp == NULL)
6387 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6389 cand = data->vcands[i];
6390 cp = get_group_iv_cost (data, group, cand);
6391 if (cheaper_cost_pair (cp, best_cp))
6392 best_cp = cp;
6396 iv_ca_set_cp (data, ivs, group, best_cp);
6399 /* Get cost for assignment IVS. */
6401 static comp_cost
6402 iv_ca_cost (class iv_ca *ivs)
6404 /* This was a conditional expression but it triggered a bug in
6405 Sun C 5.5. */
6406 if (ivs->bad_groups)
6407 return infinite_cost;
6408 else
6409 return ivs->cost;
6412 /* Compare if applying NEW_CP to GROUP for IVS introduces more invariants
6413 than OLD_CP. Return 1, 0 and -1 for more, equal and fewer invariants
6414 respectively. */
6416 static int
6417 iv_ca_compare_deps (struct ivopts_data *data, class iv_ca *ivs,
6418 struct iv_group *group, class cost_pair *old_cp,
6419 class cost_pair *new_cp)
6421 gcc_assert (old_cp && new_cp && old_cp != new_cp);
6422 unsigned old_n_invs = ivs->n_invs;
6423 iv_ca_set_cp (data, ivs, group, new_cp);
6424 unsigned new_n_invs = ivs->n_invs;
6425 iv_ca_set_cp (data, ivs, group, old_cp);
6427 return new_n_invs > old_n_invs ? 1 : (new_n_invs < old_n_invs ? -1 : 0);
6430 /* Creates change of expressing GROUP by NEW_CP instead of OLD_CP and chains
6431 it before NEXT. */
6433 static struct iv_ca_delta *
6434 iv_ca_delta_add (struct iv_group *group, class cost_pair *old_cp,
6435 class cost_pair *new_cp, struct iv_ca_delta *next)
6437 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
6439 change->group = group;
6440 change->old_cp = old_cp;
6441 change->new_cp = new_cp;
6442 change->next = next;
6444 return change;
6447 /* Joins two lists of changes L1 and L2. Destructive -- old lists
6448 are rewritten. */
6450 static struct iv_ca_delta *
6451 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
6453 struct iv_ca_delta *last;
6455 if (!l2)
6456 return l1;
6458 if (!l1)
6459 return l2;
6461 for (last = l1; last->next; last = last->next)
6462 continue;
6463 last->next = l2;
6465 return l1;
6468 /* Reverse the list of changes DELTA, forming the inverse to it. */
6470 static struct iv_ca_delta *
6471 iv_ca_delta_reverse (struct iv_ca_delta *delta)
6473 struct iv_ca_delta *act, *next, *prev = NULL;
6475 for (act = delta; act; act = next)
6477 next = act->next;
6478 act->next = prev;
6479 prev = act;
6481 std::swap (act->old_cp, act->new_cp);
6484 return prev;
6487 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
6488 reverted instead. */
6490 static void
6491 iv_ca_delta_commit (struct ivopts_data *data, class iv_ca *ivs,
6492 struct iv_ca_delta *delta, bool forward)
6494 class cost_pair *from, *to;
6495 struct iv_ca_delta *act;
6497 if (!forward)
6498 delta = iv_ca_delta_reverse (delta);
6500 for (act = delta; act; act = act->next)
6502 from = act->old_cp;
6503 to = act->new_cp;
6504 gcc_assert (iv_ca_cand_for_group (ivs, act->group) == from);
6505 iv_ca_set_cp (data, ivs, act->group, to);
6508 if (!forward)
6509 iv_ca_delta_reverse (delta);
6512 /* Returns true if CAND is used in IVS. */
6514 static bool
6515 iv_ca_cand_used_p (class iv_ca *ivs, struct iv_cand *cand)
6517 return ivs->n_cand_uses[cand->id] > 0;
6520 /* Returns number of induction variable candidates in the set IVS. */
6522 static unsigned
6523 iv_ca_n_cands (class iv_ca *ivs)
6525 return ivs->n_cands;
6528 /* Free the list of changes DELTA. */
6530 static void
6531 iv_ca_delta_free (struct iv_ca_delta **delta)
6533 struct iv_ca_delta *act, *next;
6535 for (act = *delta; act; act = next)
6537 next = act->next;
6538 free (act);
6541 *delta = NULL;
6544 /* Allocates new iv candidates assignment. */
6546 static class iv_ca *
6547 iv_ca_new (struct ivopts_data *data)
6549 class iv_ca *nw = XNEW (class iv_ca);
6551 nw->upto = 0;
6552 nw->bad_groups = 0;
6553 nw->cand_for_group = XCNEWVEC (class cost_pair *,
6554 data->vgroups.length ());
6555 nw->n_cand_uses = XCNEWVEC (unsigned, data->vcands.length ());
6556 nw->cands = BITMAP_ALLOC (NULL);
6557 nw->n_cands = 0;
6558 nw->n_invs = 0;
6559 nw->cand_use_cost = no_cost;
6560 nw->cand_cost = 0;
6561 nw->n_inv_var_uses = XCNEWVEC (unsigned, data->max_inv_var_id + 1);
6562 nw->n_inv_expr_uses = XCNEWVEC (unsigned, data->max_inv_expr_id + 1);
6563 nw->cost = no_cost;
6565 return nw;
6568 /* Free memory occupied by the set IVS. */
6570 static void
6571 iv_ca_free (class iv_ca **ivs)
6573 free ((*ivs)->cand_for_group);
6574 free ((*ivs)->n_cand_uses);
6575 BITMAP_FREE ((*ivs)->cands);
6576 free ((*ivs)->n_inv_var_uses);
6577 free ((*ivs)->n_inv_expr_uses);
6578 free (*ivs);
6579 *ivs = NULL;
6582 /* Dumps IVS to FILE. */
6584 static void
6585 iv_ca_dump (struct ivopts_data *data, FILE *file, class iv_ca *ivs)
6587 unsigned i;
6588 comp_cost cost = iv_ca_cost (ivs);
6590 fprintf (file, " cost: %" PRId64 " (complexity %d)\n", cost.cost,
6591 cost.complexity);
6592 fprintf (file, " reg_cost: %d\n",
6593 ivopts_estimate_reg_pressure (data, ivs->n_invs, ivs->n_cands));
6594 fprintf (file, " cand_cost: %" PRId64 "\n cand_group_cost: "
6595 "%" PRId64 " (complexity %d)\n", ivs->cand_cost,
6596 ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
6597 bitmap_print (file, ivs->cands, " candidates: ","\n");
6599 for (i = 0; i < ivs->upto; i++)
6601 struct iv_group *group = data->vgroups[i];
6602 class cost_pair *cp = iv_ca_cand_for_group (ivs, group);
6603 if (cp)
6604 fprintf (file, " group:%d --> iv_cand:%d, cost=("
6605 "%" PRId64 ",%d)\n", group->id, cp->cand->id,
6606 cp->cost.cost, cp->cost.complexity);
6607 else
6608 fprintf (file, " group:%d --> ??\n", group->id);
6611 const char *pref = "";
6612 fprintf (file, " invariant variables: ");
6613 for (i = 1; i <= data->max_inv_var_id; i++)
6614 if (ivs->n_inv_var_uses[i])
6616 fprintf (file, "%s%d", pref, i);
6617 pref = ", ";
6620 pref = "";
6621 fprintf (file, "\n invariant expressions: ");
6622 for (i = 1; i <= data->max_inv_expr_id; i++)
6623 if (ivs->n_inv_expr_uses[i])
6625 fprintf (file, "%s%d", pref, i);
6626 pref = ", ";
6629 fprintf (file, "\n\n");
6632 /* Try changing candidate in IVS to CAND for each use. Return cost of the
6633 new set, and store differences in DELTA. Number of induction variables
6634 in the new set is stored in N_IVS. MIN_NCAND is a flag. When it is true,
6635 the function will try to find a solution with a minimal number of iv candidates. */
6637 static comp_cost
6638 iv_ca_extend (struct ivopts_data *data, class iv_ca *ivs,
6639 struct iv_cand *cand, struct iv_ca_delta **delta,
6640 unsigned *n_ivs, bool min_ncand)
6642 unsigned i;
6643 comp_cost cost;
6644 struct iv_group *group;
6645 class cost_pair *old_cp, *new_cp;
6647 *delta = NULL;
6648 for (i = 0; i < ivs->upto; i++)
6650 group = data->vgroups[i];
6651 old_cp = iv_ca_cand_for_group (ivs, group);
6653 if (old_cp
6654 && old_cp->cand == cand)
6655 continue;
6657 new_cp = get_group_iv_cost (data, group, cand);
6658 if (!new_cp)
6659 continue;
6661 if (!min_ncand)
6663 int cmp_invs = iv_ca_compare_deps (data, ivs, group, old_cp, new_cp);
6664 /* Skip if new_cp depends on more invariants. */
6665 if (cmp_invs > 0)
6666 continue;
6668 int cmp_cost = compare_cost_pair (new_cp, old_cp);
6669 /* Skip if new_cp is not cheaper. */
6670 if (cmp_cost > 0 || (cmp_cost == 0 && cmp_invs == 0))
6671 continue;
6674 *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6677 iv_ca_delta_commit (data, ivs, *delta, true);
6678 cost = iv_ca_cost (ivs);
6679 if (n_ivs)
6680 *n_ivs = iv_ca_n_cands (ivs);
6681 iv_ca_delta_commit (data, ivs, *delta, false);
6683 return cost;
6686 /* Try narrowing set IVS by removing CAND. Return the cost of
6687 the new set and store the differences in DELTA. START is
6688 the candidate with which we start narrowing. */
6690 static comp_cost
6691 iv_ca_narrow (struct ivopts_data *data, class iv_ca *ivs,
6692 struct iv_cand *cand, struct iv_cand *start,
6693 struct iv_ca_delta **delta)
6695 unsigned i, ci;
6696 struct iv_group *group;
6697 class cost_pair *old_cp, *new_cp, *cp;
6698 bitmap_iterator bi;
6699 struct iv_cand *cnd;
6700 comp_cost cost, best_cost, acost;
6702 *delta = NULL;
6703 for (i = 0; i < data->vgroups.length (); i++)
6705 group = data->vgroups[i];
6707 old_cp = iv_ca_cand_for_group (ivs, group);
6708 if (old_cp->cand != cand)
6709 continue;
6711 best_cost = iv_ca_cost (ivs);
6712 /* Start narrowing with START. */
6713 new_cp = get_group_iv_cost (data, group, start);
6715 if (data->consider_all_candidates)
6717 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6719 if (ci == cand->id || (start && ci == start->id))
6720 continue;
6722 cnd = data->vcands[ci];
6724 cp = get_group_iv_cost (data, group, cnd);
6725 if (!cp)
6726 continue;
6728 iv_ca_set_cp (data, ivs, group, cp);
6729 acost = iv_ca_cost (ivs);
6731 if (acost < best_cost)
6733 best_cost = acost;
6734 new_cp = cp;
6738 else
6740 EXECUTE_IF_AND_IN_BITMAP (group->related_cands, ivs->cands, 0, ci, bi)
6742 if (ci == cand->id || (start && ci == start->id))
6743 continue;
6745 cnd = data->vcands[ci];
6747 cp = get_group_iv_cost (data, group, cnd);
6748 if (!cp)
6749 continue;
6751 iv_ca_set_cp (data, ivs, group, cp);
6752 acost = iv_ca_cost (ivs);
6754 if (acost < best_cost)
6756 best_cost = acost;
6757 new_cp = cp;
6761 /* Restore the old cp for this use. */
6762 iv_ca_set_cp (data, ivs, group, old_cp);
6764 if (!new_cp)
6766 iv_ca_delta_free (delta);
6767 return infinite_cost;
6770 *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6773 iv_ca_delta_commit (data, ivs, *delta, true);
6774 cost = iv_ca_cost (ivs);
6775 iv_ca_delta_commit (data, ivs, *delta, false);
6777 return cost;
6780 /* Try optimizing the set of candidates IVS by removing candidates other
6781 than EXCEPT_CAND from it. Return the cost of the new set, and store
6782 differences in DELTA. */
6784 static comp_cost
6785 iv_ca_prune (struct ivopts_data *data, class iv_ca *ivs,
6786 struct iv_cand *except_cand, struct iv_ca_delta **delta)
6788 bitmap_iterator bi;
6789 struct iv_ca_delta *act_delta, *best_delta;
6790 unsigned i;
6791 comp_cost best_cost, acost;
6792 struct iv_cand *cand;
6794 best_delta = NULL;
6795 best_cost = iv_ca_cost (ivs);
6797 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6799 cand = data->vcands[i];
6801 if (cand == except_cand)
6802 continue;
6804 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6806 if (acost < best_cost)
6808 best_cost = acost;
6809 iv_ca_delta_free (&best_delta);
6810 best_delta = act_delta;
6812 else
6813 iv_ca_delta_free (&act_delta);
6816 if (!best_delta)
6818 *delta = NULL;
6819 return best_cost;
6822 /* Recurse to possibly remove other unnecessary ivs. */
6823 iv_ca_delta_commit (data, ivs, best_delta, true);
6824 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6825 iv_ca_delta_commit (data, ivs, best_delta, false);
6826 *delta = iv_ca_delta_join (best_delta, *delta);
6827 return best_cost;
6830 /* Check if CAND_IDX is a candidate other than OLD_CAND and has
6831 cheaper local cost for GROUP than BEST_CP. Return pointer to
6832 the corresponding cost_pair, otherwise just return BEST_CP. */
6834 static class cost_pair*
6835 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_group *group,
6836 unsigned int cand_idx, struct iv_cand *old_cand,
6837 class cost_pair *best_cp)
6839 struct iv_cand *cand;
6840 class cost_pair *cp;
6842 gcc_assert (old_cand != NULL && best_cp != NULL);
6843 if (cand_idx == old_cand->id)
6844 return best_cp;
6846 cand = data->vcands[cand_idx];
6847 cp = get_group_iv_cost (data, group, cand);
6848 if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6849 return cp;
6851 return best_cp;
6854 /* Try breaking the local optimal fixed-point for IVS by replacing candidates
6855 which are used by more than one iv use. For each of those candidates,
6856 this function tries to represent the iv uses under that candidate using
6857 other candidates with lower local cost, then tries to prune the new set.
6858 If the new set has a lower cost, it returns the new cost after recording the
6859 candidate replacements in list DELTA. */
6861 static comp_cost
6862 iv_ca_replace (struct ivopts_data *data, class iv_ca *ivs,
6863 struct iv_ca_delta **delta)
6865 bitmap_iterator bi, bj;
6866 unsigned int i, j, k;
6867 struct iv_cand *cand;
6868 comp_cost orig_cost, acost;
6869 struct iv_ca_delta *act_delta, *tmp_delta;
6870 class cost_pair *old_cp, *best_cp = NULL;
6872 *delta = NULL;
6873 orig_cost = iv_ca_cost (ivs);
6875 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6877 if (ivs->n_cand_uses[i] == 1
6878 || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6879 continue;
6881 cand = data->vcands[i];
6883 act_delta = NULL;
6884 /* Represent uses under current candidate using other ones with
6885 lower local cost. */
6886 for (j = 0; j < ivs->upto; j++)
6888 struct iv_group *group = data->vgroups[j];
6889 old_cp = iv_ca_cand_for_group (ivs, group);
6891 if (old_cp->cand != cand)
6892 continue;
6894 best_cp = old_cp;
6895 if (data->consider_all_candidates)
6896 for (k = 0; k < data->vcands.length (); k++)
6897 best_cp = cheaper_cost_with_cand (data, group, k,
6898 old_cp->cand, best_cp);
6899 else
6900 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, k, bj)
6901 best_cp = cheaper_cost_with_cand (data, group, k,
6902 old_cp->cand, best_cp);
6904 if (best_cp == old_cp)
6905 continue;
6907 act_delta = iv_ca_delta_add (group, old_cp, best_cp, act_delta);
6909 /* No need for further pruning. */
6910 if (!act_delta)
6911 continue;
6913 /* Prune the new candidate set. */
6914 iv_ca_delta_commit (data, ivs, act_delta, true);
6915 acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6916 iv_ca_delta_commit (data, ivs, act_delta, false);
6917 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6919 if (acost < orig_cost)
6921 *delta = act_delta;
6922 return acost;
6924 else
6925 iv_ca_delta_free (&act_delta);
6928 return orig_cost;
6931 /* Tries to extend the set IVS in the best possible way in order to
6932 express GROUP. If ORIGINALP is true, prefer candidates from
6933 the original set of IVs, otherwise favor important candidates not
6934 based on any memory object. */
6936 static bool
6937 try_add_cand_for (struct ivopts_data *data, class iv_ca *ivs,
6938 struct iv_group *group, bool originalp)
6940 comp_cost best_cost, act_cost;
6941 unsigned i;
6942 bitmap_iterator bi;
6943 struct iv_cand *cand;
6944 struct iv_ca_delta *best_delta = NULL, *act_delta;
6945 class cost_pair *cp;
6947 iv_ca_add_group (data, ivs, group);
6948 best_cost = iv_ca_cost (ivs);
6949 cp = iv_ca_cand_for_group (ivs, group);
6950 if (cp)
6952 best_delta = iv_ca_delta_add (group, NULL, cp, NULL);
6953 iv_ca_set_no_cp (data, ivs, group);
6956 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
6957 first try important candidates not based on any memory object. Only if
6958 this fails, try the specific ones. Rationale -- in loops with many
6959 variables the best choice often is to use just one generic biv. If we
6960 added here many ivs specific to the uses, the optimization algorithm later
6961 would be likely to get stuck in a local minimum, thus causing us to create
6962 too many ivs. The approach from few ivs to more seems more likely to be
6963 successful -- starting from few ivs, replacing an expensive use by a
6964 specific iv should always be a win. */
6965 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, i, bi)
6967 cand = data->vcands[i];
6969 if (originalp && cand->pos != IP_ORIGINAL)
6970 continue;
6972 if (!originalp && cand->iv->base_object != NULL_TREE)
6973 continue;
6975 if (iv_ca_cand_used_p (ivs, cand))
6976 continue;
6978 cp = get_group_iv_cost (data, group, cand);
6979 if (!cp)
6980 continue;
6982 iv_ca_set_cp (data, ivs, group, cp);
6983 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6984 true);
6985 iv_ca_set_no_cp (data, ivs, group);
6986 act_delta = iv_ca_delta_add (group, NULL, cp, act_delta);
6988 if (act_cost < best_cost)
6990 best_cost = act_cost;
6992 iv_ca_delta_free (&best_delta);
6993 best_delta = act_delta;
6995 else
6996 iv_ca_delta_free (&act_delta);
6999 if (best_cost.infinite_cost_p ())
7001 for (i = 0; i < group->n_map_members; i++)
7003 cp = group->cost_map + i;
7004 cand = cp->cand;
7005 if (!cand)
7006 continue;
7008 /* Already tried this. */
7009 if (cand->important)
7011 if (originalp && cand->pos == IP_ORIGINAL)
7012 continue;
7013 if (!originalp && cand->iv->base_object == NULL_TREE)
7014 continue;
7017 if (iv_ca_cand_used_p (ivs, cand))
7018 continue;
7020 act_delta = NULL;
7021 iv_ca_set_cp (data, ivs, group, cp);
7022 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
7023 iv_ca_set_no_cp (data, ivs, group);
7024 act_delta = iv_ca_delta_add (group,
7025 iv_ca_cand_for_group (ivs, group),
7026 cp, act_delta);
7028 if (act_cost < best_cost)
7030 best_cost = act_cost;
7032 if (best_delta)
7033 iv_ca_delta_free (&best_delta);
7034 best_delta = act_delta;
7036 else
7037 iv_ca_delta_free (&act_delta);
7041 iv_ca_delta_commit (data, ivs, best_delta, true);
7042 iv_ca_delta_free (&best_delta);
7044 return !best_cost.infinite_cost_p ();
7047 /* Finds an initial assignment of candidates to uses. */
7049 static class iv_ca *
7050 get_initial_solution (struct ivopts_data *data, bool originalp)
7052 unsigned i;
7053 class iv_ca *ivs = iv_ca_new (data);
7055 for (i = 0; i < data->vgroups.length (); i++)
7056 if (!try_add_cand_for (data, ivs, data->vgroups[i], originalp))
7058 iv_ca_free (&ivs);
7059 return NULL;
7062 return ivs;
7065 /* Tries to improve the set of induction variables IVS. TRY_REPLACE_P
7066 points to a bool variable; if it is true, this function also tries to break a
7067 local optimal fixed-point by replacing candidates in IVS. */
7069 static bool
7070 try_improve_iv_set (struct ivopts_data *data,
7071 class iv_ca *ivs, bool *try_replace_p)
7073 unsigned i, n_ivs;
7074 comp_cost acost, best_cost = iv_ca_cost (ivs);
7075 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
7076 struct iv_cand *cand;
7078 /* Try extending the set of induction variables by one. */
7079 for (i = 0; i < data->vcands.length (); i++)
7081 cand = data->vcands[i];
7083 if (iv_ca_cand_used_p (ivs, cand))
7084 continue;
7086 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
7087 if (!act_delta)
7088 continue;
7090 /* If we successfully added the candidate and the set is small enough,
7091 try optimizing it by removing other candidates. */
7092 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
7094 iv_ca_delta_commit (data, ivs, act_delta, true);
7095 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
7096 iv_ca_delta_commit (data, ivs, act_delta, false);
7097 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
7100 if (acost < best_cost)
7102 best_cost = acost;
7103 iv_ca_delta_free (&best_delta);
7104 best_delta = act_delta;
7106 else
7107 iv_ca_delta_free (&act_delta);
7110 if (!best_delta)
7112 /* Try removing the candidates from the set instead. */
7113 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
7115 if (!best_delta && *try_replace_p)
7117 *try_replace_p = false;
7118 /* So far the candidate selection algorithm tends to choose fewer IVs,
7119 so that it can handle cases in which loops have many variables
7120 but the best choice is often to use only one general biv. One
7121 weakness is that it can't handle the opposite cases, in which different
7122 candidates should be chosen with respect to each use. To solve
7123 this problem, we replace candidates in the manner described in the
7124 comments of iv_ca_replace, thus giving the general algorithm a chance
7125 to break the local optimal fixed-point in these cases. */
7126 best_cost = iv_ca_replace (data, ivs, &best_delta);
7129 if (!best_delta)
7130 return false;
7133 iv_ca_delta_commit (data, ivs, best_delta, true);
7134 iv_ca_delta_free (&best_delta);
7135 return best_cost == iv_ca_cost (ivs);
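/* The overall search is a simple hill climb; schematically, with purely
   hypothetical candidates and costs:

	{c1, c2} cost 20  --extend by c5-->  {c1, c2, c5} cost 18
			  --prune c2----->   {c1, c5}     cost 15

   find_optimal_iv_set_1 below keeps calling try_improve_iv_set until no
   delta with a strictly better cost is found.  */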
7138 /* Attempts to find the optimal set of induction variables. We use a simple
7139 greedy heuristic -- we try to replace at most one candidate in the selected
7140 solution and remove the unused ivs while this improves the cost. */
7142 static class iv_ca *
7143 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
7145 class iv_ca *set;
7146 bool try_replace_p = true;
7148 /* Get the initial solution. */
7149 set = get_initial_solution (data, originalp);
7150 if (!set)
7152 if (dump_file && (dump_flags & TDF_DETAILS))
7153 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
7154 return NULL;
7157 if (dump_file && (dump_flags & TDF_DETAILS))
7159 fprintf (dump_file, "Initial set of candidates:\n");
7160 iv_ca_dump (data, dump_file, set);
7163 while (try_improve_iv_set (data, set, &try_replace_p))
7165 if (dump_file && (dump_flags & TDF_DETAILS))
7167 fprintf (dump_file, "Improved to:\n");
7168 iv_ca_dump (data, dump_file, set);
7172 /* If the set has infinite_cost, it can't be optimal. */
7173 if (iv_ca_cost (set).infinite_cost_p ())
7175 if (dump_file && (dump_flags & TDF_DETAILS))
7176 fprintf (dump_file,
7177 "Overflow to infinite cost in try_improve_iv_set.\n");
7178 iv_ca_free (&set);
7180 return set;
7183 static class iv_ca *
7184 find_optimal_iv_set (struct ivopts_data *data)
7186 unsigned i;
7187 comp_cost cost, origcost;
7188 class iv_ca *set, *origset;
7190 /* Determine the cost based on a strategy that starts with original IVs,
7191 and try again using a strategy that prefers candidates not based
7192 on any IVs. */
7193 origset = find_optimal_iv_set_1 (data, true);
7194 set = find_optimal_iv_set_1 (data, false);
7196 if (!origset && !set)
7197 return NULL;
7199 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
7200 cost = set ? iv_ca_cost (set) : infinite_cost;
7202 if (dump_file && (dump_flags & TDF_DETAILS))
7204 fprintf (dump_file, "Original cost %" PRId64 " (complexity %d)\n\n",
7205 origcost.cost, origcost.complexity);
7206 fprintf (dump_file, "Final cost %" PRId64 " (complexity %d)\n\n",
7207 cost.cost, cost.complexity);
7210 /* Choose the one with the best cost. */
7211 if (origcost <= cost)
7213 if (set)
7214 iv_ca_free (&set);
7215 set = origset;
7217 else if (origset)
7218 iv_ca_free (&origset);
7220 for (i = 0; i < data->vgroups.length (); i++)
7222 struct iv_group *group = data->vgroups[i];
7223 group->selected = iv_ca_cand_for_group (set, group)->cand;
7226 return set;
7229 /* Creates a new induction variable corresponding to CAND. */
7231 static void
7232 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
7234 gimple_stmt_iterator incr_pos;
7235 tree base;
7236 struct iv_use *use;
7237 struct iv_group *group;
7238 bool after = false;
7240 gcc_assert (cand->iv != NULL);
7242 switch (cand->pos)
7244 case IP_NORMAL:
7245 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
7246 break;
7248 case IP_END:
7249 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
7250 after = true;
7251 if (!gsi_end_p (incr_pos) && stmt_ends_bb_p (gsi_stmt (incr_pos)))
7253 edge e = find_edge (gsi_bb (incr_pos), data->current_loop->header);
7254 incr_pos = gsi_after_labels (split_edge (e));
7255 after = false;
7257 break;
7259 case IP_AFTER_USE:
7260 after = true;
7261 /* fall through */
7262 case IP_BEFORE_USE:
7263 incr_pos = gsi_for_stmt (cand->incremented_at);
7264 break;
7266 case IP_ORIGINAL:
7267 /* Mark that the iv is preserved. */
7268 name_info (data, cand->var_before)->preserve_biv = true;
7269 name_info (data, cand->var_after)->preserve_biv = true;
7271 /* Rewrite the increment so that it uses var_before directly. */
7272 use = find_interesting_uses_op (data, cand->var_after);
7273 group = data->vgroups[use->group_id];
7274 group->selected = cand;
7275 return;
7278 gimple_add_tmp_var (cand->var_before);
7280 base = unshare_expr (cand->iv->base);
7282 create_iv (base, PLUS_EXPR, unshare_expr (cand->iv->step),
7283 cand->var_before, data->current_loop,
7284 &incr_pos, after, &cand->var_before, &cand->var_after);
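/* For a candidate with base B and step S placed at IP_NORMAL, the call above
   produces GIMPLE of roughly this shape (SSA names are illustrative):

	# ivtmp_1 = PHI <B (preheader), ivtmp_2 (latch)>
	...
	ivtmp_2 = ivtmp_1 + S;

   with cand->var_before = ivtmp_1 and cand->var_after = ivtmp_2.  */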
7287 /* Creates new induction variables described in SET. */
7289 static void
7290 create_new_ivs (struct ivopts_data *data, class iv_ca *set)
7292 unsigned i;
7293 struct iv_cand *cand;
7294 bitmap_iterator bi;
7296 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7298 cand = data->vcands[i];
7299 create_new_iv (data, cand);
7302 if (dump_file && (dump_flags & TDF_DETAILS))
7304 fprintf (dump_file, "Selected IV set for loop %d",
7305 data->current_loop->num);
7306 if (data->loop_loc != UNKNOWN_LOCATION)
7307 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7308 LOCATION_LINE (data->loop_loc));
7309 fprintf (dump_file, ", " HOST_WIDE_INT_PRINT_DEC " avg niters",
7310 avg_loop_niter (data->current_loop));
7311 fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
7312 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7314 cand = data->vcands[i];
7315 dump_cand (dump_file, cand);
7317 fprintf (dump_file, "\n");
7321 /* Rewrites USE (definition of iv used in a nonlinear expression)
7322 using candidate CAND. */
7324 static void
7325 rewrite_use_nonlinear_expr (struct ivopts_data *data,
7326 struct iv_use *use, struct iv_cand *cand)
7328 gassign *ass;
7329 gimple_stmt_iterator bsi;
7330 tree comp, type = get_use_type (use), tgt;
7332 /* An important special case -- if we are asked to express value of
7333 the original iv by itself, just exit; there is no need to
7334 introduce a new computation (that might also need casting the
7335 variable to unsigned and back). */
7336 if (cand->pos == IP_ORIGINAL
7337 && cand->incremented_at == use->stmt)
7339 tree op = NULL_TREE;
7340 enum tree_code stmt_code;
7342 gcc_assert (is_gimple_assign (use->stmt));
7343 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
7345 /* Check whether we may leave the computation unchanged.
7346 This is the case only if it does not rely on other
7347 computations in the loop -- otherwise, the computation
7348 we rely upon may be removed in remove_unused_ivs,
7349 thus leading to ICE. */
7350 stmt_code = gimple_assign_rhs_code (use->stmt);
7351 if (stmt_code == PLUS_EXPR
7352 || stmt_code == MINUS_EXPR
7353 || stmt_code == POINTER_PLUS_EXPR)
7355 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
7356 op = gimple_assign_rhs2 (use->stmt);
7357 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
7358 op = gimple_assign_rhs1 (use->stmt);
7361 if (op != NULL_TREE)
7363 if (expr_invariant_in_loop_p (data->current_loop, op))
7364 return;
7365 if (TREE_CODE (op) == SSA_NAME)
7367 struct iv *iv = get_iv (data, op);
7368 if (iv != NULL && integer_zerop (iv->step))
7369 return;
7374 switch (gimple_code (use->stmt))
7376 case GIMPLE_PHI:
7377 tgt = PHI_RESULT (use->stmt);
7379 /* If we should keep the biv, do not replace it. */
7380 if (name_info (data, tgt)->preserve_biv)
7381 return;
7383 bsi = gsi_after_labels (gimple_bb (use->stmt));
7384 break;
7386 case GIMPLE_ASSIGN:
7387 tgt = gimple_assign_lhs (use->stmt);
7388 bsi = gsi_for_stmt (use->stmt);
7389 break;
7391 default:
7392 gcc_unreachable ();
7395 aff_tree aff_inv, aff_var;
7396 if (!get_computation_aff_1 (data->current_loop, use->stmt,
7397 use, cand, &aff_inv, &aff_var))
7398 gcc_unreachable ();
7400 unshare_aff_combination (&aff_inv);
7401 unshare_aff_combination (&aff_var);
7402 /* Prefer a CSE opportunity over a loop invariant by adding the offset last,
7403 so that iv_uses which differ only in their offsets can be CSEed. */
7404 poly_widest_int offset = aff_inv.offset;
7405 aff_inv.offset = 0;
7407 gimple_seq stmt_list = NULL, seq = NULL;
7408 tree comp_op1 = aff_combination_to_tree (&aff_inv);
7409 tree comp_op2 = aff_combination_to_tree (&aff_var);
7410 gcc_assert (comp_op1 && comp_op2);
7412 comp_op1 = force_gimple_operand (comp_op1, &seq, true, NULL);
7413 gimple_seq_add_seq (&stmt_list, seq);
7414 comp_op2 = force_gimple_operand (comp_op2, &seq, true, NULL);
7415 gimple_seq_add_seq (&stmt_list, seq);
7417 if (POINTER_TYPE_P (TREE_TYPE (comp_op2)))
7418 std::swap (comp_op1, comp_op2);
7420 if (POINTER_TYPE_P (TREE_TYPE (comp_op1)))
7422 comp = fold_build_pointer_plus (comp_op1,
7423 fold_convert (sizetype, comp_op2));
7424 comp = fold_build_pointer_plus (comp,
7425 wide_int_to_tree (sizetype, offset));
7427 else
7429 comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp_op1,
7430 fold_convert (TREE_TYPE (comp_op1), comp_op2));
7431 comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp,
7432 wide_int_to_tree (TREE_TYPE (comp_op1), offset));
7435 comp = fold_convert (type, comp);
7436 comp = force_gimple_operand (comp, &seq, false, NULL);
7437 gimple_seq_add_seq (&stmt_list, seq);
7438 if (gimple_code (use->stmt) != GIMPLE_PHI
7439 /* We can't allow re-allocating the stmt as it might be pointed
7440 to still. */
7441 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
7442 >= gimple_num_ops (gsi_stmt (bsi))))
7444 comp = force_gimple_operand (comp, &seq, true, NULL);
7445 gimple_seq_add_seq (&stmt_list, seq);
7446 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
7448 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
7449 /* As this isn't a plain copy we have to reset alignment
7450 information. */
7451 if (SSA_NAME_PTR_INFO (comp))
7452 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
7456 gsi_insert_seq_before (&bsi, stmt_list, GSI_SAME_STMT);
7457 if (gimple_code (use->stmt) == GIMPLE_PHI)
7459 ass = gimple_build_assign (tgt, comp);
7460 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
7462 bsi = gsi_for_stmt (use->stmt);
7463 remove_phi_node (&bsi, false);
7465 else
7467 gimple_assign_set_rhs_from_tree (&bsi, comp);
7468 use->stmt = gsi_stmt (bsi);
7472 /* Performs a peephole optimization to reorder the iv update statement with
7473 a mem ref to enable instruction combining in later phases. The mem ref uses
7474 the iv value before the update, so the reordering transformation requires
7475 adjustment of the offset. CAND is the selected IV_CAND.
7477 Example:
7479 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
7480 iv2 = iv1 + 1;
7482 if (t < val) (1)
7483 goto L;
7484 goto Head;
7487 directly propagating t over to (1) will introduce an overlapping live range
7488 and thus increase register pressure. This peephole transforms it into:
7491 iv2 = iv1 + 1;
7492 t = MEM_REF (base, iv2, 8, 8);
7493 if (t < val)
7494 goto L;
7495 goto Head;
7498 static void
7499 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
7501 tree var_after;
7502 gimple *iv_update, *stmt;
7503 basic_block bb;
7504 gimple_stmt_iterator gsi, gsi_iv;
7506 if (cand->pos != IP_NORMAL)
7507 return;
7509 var_after = cand->var_after;
7510 iv_update = SSA_NAME_DEF_STMT (var_after);
7512 bb = gimple_bb (iv_update);
7513 gsi = gsi_last_nondebug_bb (bb);
7514 stmt = gsi_stmt (gsi);
7516 /* Only handle conditional statement for now. */
7517 if (gimple_code (stmt) != GIMPLE_COND)
7518 return;
7520 gsi_prev_nondebug (&gsi);
7521 stmt = gsi_stmt (gsi);
7522 if (stmt != iv_update)
7523 return;
7525 gsi_prev_nondebug (&gsi);
7526 if (gsi_end_p (gsi))
7527 return;
7529 stmt = gsi_stmt (gsi);
7530 if (gimple_code (stmt) != GIMPLE_ASSIGN)
7531 return;
7533 if (stmt != use->stmt)
7534 return;
7536 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
7537 return;
7539 if (dump_file && (dump_flags & TDF_DETAILS))
7541 fprintf (dump_file, "Reordering \n");
7542 print_gimple_stmt (dump_file, iv_update, 0);
7543 print_gimple_stmt (dump_file, use->stmt, 0);
7544 fprintf (dump_file, "\n");
7547 gsi = gsi_for_stmt (use->stmt);
7548 gsi_iv = gsi_for_stmt (iv_update);
7549 gsi_move_before (&gsi_iv, &gsi);
7551 cand->pos = IP_BEFORE_USE;
7552 cand->incremented_at = use->stmt;
7555 /* Return the alias pointer type that should be used for a MEM_REF
7556 associated with USE, which has type USE_PTR_ADDRESS. */
7558 static tree
7559 get_alias_ptr_type_for_ptr_address (iv_use *use)
7561 gcall *call = as_a <gcall *> (use->stmt);
7562 switch (gimple_call_internal_fn (call))
7564 case IFN_MASK_LOAD:
7565 case IFN_MASK_STORE:
7566 case IFN_MASK_LOAD_LANES:
7567 case IFN_MASK_STORE_LANES:
7568 case IFN_LEN_LOAD:
7569 case IFN_LEN_STORE:
7570 case IFN_LEN_MASK_LOAD:
7571 case IFN_LEN_MASK_STORE:
7572 /* The second argument contains the correct alias type. */
7573 gcc_assert (use->op_p == gimple_call_arg_ptr (call, 0));
7574 return TREE_TYPE (gimple_call_arg (call, 1));
7576 default:
7577 gcc_unreachable ();
7582 /* Rewrites USE (address that is an iv) using candidate CAND. */
7584 static void
7585 rewrite_use_address (struct ivopts_data *data,
7586 struct iv_use *use, struct iv_cand *cand)
7588 aff_tree aff;
7589 bool ok;
7591 adjust_iv_update_pos (cand, use);
7592 ok = get_computation_aff (data->current_loop, use->stmt, use, cand, &aff);
7593 gcc_assert (ok);
7594 unshare_aff_combination (&aff);
7596 /* To avoid undefined overflow problems, all IV candidates use unsigned
7597 integer types. The drawback is that this makes it impossible for
7598 create_mem_ref to distinguish an IV that is based on a memory object
7599 from one that represents simply an offset.
7601 To work around this problem, we pass a hint to create_mem_ref that
7602 indicates which variable (if any) in aff is an IV based on a memory
7603 object. Note that we only consider the candidate. If this is not
7604 based on an object, the base of the reference is in some subexpression
7605 of the use -- but these will use pointer types, so they are recognized
7606 by the create_mem_ref heuristics anyway. */
7607 tree iv = var_at_stmt (data->current_loop, cand, use->stmt);
7608 tree base_hint = (cand->iv->base_object) ? iv : NULL_TREE;
7609 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7610 tree type = use->mem_type;
7611 tree alias_ptr_type;
7612 if (use->type == USE_PTR_ADDRESS)
7613 alias_ptr_type = get_alias_ptr_type_for_ptr_address (use);
7614 else
7616 gcc_assert (type == TREE_TYPE (*use->op_p));
7617 unsigned int align = get_object_alignment (*use->op_p);
7618 if (align != TYPE_ALIGN (type))
7619 type = build_aligned_type (type, align);
7620 alias_ptr_type = reference_alias_ptr_type (*use->op_p);
7622 tree ref = create_mem_ref (&bsi, type, &aff, alias_ptr_type,
7623 iv, base_hint, data->speed);
7625 if (use->type == USE_PTR_ADDRESS)
7627 ref = fold_build1 (ADDR_EXPR, build_pointer_type (use->mem_type), ref);
7628 ref = fold_convert (get_use_type (use), ref);
7629 ref = force_gimple_operand_gsi (&bsi, ref, true, NULL_TREE,
7630 true, GSI_SAME_STMT);
7632 else
7633 copy_ref_info (ref, *use->op_p);
7635 *use->op_p = ref;
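/* As an illustration (schematic, not actual dump output), an address use
   such as *(&a[0] + i_3 * 4) whose group selected an address-based candidate
   may be rewritten into a TARGET_MEM_REF along the lines of
   MEM[symbol: a, index: ivtmp_7, step: 4], so that later passes can pick an
   addressing mode directly.  */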
7638 /* Rewrites USE (the condition such that one of the arguments is an iv) using
7639 candidate CAND. */
7641 static void
7642 rewrite_use_compare (struct ivopts_data *data,
7643 struct iv_use *use, struct iv_cand *cand)
7645 tree comp, op, bound;
7646 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7647 enum tree_code compare;
7648 struct iv_group *group = data->vgroups[use->group_id];
7649 class cost_pair *cp = get_group_iv_cost (data, group, cand);
7651 bound = cp->value;
7652 if (bound)
7654 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
7655 tree var_type = TREE_TYPE (var);
7656 gimple_seq stmts;
7658 if (dump_file && (dump_flags & TDF_DETAILS))
7660 fprintf (dump_file, "Replacing exit test: ");
7661 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
7663 compare = cp->comp;
7664 bound = unshare_expr (fold_convert (var_type, bound));
7665 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
7666 if (stmts)
7667 gsi_insert_seq_on_edge_immediate (
7668 loop_preheader_edge (data->current_loop),
7669 stmts);
7671 gcond *cond_stmt = as_a <gcond *> (use->stmt);
7672 gimple_cond_set_lhs (cond_stmt, var);
7673 gimple_cond_set_code (cond_stmt, compare);
7674 gimple_cond_set_rhs (cond_stmt, op);
7675 return;
7678 /* The induction variable elimination failed; just express the original
7679 giv. */
7680 comp = get_computation_at (data->current_loop, use->stmt, use, cand);
7681 gcc_assert (comp != NULL_TREE);
7682 gcc_assert (use->op_p != NULL);
7683 *use->op_p = force_gimple_operand_gsi (&bsi, comp, true,
7684 SSA_NAME_VAR (*use->op_p),
7685 true, GSI_SAME_STMT);
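/* For example (schematic, with assumed SSA names), an exit test

	if (i_5 < n_4)

   whose group selected a down-counting candidate may become

	if (ivtmp_9 != 0)

   where the replacement bound, when not constant, is computed once on the
   preheader edge by the code above.  */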
7688 /* Rewrite the groups using the selected induction variables. */
7690 static void
7691 rewrite_groups (struct ivopts_data *data)
7693 unsigned i, j;
7695 for (i = 0; i < data->vgroups.length (); i++)
7697 struct iv_group *group = data->vgroups[i];
7698 struct iv_cand *cand = group->selected;
7700 gcc_assert (cand);
7702 if (group->type == USE_NONLINEAR_EXPR)
7704 for (j = 0; j < group->vuses.length (); j++)
7706 rewrite_use_nonlinear_expr (data, group->vuses[j], cand);
7707 update_stmt (group->vuses[j]->stmt);
7710 else if (address_p (group->type))
7712 for (j = 0; j < group->vuses.length (); j++)
7714 rewrite_use_address (data, group->vuses[j], cand);
7715 update_stmt (group->vuses[j]->stmt);
7718 else
7720 gcc_assert (group->type == USE_COMPARE);
7722 for (j = 0; j < group->vuses.length (); j++)
7724 rewrite_use_compare (data, group->vuses[j], cand);
7725 update_stmt (group->vuses[j]->stmt);
7731 /* Removes the ivs that are not used after rewriting. */
7733 static void
7734 remove_unused_ivs (struct ivopts_data *data, bitmap toremove)
7736 unsigned j;
7737 bitmap_iterator bi;
7739 /* Figure out an order in which to release SSA DEFs so that we don't
7740 release something that we'd have to propagate into a debug stmt
7741 afterwards. */
7742 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7744 struct version_info *info;
7746 info = ver_info (data, j);
7747 if (info->iv
7748 && !integer_zerop (info->iv->step)
7749 && !info->inv_id
7750 && !info->iv->nonlin_use
7751 && !info->preserve_biv)
7753 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7755 tree def = info->iv->ssa_name;
7757 if (MAY_HAVE_DEBUG_BIND_STMTS && SSA_NAME_DEF_STMT (def))
7759 imm_use_iterator imm_iter;
7760 use_operand_p use_p;
7761 gimple *stmt;
7762 int count = 0;
7764 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7766 if (!gimple_debug_bind_p (stmt))
7767 continue;
7769 /* We just want to determine whether to do nothing
7770 (count == 0), to substitute the computed
7771 expression into a single use of the SSA DEF by
7772 itself (count == 1), or to use a debug temp
7773 because the SSA DEF is used multiple times or as
7774 part of a larger expression (count > 1). */
7775 count++;
7776 if (gimple_debug_bind_get_value (stmt) != def)
7777 count++;
7779 if (count > 1)
7780 break;
7783 if (!count)
7784 continue;
7786 struct iv_use dummy_use;
7787 struct iv_cand *best_cand = NULL, *cand;
7788 unsigned i, best_pref = 0, cand_pref;
7789 tree comp = NULL_TREE;
7791 memset (&dummy_use, 0, sizeof (dummy_use));
7792 dummy_use.iv = info->iv;
7793 for (i = 0; i < data->vgroups.length () && i < 64; i++)
7795 cand = data->vgroups[i]->selected;
7796 if (cand == best_cand)
7797 continue;
7798 cand_pref = operand_equal_p (cand->iv->step,
7799 info->iv->step, 0)
7800 ? 4 : 0;
7801 cand_pref
7802 += TYPE_MODE (TREE_TYPE (cand->iv->base))
7803 == TYPE_MODE (TREE_TYPE (info->iv->base))
7804 ? 2 : 0;
7805 cand_pref
7806 += TREE_CODE (cand->iv->base) == INTEGER_CST
7807 ? 1 : 0;
7808 if (best_cand == NULL || best_pref < cand_pref)
7810 tree this_comp
7811 = get_debug_computation_at (data->current_loop,
7812 SSA_NAME_DEF_STMT (def),
7813 &dummy_use, cand);
7814 if (this_comp)
7816 best_cand = cand;
7817 best_pref = cand_pref;
7818 comp = this_comp;
7823 if (!best_cand)
7824 continue;
7826 comp = unshare_expr (comp);
7827 if (count > 1)
7829 tree vexpr = build_debug_expr_decl (TREE_TYPE (comp));
7830 /* FIXME: Is setting the mode really necessary? */
7831 if (SSA_NAME_VAR (def))
7832 SET_DECL_MODE (vexpr, DECL_MODE (SSA_NAME_VAR (def)));
7833 else
7834 SET_DECL_MODE (vexpr, TYPE_MODE (TREE_TYPE (vexpr)));
7835 gdebug *def_temp
7836 = gimple_build_debug_bind (vexpr, comp, NULL);
7837 gimple_stmt_iterator gsi;
7839 if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7840 gsi = gsi_after_labels (gimple_bb
7841 (SSA_NAME_DEF_STMT (def)));
7842 else
7843 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7845 gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7846 comp = vexpr;
7849 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7851 if (!gimple_debug_bind_p (stmt))
7852 continue;
7854 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7855 SET_USE (use_p, comp);
7857 update_stmt (stmt);
7864 /* Frees memory occupied by class tree_niter_desc in *VALUE. Callback
7865 for hash_map::traverse. */
7867 bool
7868 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7870 free (value);
7871 return true;
7874 /* Frees data allocated by the optimization of a single loop. */
7876 static void
7877 free_loop_data (struct ivopts_data *data)
7879 unsigned i, j;
7880 bitmap_iterator bi;
7881 tree obj;
7883 if (data->niters)
7885 data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7886 delete data->niters;
7887 data->niters = NULL;
7890 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7892 struct version_info *info;
7894 info = ver_info (data, i);
7895 info->iv = NULL;
7896 info->has_nonlin_use = false;
7897 info->preserve_biv = false;
7898 info->inv_id = 0;
7900 bitmap_clear (data->relevant);
7901 bitmap_clear (data->important_candidates);
7903 for (i = 0; i < data->vgroups.length (); i++)
7905 struct iv_group *group = data->vgroups[i];
7907 for (j = 0; j < group->vuses.length (); j++)
7908 free (group->vuses[j]);
7909 group->vuses.release ();
7911 BITMAP_FREE (group->related_cands);
7912 for (j = 0; j < group->n_map_members; j++)
7914 if (group->cost_map[j].inv_vars)
7915 BITMAP_FREE (group->cost_map[j].inv_vars);
7916 if (group->cost_map[j].inv_exprs)
7917 BITMAP_FREE (group->cost_map[j].inv_exprs);
7920 free (group->cost_map);
7921 free (group);
7923 data->vgroups.truncate (0);
7925 for (i = 0; i < data->vcands.length (); i++)
7927 struct iv_cand *cand = data->vcands[i];
7929 if (cand->inv_vars)
7930 BITMAP_FREE (cand->inv_vars);
7931 if (cand->inv_exprs)
7932 BITMAP_FREE (cand->inv_exprs);
7933 free (cand);
7935 data->vcands.truncate (0);
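/* If new SSA names were created since version_info was allocated, grow the
   array (zero-initialized, with room to spare) so that every name has an
   entry when the next loop is processed.  */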
7937 if (data->version_info_size < num_ssa_names)
7939 data->version_info_size = 2 * num_ssa_names;
7940 free (data->version_info);
7941 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7944 data->max_inv_var_id = 0;
7945 data->max_inv_expr_id = 0;
7947 FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7948 SET_DECL_RTL (obj, NULL_RTX);
7950 decl_rtl_to_reset.truncate (0);
7952 data->inv_expr_tab->empty ();
7954 data->iv_common_cand_tab->empty ();
7955 data->iv_common_cands.truncate (0);
7958 /* Finalizes the data structures used by the iv optimization pass, freeing
7959    everything that was allocated in DATA.  */
7961 static void
7962 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7964 free_loop_data (data);
7965 free (data->version_info);
7966 BITMAP_FREE (data->relevant);
7967 BITMAP_FREE (data->important_candidates);
7969 decl_rtl_to_reset.release ();
7970 data->vgroups.release ();
7971 data->vcands.release ();
7972 delete data->inv_expr_tab;
7973 data->inv_expr_tab = NULL;
7974 free_affine_expand_cache (&data->name_expansion_cache);
7975 if (data->base_object_map)
7976 delete data->base_object_map;
7977 delete data->iv_common_cand_tab;
7978 data->iv_common_cand_tab = NULL;
7979 data->iv_common_cands.release ();
7980 obstack_free (&data->iv_obstack, NULL);
7983 /* Returns true if the loop body BODY (NUM_NODES blocks) contains a call that is neither an internal call nor an inexpensive builtin.  */
7985 static bool
7986 loop_body_includes_call (basic_block *body, unsigned num_nodes)
7988 gimple_stmt_iterator gsi;
7989 unsigned i;
7991 for (i = 0; i < num_nodes; i++)
7992 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
7994 gimple *stmt = gsi_stmt (gsi);
7995 if (is_gimple_call (stmt)
7996 && !gimple_call_internal_p (stmt)
7997 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
7998 return true;
8000 return false;
8003 /* Determine a cost scaling factor for each basic block in the loop, based on its profile frequency relative to the loop header.  */
8004 #define COST_SCALING_FACTOR_BOUND (20)
8006 static void
8007 determine_scaling_factor (struct ivopts_data *data, basic_block *body)
8009 int lfreq = data->current_loop->header->count.to_frequency (cfun);
8010 if (!data->speed || lfreq <= 0)
8011 return;
8013 int max_freq = lfreq;
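/* Each block's aux field will hold its cost multiplier; it defaults to 1
   and is cleared again once the IV set for the loop has been chosen.  */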
8014 for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
8016 body[i]->aux = (void *)(intptr_t) 1;
8017 if (max_freq < body[i]->count.to_frequency (cfun))
8018 max_freq = body[i]->count.to_frequency (cfun);
8020 if (max_freq > lfreq)
8022 int divisor, factor;
8023 /* Check if scaling factor itself needs to be scaled by the bound. This
8024 is to avoid overflow when scaling cost according to profile info. */
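/* For example, with a header frequency of 100 and a hottest block of
   frequency 5000, the ratio 50 exceeds the bound, so factor = 20 and
   divisor = 5000; a block of frequency 2500 then gets
   aux = 20 * 2500 / 5000 = 10.  With a hottest block of only 300 the
   bound is not hit and that block simply gets aux = 300 / 100 = 3.  */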
8025 if (max_freq / lfreq > COST_SCALING_FACTOR_BOUND)
8027 divisor = max_freq;
8028 factor = COST_SCALING_FACTOR_BOUND;
8030 else
8032 divisor = lfreq;
8033 factor = 1;
8035 for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
8037 int bfreq = body[i]->count.to_frequency (cfun);
8038 if (bfreq <= lfreq)
8039 continue;
8041 body[i]->aux = (void*)(intptr_t) (factor * bfreq / divisor);
8046 /* Find the doloop comparison IV use and, if one is found, set the doloop_p flag of its group.  */
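/* A qualifying use is the exit test of a counted loop, i.e. a GIMPLE_COND
   such as
       if (i_1 < n_2) goto <latch>; else goto <exit>;
   where one successor is the (empty) loop latch.  The SSA names above are
   purely illustrative.  */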
8048 static bool
8049 find_doloop_use (struct ivopts_data *data)
8051 struct loop *loop = data->current_loop;
8053 for (unsigned i = 0; i < data->vgroups.length (); i++)
8055 struct iv_group *group = data->vgroups[i];
8056 if (group->type == USE_COMPARE)
8058 gcc_assert (group->vuses.length () == 1);
8059 struct iv_use *use = group->vuses[0];
8060 gimple *stmt = use->stmt;
8061 if (gimple_code (stmt) == GIMPLE_COND)
8063 basic_block bb = gimple_bb (stmt);
8064 edge true_edge, false_edge;
8065 extract_true_false_edges_from_block (bb, &true_edge, &false_edge);
8066 /* This comparison controls the branch to the loop latch.  For now, require
8067    that the latch block is empty.  */
8068 if ((loop->latch == true_edge->dest
8069 || loop->latch == false_edge->dest)
8070 && empty_block_p (loop->latch))
8072 group->doloop_p = true;
8073 if (dump_file && (dump_flags & TDF_DETAILS))
8075 fprintf (dump_file, "Doloop cmp iv use: ");
8076 print_gimple_stmt (dump_file, stmt, TDF_DETAILS);
8078 return true;
8084 return false;
8087 /* For targets that support doloop, predict whether the RTL doloop
8088    transformation will later be performed on this loop; if so, detect the
8089    doloop use and set the doloop_use_p flag.  */
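/* The prediction is attempted only when -fbranch-on-count-reg is enabled
   and the user has not requested maximal unrolling of the loop; it then
   requires generic_predict_doloop_p to accept the loop for the target and
   find_doloop_use to locate a suitable comparison use.  */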
8091 void
8092 analyze_and_mark_doloop_use (struct ivopts_data *data)
8094 data->doloop_use_p = false;
8096 if (!flag_branch_on_count_reg)
8097 return;
8099 if (data->current_loop->unroll == USHRT_MAX)
8100 return;
8102 if (!generic_predict_doloop_p (data))
8103 return;
8105 if (find_doloop_use (data))
8107 data->doloop_use_p = true;
8108 if (dump_file && (dump_flags & TDF_DETAILS))
8110 struct loop *loop = data->current_loop;
8111 fprintf (dump_file,
8112 "Predict loop %d can perform"
8113 " doloop optimization later.\n",
8114 loop->num);
8115 flow_loop_dump (loop, dump_file, NULL, 1);
8120 /* Optimizes the LOOP. Returns true if anything changed. */
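/* As a source-level illustration (the rewriting itself happens on GIMPLE),
   a loop such as
       for (i = 0; i < n; i++)
         a[i] = 0;
   is typically strength-reduced to advance a single pointer IV and compare
   it against a precomputed bound, eliminating the separate counter:
       p = &a[0]; end = &a[n];
       do { *p = 0; p++; } while (p != end);  */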
8122 static bool
8123 tree_ssa_iv_optimize_loop (struct ivopts_data *data, class loop *loop,
8124 bitmap toremove)
8126 bool changed = false;
8127 class iv_ca *iv_ca;
8128 edge exit = single_dom_exit (loop);
8129 basic_block *body;
8131 gcc_assert (!data->niters);
8132 data->current_loop = loop;
8133 data->loop_loc = find_loop_location (loop).get_location_t ();
8134 data->speed = optimize_loop_for_speed_p (loop);
8136 if (dump_file && (dump_flags & TDF_DETAILS))
8138 fprintf (dump_file, "Processing loop %d", loop->num);
8139 if (data->loop_loc != UNKNOWN_LOCATION)
8140 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
8141 LOCATION_LINE (data->loop_loc));
8142 fprintf (dump_file, "\n");
8144 if (exit)
8146 fprintf (dump_file, " single exit %d -> %d, exit condition ",
8147 exit->src->index, exit->dest->index);
8148 print_gimple_stmt (dump_file, *gsi_last_bb (exit->src),
8149 0, TDF_SLIM);
8150 fprintf (dump_file, "\n");
8153 fprintf (dump_file, "\n");
8156 body = get_loop_body (loop);
8157 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
8158 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
8160 data->loop_single_exit_p
8161 = exit != NULL && loop_only_exit_p (loop, body, exit);
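/* True iff EXIT is the only edge leaving the loop; some of the later cost
   and elimination decisions are valid only in that case.  */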
8163 /* For each SSA name, determine whether it behaves as an induction variable
8164    in the current loop.  */
8165 if (!find_induction_variables (data, body))
8166 goto finish;
8168 /* Finds interesting uses (item 1). */
8169 find_interesting_uses (data, body);
8170 if (data->vgroups.length () > MAX_CONSIDERED_GROUPS)
8171 goto finish;
8173 /* Determine cost scaling factor for basic blocks in loop. */
8174 determine_scaling_factor (data, body);
8176 /* Analyze doloop possibility and mark the doloop use if predicted. */
8177 analyze_and_mark_doloop_use (data);
8179 /* Finds candidates for the induction variables (item 2). */
8180 find_iv_candidates (data);
8182 /* Calculates the costs (item 3, part 1). */
8183 determine_iv_costs (data);
8184 determine_group_iv_costs (data);
8185 determine_set_costs (data);
8187 /* Find the optimal set of induction variables (item 3, part 2). */
8188 iv_ca = find_optimal_iv_set (data);
8189 /* Clear the basic block aux fields set by determine_scaling_factor.  */
8190 for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
8191 body[i]->aux = NULL;
8192 if (!iv_ca)
8193 goto finish;
8194 changed = true;
8196 /* Create the new induction variables (item 4, part 1). */
8197 create_new_ivs (data, iv_ca);
8198 iv_ca_free (&iv_ca);
8200 /* Rewrite the uses (item 4, part 2). */
8201 rewrite_groups (data);
8203 /* Remove the ivs that are unused after rewriting. */
8204 remove_unused_ivs (data, toremove);
8206 finish:
8207 free (body);
8208 free_loop_data (data);
8210 return changed;
8213 /* Main entry point. Optimizes induction variables in loops. */
8215 void
8216 tree_ssa_iv_optimize (void)
8218 struct ivopts_data data;
8219 auto_bitmap toremove;
8221 tree_ssa_iv_optimize_init (&data);
8222 mark_ssa_maybe_undefs ();
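/* Precompute which SSA names may be undefined on some path; IV analysis
   consults this to avoid introducing uses of such values.  */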
8224 /* Optimize the loops starting with the innermost ones. */
8225 for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
8227 if (!dbg_cnt (ivopts_loop))
8228 continue;
8230 if (dump_file && (dump_flags & TDF_DETAILS))
8231 flow_loop_dump (loop, dump_file, NULL, 1);
8233 tree_ssa_iv_optimize_loop (&data, loop, toremove);
8236 /* Remove eliminated IV defs. */
8237 release_defs_bitset (toremove);
8239 /* We have changed the structure of induction variables; it might happen
8240 that definitions in the scev database refer to some of them that were
8241 eliminated. */
8242 scev_reset_htab ();
8243 /* Likewise niter and control-IV information. */
8244 free_numbers_of_iterations_estimates (cfun);
8246 tree_ssa_iv_optimize_finalize (&data);
8249 #include "gt-tree-ssa-loop-ivopts.h"